In [37]:
import cv2
import numpy as np
from skimage import morphology
from skimage.io import imread
from skimage.morphology import medial_axis, skeletonize
from scipy.ndimage import distance_transform_edt
import math
import copy
import tensorflow as tf
from PIL import Image
from scipy import signal
from torchvision.transforms import ToPILImage

In [38]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# 定义网络结构
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.fc1 = nn.Linear(64 * 5 * 5, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 64 * 5 * 5)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [39]:
def mask(data):    
    x, y = data.shape
    newData = np.zeros([x, y], dtype=complex)
    for i in range(x):
        for j in range(y):
            if(i<x/2+10 and i>x/2-10 or j>y/2-10 and j<y/2+10):
                newData[i, j] = 0
            else:
                newData[i, j] = data[i, j]    
            if(i<x/2+10 and i>x/2-10 and j>y/2-10 and j<y/2+10):
                newData[i, j] = data[i, j]
            # if(i>x/2+200 or i<x/2-200 or j<y/2-200 or j>y/2+200):
            #     newData[i, j] = data[i, j]


    return newData

def ButterFilter(image, cutoff_freq, order):
        # cutoff_freq = 15  # 截止频率
        # order = 4  # 阶数

        # 计算归一化截止频率
        nyquist_freq = 0.5 * image.shape[0]  # 奈奎斯特频率
        normalized_cutoff_freq = cutoff_freq / nyquist_freq

        # 创建巴特沃斯低通滤波器
        b, a = signal.butter(order, normalized_cutoff_freq, btype='low')

        # 对图像应用滤波器
        filtered_image = signal.filtfilt(b, a, image)

        # 将浮点值转换为8位无符号整数值（0-255范围）
        filtered_image = np.uint8(filtered_image)

        return np.real(filtered_image)





def threshold(data):
    data = np.real(data)
    max = np.max(data)
    min = np.min(data)
    thresh = (max)*3/5
    x, y = data.shape
    newData = np.zeros([x, y])
    for i in range(x):
        for j in range(y):
            if(data[i ,j] < thresh):
                newData[i ,j] = 1
    return newData


def get_bounding_box(skeleton):
    # 查找骨架的轮廓
    skeleton = np.array(skeleton)
    contours, _ = cv2.findContours(skeleton, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # 创建空白图像
    bounding_box = np.zeros_like(skeleton)
    i, j = bounding_box.shape
    flag = False
    bounds = []
    for contour in contours:
        # 计算骨架的边界框
        x, y, w, h = cv2.boundingRect(contour)
        # print(x+h/2,y+w/2)
        # if(abs(x+h/2-i/2)<300 and abs(y+w/2-j/2)<300 and w<200 and w>10 and h<200 and h>30):
        if(w<200  and h<300 and h>25):
        # 绘制边界框
            cv2.rectangle(bounding_box, (x, y), (x + w, y + h), 255, -1)
            bounds.append([y, x, w, h])
            flag = True
    #返回全部框
    if(flag):
        print(bounds)
        return bounds, bounding_box
    else:    
        cv2.rectangle(bounding_box, (int(i/4), int(j/4)), (int(i/2), int(j/2)), 255, -1)
        bounds.append([int(i/4), int(j/4), int(i/2), int(j/2)])
        print(bounds)
        return bounds, bounding_box

def makeframe(skeleton, bounds):  
    print(skeleton.shape) 
    # temp = x
    # x = y
    # y = temp
    masks = []
    for bound in bounds:
        x, y, w, h = bound[0], bound[1], bound[2], bound[3]
        x = int(x + h/2 - max(h,w)/2-12)
        y = int(y + w/2 - max(h,w)/2-6)
        w = max(h, w)+16
        h = max(h, w)+12
        if(x<0):
            x=0
        if(y<0):
            y=0
        print('frame',x,y,w,h)

        maskSk = np.zeros([h, w])
        for i in range(h):
            for j in range(w):
                if(x+i<480 and y+j<640):
                    maskSk[i, j] = skeleton[x + i, y + j]
        masks.append(maskSk)

    return masks


def hitornot(img,SE): # 目标是1，背景是0
    img = copy.copy(img)
    r,c = img.shape[0],img.shape[1]
    img = np.pad(img,((1,1),(1,1)),'constant',constant_values=0)
    img = img.astype(np.float64) 
    res = np.zeros_like(img,dtype=np.uint8)
    for p in range(1,r+1):
        for q in range(1,c+1):
            area = copy.copy(img[p-1:p+2,q-1:q+2])
            if len(SE) == 2:
                area[SE[1][:,0],SE[1][:,1]] = math.inf
            if (area == SE[0]).all():
                res[p,q] = 1
    return res[1:-1,1:-1]

def refinement(img,B):
    img = copy.copy(img)
    hit = hitornot(img,B)
    loc1 = np.argwhere(img==1).tolist()
    loc2 = np.argwhere(hit==1).tolist()
    loc = np.array([k for k in loc1 if k in loc2])
    if len(loc)!=0:
        img[loc[:,0],loc[:,1]] = 0
    return img


# 裁剪
def crop(img): # 需要目标是1，背景是0
    SE1 = [np.array([[math.inf,0,0],[1,1,0],[math.inf,0,0]]),np.array([[0,0],[2,0]])]
    SE2 = [np.array([[math.inf,1,math.inf],[0,1,0],[0,0,0]]),np.array([[0,0],[0,2]])]
    SE3 = [np.array([[0,0,math.inf],[0,1,1],[0,0,math.inf]]),np.array([[0,2],[2,2]])]
    SE4 = [np.array([[0,0,0],[0,1,0],[math.inf,1,math.inf]]),np.array([[2,0],[2,2]])]
    SE5 = [np.array([[1,0,0],[0,1,0],[0,0,0]])]
    SE6 = [np.array([[0,0,1],[0,1,0],[0,0,0]])]
    SE7 = [np.array([[0,0,0],[0,1,0],[0,0,1]])]
    SE8 = [np.array([[0,0,0],[0,1,0],[1,0,0]])]
    ''' Step1: 删除3个即以下长度的分枝, 得到X1 '''
    X1 = copy.copy(img)
    for _ in range(3): # 连续3次操作
        for i in range(8): # 结构元序列长度为8
            SE = eval(f'SE{i+1}')
            X1 = refinement(X1,SE)
    ''' Step2: 得到X1中端点集合X2 '''
    g = []
    for i in range(8):
        SE = eval(f'SE{i+1}')
        kk = hitornot(X1,SE)
        res = np.argwhere(kk==1)
        g.extend(res)
    X2 = np.zeros_like(img,dtype=np.uint8)
    if len(g)!=0:
        g = np.array(g)
        X2[g[:,0],g[:,1]] = 1
    ''' Step3: 对端点连续进行3次膨胀 '''
    H = np.ones((3,3),dtype=np.uint8)
    X3 = copy.copy(X2)
    loc2 = np.argwhere(img==1).tolist() # 原图A
    for _ in range(3):
        X3 = cv2.dilate(X3,H,iterations=1)
        loc1 = np.argwhere(X3==1).tolist()
        loc = np.array([k for k in loc1 if not k in loc2]) # 与原图A做交集
        if len(loc)!=0:
            X3[loc[:,0],loc[:,1]] = 0
    ''' Step4: 取并集 '''
    X4 = np.zeros_like(img,dtype=np.uint8)
    loc1 = np.argwhere(X1==1).tolist()
    loc2 = np.argwhere(X3==1).tolist()
    loc1.extend(loc2)
    loc1 = np.array(loc1)
    X4[loc1[:,0],loc1[:,1]] = 1
    return X4
 
 


In [40]:
def preProcess(data):
    # fI = np.fft.fftshift(np.fft.fft2(data))
    # maskfI = mask(fI)
    # newI = np.real(np.fft.ifft2(np.fft.fftshift(maskfI)))
    # newI = ButterFilter(newI, 150, 2)
    kernel_size = (3, 3)

    # 应用平滑盒式滤波
    smoothed_image = cv2.blur(data, kernel_size)
    cv2.imshow('smooth', smoothed_image/np.max(smoothed_image))
    #thresholdData = threshold(data)

    gray_image_8uc1 = cv2.convertScaleAbs(smoothed_image)

    thresholdData = cv2.adaptiveThreshold(gray_image_8uc1, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 55, 4)
    thresholdData = ~thresholdData

    kernel = np.ones((2, 2), np.uint8)
  
    # Using cv2.erode() method 
    erodeData = cv2.erode(thresholdData, kernel) 

    # Creating kernel
    kernel = np.ones((3, 3), np.uint8)
    
    # 膨胀
    dilateData = cv2.dilate(erodeData, kernel) 

    cv2.imshow('Process', dilateData/np.max(dilateData))
    
    #骨骼化
    skeleton0 = morphology.skeletonize(dilateData, method='lee')
    skeleton = skeleton0.astype(np.uint8)*255

    
    #框选
    bounds, bounding_box= get_bounding_box(skeleton)

    #masks
    masks = makeframe(skeleton, bounds)

    cv2.imshow('Processssss', skeleton*bounding_box/np.max(skeleton*bounding_box))

    numbers = []
    for maskSK in masks: 
        #28*28
        cv2.imshow('mask',maskSK)
        
        #膨胀
        kernel = np.ones((6, 6), np.uint8)
        dimaskSK = cv2.dilate(maskSK, kernel) 

        resizedData = cv2.resize(dimaskSK,(28,28), interpolation=cv2.INTER_AREA)
        _,resizedData = cv2.threshold(resizedData, 0.2, 1, cv2.THRESH_BINARY)
        # cv2.imshow('resize',resizedData)
        #骨骼化
        skeleton0 = morphology.skeletonize(resizedData, method='lee')
        reskeleton = skeleton0.astype(np.uint8)*255
        # cv2.imshow('ske',reskeleton)
        # pruneData = crop(reskeleton)

        # Creating kernel
        kernel = np.ones((2, 2), np.uint8)
        
        # 膨胀
        finalData = cv2.dilate(reskeleton, kernel) 

        kernel_size = (3, 3)
        finalData = cv2.GaussianBlur(finalData*8, kernel_size,  sigmaX=1)

        cv2.imshow('final',finalData/np.max(finalData))
        # 平滑
        # finalData = cv2.blur(finalData*255, kernel_size)
        numbers.append(finalData)

    return numbers, bounds
    # return finalData/np.max(finalData)

# plt.imshow(np.log(np.abs(fI)), cmap='gray')

In [41]:




# 加载模型参数
model = Net()  # 创建一个新的模型实例
model.load_state_dict(torch.load('model.pth'))

# 将模型设置为评估模式
model.eval()
# 进行与训练数据相同的预处理
transform = transforms.Compose([
    # transforms.Resize((28, 28)),
    # transforms.Grayscale(),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])


In [42]:
def drawBounds(data, bounds):

    boundsImg = np.copy(data)
    for bound in bounds:
        x, y, w, h = bound[0], bound[1], bound[2], bound[3]
        x = int(x + h/2 - max(h,w)/2-5)
        y = int(y + w/2 - max(h,w)/2-5)
        w = max(h, w)+10
        h = max(h, w)+10
        if(x<0):
            x=0
        if(y<0):
            y=0
        print('frame',x,y,w,h)

        if(x+h>=480):
             h = 479 - x
        if(y+w>=640):
             w = 639 - y
        for i in range(h):
            boundsImg[x+i, y] = 0
            boundsImg[x+i, y+w] = 0
        for j in range(w):
                boundsImg[x, y+j] = 0
                boundsImg[x+h, y+j] = 0
    return boundsImg
        

def drawResult(data, bounds, results):
    resultImg = np.copy(data)
    for result in results:
        print(result)
        bound = bounds[result[0]]
        x, y, w, h = bound[0], bound[1], bound[2], bound[3]
        x = int(x + h/2 - max(h,w)/2-5)
        y = int(y + w/2 - max(h,w)/2-5)
        w = max(h, w)+10
        h = max(h, w)+10
        if(x<0):
            x=0
        if(y<0):
            y=0
        print('frame',x,y,w,h)

        if(x+h>=480):
             h = 479 - x
        if(y+w>=640):
             w = 639 - y
        for i in range(h):
            resultImg[x+i, y,:] = 0
            resultImg[x+i, y+w,:] = 0
        for j in range(w):
                resultImg[x, y+j,:] = 0
                resultImg[x+h, y+j,:] = 0

        # 定义绘制文本的属性
        font = cv2.FONT_HERSHEY_SIMPLEX
        position = (y, x)
        font_scale = 1
        color = (0, 0, 255)  # BGR颜色通道顺序，这里是红色
        thickness = 2

        # 绘制数字
        cv2.putText(resultImg, str(result[1]), position, font, font_scale, color, thickness)
    return resultImg
     


In [43]:
import cv2

# 打开摄像头
cap = cv2.VideoCapture(0)

while True:
    # 读取摄像头图像
    ret, frame = cap.read()

    if ret:
        # 显示摄像头画面
        cv2.imshow('Original', frame)

        # 在图像上进行实时处理（这里示例为灰度化处理）
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        preDatas, bounds = preProcess(gray)

        # 处理后的图像
        # cv2.imshow('Processed', fram)

        # images = Image.fromarray(preData)
        boundsImg = drawBounds(gray, bounds)
        cv2.imshow('boundsImg', boundsImg)


        # 将数据转换为TensorFlow张量
        # tensor = tf.convert_to_tensor(preData)
        # 对图像进行预处理

        
            


        results = []
        i = -1
        for preData in preDatas:
            # print(preData)
            cv2.imshow('test',preData/np.max(preData))
            tensor = Image.fromarray(preData/np.max(preData))
            print(tensor)
            preprocessed_image = transform(tensor).unsqueeze(0)
            model.eval()
            with torch.no_grad():
                outputs = model(preprocessed_image)

            print(outputs.data)
            # 获取预测结果
            _, predicted = torch.max(outputs.data, 1)
            predicted_label = predicted.item()

            print('Predicted label:', predicted_label)
            i += 1
            # if(predicted_label > 0.5):
            results.append([i, predicted_label])

        resultImg = drawResult(frame, bounds, results)
        cv2.imshow('result', resultImg)


        
    # 按下 'q' 键退出循环
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# 释放摄像头并关闭窗口
cap.release()
cv2.destroyAllWindows()



[[412, 161, 22, 31], [401, 299, 42, 68], [373, 370, 181, 103], [352, 23, 32, 120], [345, 149, 35, 63], [344, 1, 17, 136], [319, 305, 37, 58], [315, 4, 147, 164], [288, 273, 31, 98], [276, 209, 68, 202], [274, 211, 36, 73], [244, 530, 15, 82], [223, 0, 5, 50], [208, 329, 50, 27], [191, 6, 169, 128], [75, 94, 93, 150]]
(480, 640)
frame 400 150 47 59
frame 389 280 84 96
frame 322 364 197 209
frame 340 0 136 148
frame 333 129 79 91
frame 332 0 152 164
frame 307 288 74 86
frame 303 0 180 192
frame 276 233 114 126
frame 264 136 218 230
frame 262 186 89 101
frame 232 490 98 110
frame 211 0 66 78
frame 184 323 66 78
frame 158 0 185 197
frame 63 59 166 178
frame 407 151 41 51
frame 396 281 78 88
frame 329 365 191 201
frame 347 0 130 140
frame 340 130 73 83
frame 339 0 146 156
frame 314 289 68 78
frame 310 0 174 184
frame 283 234 108 118
frame 271 137 212 222
frame 269 187 83 93
frame 239 491 92 102
frame 218 0 60 70
frame 191 324 60 70
frame 165 1 179 189
frame 70 60 160 170
<PIL.Image.Image im