In [None]:
import cv2
import os
import numpy as np
import random
import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
def read_pic(folder_path):
    """Load every readable image found directly inside ``folder_path``.

    Each directory entry is passed to ``cv2.imread``; entries for which it
    returns ``None`` are silently skipped. Images are returned in the order
    produced by ``os.listdir``.

    Args:
        folder_path: Directory containing the captcha images.

    Returns:
        A list with one ``cv2.imread`` result per readable entry.
    """
    loaded = (
        cv2.imread(os.path.join(folder_path, entry))
        for entry in os.listdir(folder_path)
    )
    # cv2.imread signals failure by returning None instead of raising.
    return [img for img in loaded if img is not None]

In [None]:
def read_labels(folder_path):
    """Derive captcha labels from the entry names in ``folder_path``.

    The label of an entry is the first four characters of its name. Every
    entry returned by ``os.listdir`` contributes a label, in that order; no
    entry is filtered out.

    Args:
        folder_path: Directory whose entry names start with the label.

    Returns:
        A list of label strings (at most four characters each).
    """
    return [entry[:4] for entry in os.listdir(folder_path)]

# Raw string labels for the training set, taken from the entry names of the
# training folder (one label per directory entry, in os.listdir order).
my_train_labels = read_labels('/home/anaconda/code/demo/captcha_train')
# my_test_labels = read_labels('/home/anaconda/code/demo/captcha_test/my_test')

In [None]:
# 加载训练真实值
def encoding_labels(labels, num_labels=None):
    """Encode digit-string labels as per-digit count vectors.

    Each label is turned into a list of ten integers where position ``d``
    holds how many times digit ``d`` occurs in the label. This is a count
    vector rather than a strict one-hot vector: ``"1123"`` becomes
    ``[0, 2, 1, 1, 0, 0, 0, 0, 0, 0]``.

    Args:
        labels: Sequence of strings made of decimal digits.
        num_labels: How many labels (from the start of ``labels``) to encode.
            Defaults to ``len(labels)`` so callers do not have to hard-code
            the dataset size.

    Returns:
        A list of ``num_labels`` lists, each of length 10.

    Raises:
        IndexError: If ``num_labels`` exceeds ``len(labels)``.
        ValueError: If a label contains a character ``int()`` cannot parse.
    """
    if num_labels is None:
        num_labels = len(labels)

    result = []
    for i in range(num_labels):
        counts = [0] * 10
        # Iterating the string yields its characters one digit at a time.
        for digit in labels[i]:
            counts[int(digit)] += 1
        result.append(counts)
    return result
    
# Encode the training labels and hold them as a NumPy array so that batches
# can be selected by index.
# NOTE(review): 100 is hard-coded; presumably the number of training images -
# confirm it matches the contents of the training folder.
train_labels = np.array(encoding_labels(my_train_labels, 100))

In [None]:
def preprocess_images(images):
    """Resize, grayscale and rescale a collection of images.

    Every image is resized to 256x256, converted with
    ``cv2.COLOR_BGR2GRAY`` and divided by 255.0.

    Args:
        images: Iterable of images as accepted by ``cv2.resize``
            (presumably BGR arrays from ``cv2.imread`` - confirm with caller).

    Returns:
        A list with one processed 256x256 array per input image.
    """

    def _prepare(img):
        # Resize first, then drop the colour channels, then scale by 1/255.
        scaled = cv2.resize(img, (256, 256))
        gray = cv2.cvtColor(scaled, cv2.COLOR_BGR2GRAY)
        return gray / 255.0

    return [_prepare(img) for img in images]

In [None]:
# 加载训练和测试数据集
def captcha_data_load(file_path):
    """Read, preprocess and tensorize all images in a folder.

    Args:
        file_path: Directory passed to ``read_pic``.

    Returns:
        A ``torch.float32`` tensor holding the output of
        ``preprocess_images`` stacked along a new leading dimension.
    """
    my_images = read_pic(file_path)
    processed_my_images = preprocess_images(my_images)
    # Stack into a single ndarray before handing it to torch: building a
    # tensor directly from a Python list of ndarrays is a very slow path that
    # PyTorch explicitly warns about. The resulting values are the same.
    stacked_images = np.array(processed_my_images)
    return torch.tensor(stacked_images, dtype=torch.float32)

# Locations of the training and test image folders.
train_file_path = '/home/anaconda/code/demo/captcha_train'
test_file_path = '/home/anaconda/code/demo/captcha_test/my_test'

# Load both splits as float32 tensors.
train_data = captcha_data_load(train_file_path)
test_data = captcha_data_load(test_file_path)

# print(train_data)
# print(test_data)

In [None]:
# 读取数据集
def captcha_data_indice(batch_size, data, labels):
    """Yield shuffled mini-batches of ``(data, labels)``.

    The example indices are shuffled once with ``random.shuffle`` and then
    consumed in consecutive chunks of ``batch_size``; the final chunk may be
    shorter.

    Args:
        batch_size: Maximum number of examples per batch.
        data: Indexable container of examples supporting index-list selection
            (e.g. a ``torch.Tensor``).
        labels: Indexable container of labels aligned with ``data``
            (e.g. a ``numpy.ndarray`` or ``torch.Tensor``).

    Yields:
        Tuples ``(data[idx], labels[idx])`` for each chunk of indices.
    """
    num_examples = len(data)
    indices = list(range(num_examples))
    random.shuffle(indices)
    for start in range(0, num_examples, batch_size):
        # Slicing clamps at the end of the list, so no explicit min() is
        # needed for the final, possibly shorter, batch.
        batch_indices = indices[start:start + batch_size]
        # Index with a plain list rather than a tensor: a one-element integer
        # tensor can be interpreted by NumPy as a scalar index, which would
        # drop the batch dimension of a NumPy label array.
        yield data[batch_indices], labels[batch_indices]
        
batch_size = 10
# Run one full shuffled pass over the training set without doing any work in
# the loop body; afterwards X and y hold the last batch that was yielded.
for X, y in captcha_data_indice(batch_size, train_data, train_labels):
    pass

In [None]:
 # 定义神经网络
class Net(nn.Module):
    """Small CNN mapping 256x256 single-channel images to 10 output scores.

    Three conv/ReLU/max-pool stages reduce a 1x256x256 input to 64x4x4,
    followed by two fully connected layers (1024 -> 256 -> 10).
    """

    def __init__(self):
        super(Net, self).__init__()
        # Shape comments give channels x height x width for a 1x256x256 input.
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1)  # 16 x 128 x 128
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)  # 16 x 64 x 64
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1)  # 32 x 32 x 32
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)  # 32 x 16 x 16
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1)  # 64 x 8 x 8
        self.relu3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)  # 64 x 4 x 4
        self.fc1 = nn.Linear(64 * 4 * 4, 256)
        self.relu4 = nn.ReLU()
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        """Run the network on one image or on a batch of images.

        Args:
            x: Tensor whose element count is a multiple of 256*256, e.g. a
                single image of shape (256, 256) or a batch of shape
                (B, 256, 256) or (B, 1, 256, 256).

        Returns:
            Tensor of shape (B, 10); B is 1 for a single image.
        """
        # Let the batch dimension be inferred instead of forcing it to 1, so
        # whole mini-batches can be passed while a lone (256, 256) image
        # still becomes (1, 1, 256, 256) as before.
        x = x.reshape(-1, 1, 256, 256)
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        x = self.pool3(self.relu3(self.conv3(x)))
        # Flatten each example's 64 x 4 x 4 feature map for the linear layers.
        x = x.view(-1, 64 * 4 * 4)
        x = self.relu4(self.fc1(x))
        x = self.fc2(x)
        return x

In [None]:
# 定义损失函数(交叉熵损失)和优化算法(随机梯度下降)等,并初始化参数
# Training hyper-parameter: number of passes over the data.
num_epochs = 10000

# Model, cross-entropy loss and plain SGD optimizer over the model parameters.
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=1e-3)

In [None]:
# for i in range(X.size(0)):
    # print(F.softmax(net(X[i])))

In [None]:
# 训练
for epoch in range(num_epochs):
    # Draw mini-batches from the full training set each epoch. Local names
    # are used for the batch so that no module-level variable is rebound
    # while the loop runs, which keeps data and labels aligned across epochs.
    for inputs, targets in captcha_data_indice(batch_size, train_data, train_labels):
        # Labels may arrive as a NumPy array; the loss needs a float tensor.
        # The reshape guards against the batch dimension having been dropped
        # for a one-sample batch.
        # NOTE(review): the targets are per-digit counts, not probabilities
        # that sum to 1, while CrossEntropyLoss treats float targets as class
        # probabilities - confirm this loss is the intended one.
        targets = torch.as_tensor(targets, dtype=torch.float32)
        targets = targets.reshape(len(inputs), -1)

        # Gradients accumulate across backward() calls unless cleared.
        optimizer.zero_grad()

        # Forward pass, one image at a time; each call returns a (1, 10) row.
        # Concatenating sizes the prediction matrix to the actual batch, so a
        # shorter final batch does not leave stale rows behind.
        y_hat = torch.cat([net(sample) for sample in inputs], dim=0)
        loss = criterion(y_hat, targets)

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

        # The criterion already averages over the batch, so the loss is
        # reported as-is rather than divided by batch_size again.
        print('epoch:', epoch + 1, 'loss:', loss.item(), '\n')