In [1]:
import os
import cv2
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
# import pdb

In [2]:
# 获得验证码图片的特征值和标签

# 读取验证码路径
def read_pic_paths(file_path):
    pic_paths = []
    for pic_name in os.listdir(file_path):
        pic_path = os.path.join(file_path, pic_name)
        pic_paths.append(pic_path)
    return pic_paths
train_pic_paths = read_pic_paths('captcha_train')
# print(train_pic_paths)

# 读取验证码特征值
'''def read_pic_features(pic_path):
    # 读取图片
    image = cv2.imread(pic_path)
    # 将图片转换为灰度图像
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # 对图像进行二值化处理，将灰度图像转换为黑白图像
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    # 进行形态学操作，如腐蚀和膨胀，以去除噪声或连接断开的字符部分
    kernel = np.ones((3, 3), np.uint8)
    eroded = cv2.erode(binary, kernel, iterations=1)
    dilated = cv2.dilate(eroded, kernel, iterations=1)
    # 缩放图像尺寸为 CNN 模型所需的大小
    resized = cv2.resize(dilated, (256, 256))
    # 归一化图像数据，将像素值缩放到 0~1 之间
    normalized = resized.astype("float") / 255.0
    # 将图像数据转换为 4D 张量，以适应输入 CNN 模型的格式要求
    processed_image = np.expand_dims(normalized, axis=0)
    return processed_image
train_pic_features = []
for path in train_pic_paths:
    features = read_pic_features(path)
    train_pic_features.extend(features)'''

def preprocess_image(image):
    # 转换为灰度图像
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # 使用高斯模糊去除噪声
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    # 应用自适应阈值处理二值化
    _, thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    return thresh
def extract_features(image):
    # 提取图像轮廓
    contours, _ = cv2.findContours(image.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    features = []
    for contour in contours:
        # 计算轮廓的边界框
        x, y, w, h = cv2.boundingRect(contour)
        # 提取边界框内的图像区域
        roi = image[y:y+h, x:x+w]
        # 调整图像大小为固定尺寸（例如：256x256）
        resized_roi = cv2.resize(roi, (256, 256), interpolation=cv2.INTER_AREA)
        # 归一化图像数据，将像素值缩放到 0~1 之间
        feature = resized_roi.astype(np.uint8) / 255.0
        features.append(feature)
    return features
train_pic_features = []
for path in train_pic_paths:
    # 加载验证码图片
    image = cv2.imread(path)
    # 预处理图片
    preprocessed_image = preprocess_image(image)
    # 提取特征值
    features = extract_features(preprocessed_image)
    train_pic_features.extend(features)

i = 1    
# for features in train_pic_features:
    # print('第', i, '个\n', features, '\n', features.shape, '\n')
    # i += 1

In [3]:
# 读取验证码标签
def read_pic_labels(pic_path):
    labels = []
    for file_name in pic_path:
        label = file_name[-8 : -4]
        labels.append(label)
    return labels
train_pic_labels = read_pic_labels(train_pic_paths)
# print(train_pic_labels)

#对验证码标签进行独热编码
def one_hot_encode(labels, num_classes):
    encoded_labels = np.zeros((len(labels), num_classes))
    for i, label in enumerate(labels):
        index = ord(label) - ord('0')  # 标签值为0-9
        encoded_labels[i][index] = 1
    return encoded_labels
# 标签值数量
num_classes = 10  # 数字0-9共10个类别
# 进行独热编码
train_pic_labels_1hot = []
for labels in train_pic_labels:
    encoded_labels = one_hot_encode(labels, num_classes)
    train_pic_labels_1hot.append(encoded_labels)
# for labels in train_pic_labels_1hot:
    # print(labels)

In [4]:
 # 定义神经网络
    
# 初始化模型参数
def init_weights(m):
    if isinstance(m, nn.Conv2d):
        nn.init.xavier_uniform_(m.weight.data)
        if m.bias is not None:
            nn.init.constant_(m.bias.data, 0)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        '''self.conv1 =  nn.Conv2d(1, 16, kernel_size = 3, stride = 2, padding = 1) # 16 * 128 * 128
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size = 2, stride = 2) # 16 * 64 * 64
        self.conv2 = nn.Conv2d(16, 32, kernel_size = 3, stride = 2, padding = 1) # 32 * 32 * 32
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size = 2, stride = 2) # 32 * 16 * 16
        self.conv3 = nn.Conv2d(32, 64, kernel_size = 3, stride = 2, padding = 1) # 64 * 8 * 8
        self.relu3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(kernel_size = 2, stride = 2) # 64 * 4 * 4
        self.fc1 = nn.Linear(64* 4* 4, 128)    #(64 * 4 * 4, 128)'''
        self.fc1 = nn.Linear(1* 256* 256, 128)    #(1 * 256 * 256, 128)
        self.relu4 = nn.ReLU()
        self.fc2 = nn.Linear(128, 10)    #(128, 10)
 
    def forward(self, x):
        x = x[:, :, None, None]
        x = torch.reshape(x, [10, 1, 256, 256])
        '''x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)
        x = self.conv3(x)
        x = self.relu3(x)
        x = self.pool3(x)
        x = x.view(-1, 64* 4 *4)'''
        x = x.view(-1, 1* 256 *256)
        x = self.fc1(x)
        x = self.relu4(x)
        x = self.fc2(x)
        return x


net = Net()
net.apply(init_weights)

Net(
  (fc1): Linear(in_features=65536, out_features=128, bias=True)
  (relu4): ReLU()
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)

In [5]:
# 定义数据迭代器
class DataIterator:
    def __init__(self, images, labels, batch_size, shuffle=True):
        self.images = images
        self.labels = labels
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.num_samples = len(labels)
        self.num_batches = int(np.ceil(self.num_samples / self.batch_size))
        self.indices = np.arange(self.num_samples)

        if self.shuffle:
            np.random.shuffle(self.indices)

    def __iter__(self):
        self.current_batch = 0
        return self

    def __next__(self):
        if self.current_batch >= self.num_batches:
            raise StopIteration

        start_idx = self.current_batch * self.batch_size
        end_idx = min(start_idx + self.batch_size, self.num_samples)
        batch_indices = self.indices[start_idx:end_idx]
        batch_images = [self.images[i] for i in batch_indices]
        batch_labels = [self.labels[i] for i in batch_indices]

        self.current_batch += 1

        return np.array(batch_images), np.array(batch_labels)
    
# 创建数据迭代器
batch_size = 10
data_iterator = DataIterator(train_pic_features, train_pic_labels_1hot, batch_size)
# for i, (images, labels) in enumerate(data_iterator, 0):
    # print(i, '批第一个', '\n', images[0], '\n', labels[0], '\n')

In [6]:
# 定义函数
num_epochs = 10
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.001)

In [8]:
# 训练
for epoch in range(num_epochs):
    loss = 0.0
    # running_loss = 0.0
    # pdb.set_trace() # 调试
    for i, (batch_images, batch_labels) in enumerate(data_iterator, 0):
        batch_images = torch.Tensor(batch_images)
        batch_labels = torch.Tensor(batch_labels)
        
        optimizer.zero_grad() # 清除优化器关于参数的累计梯度值
        outputs = net(batch_images)
        
        # print('模型输出值：\n', F.softmax(outputs[0]))
        # 将训练值和标签值大小统一，并进行损失值计算
        # 在第2个维度上增加一个新的维度
        # outputs_gai = outputs.unsqueeze(1)
        # 重复四次
        # outputs_gai = outputs_gai.repeat(1, 4, 1)
        # print(outputs_gai.shape, batch_labels.shape)
        # print(outputs_gai)
        # print('标签值：\n', batch_labels[0])
        
        loss = criterion(outputs_gai, batch_labels)
        print('损失值：\n', loss.item(), '\n')
        
        loss.backward() # 将损失loss向输入测进行反向传播
        # 检查梯度
        for name, param in net.named_parameters():
            if param.grad is not None:
                print(name, param.grad)
                
        optimizer.step() # 利用优化器更新参数
 
        # running_loss += loss.item()
        if i % 10 == 9:
            # print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 1000))
            # running_loss = 0.0
            print('epoch', epoch + 1, 'loss:', loss.item(), '\n')
            loss = 0.0

损失值：
 0.5545177459716797 

fc1.weight tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
fc1.bias tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0.])
fc2.weight tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
       