# Verification Code Detection -CNN

### 1.dataset generation

In [None]:
import random

from captcha.image import ImageCaptcha

data_path = "../Dataset"

# number
number = ['0','1','2','3','4','5','6','7','8','9']

# lower character
character = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']

# upper character
upper_character = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z']

# Random text generation
def random_captcha_text(char_set = number,captcha_size = 4):
    captcha_text = []
    for i in range(captcha_size):
        c = random.choice(char_set)
        captcha_text.append(c)
    return captcha_text 

# 随机产生验证码图片
def gen_capthcha_text_and_image(m):
    image = ImageCaptcha()
    captcha_text = random_captcha_text()  # 生成验证码文本串，默认4位
    captcha_text = ' '.join(captcha_text)  # 生成标签 [4 3 5 8]

    # 保存验证码图片
    image.write(captcha_text, "../Dataset/image/" + '%.4d' % m + '.jpg')  # 保存图片

    # 将标签信息写入
    with open(data_path + "/label.txt", "a") as f:
        f.write(captcha_text)
        f.writelines("\n")

for m in range(500):
    gen_capthcha_text_and_image(m)



### 2.convulotion layer & training

In [21]:
import os
import time

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import transforms

import datetime

class MyDataset(Dataset):
    def __init__(self,root_dir,label_file,transform=None):
        self.root_dir = root_dir
        self.label = np.loadtxt(label_file)
        self.transform = transform

    def __getitem__(self,idx):
        img_name = os.path.join(self.root_dir,'%.4d.jpg' % idx)
        image = Image.open(img_name)
        labels = self.label[idx]
        if self.transform:
            image = self.transform(image)
        return image,labels

    def __len__(self):
        return (self.label.shape[0])
    
class ConvNet(nn.Module):

    def __init__(self):
        super(ConvNet,self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3,32,kernel_size=4,stride=1, padding=2),
            nn.BatchNorm2d(32),
            nn.LeakyReLU(0.2,inplace=True),
            nn.MaxPool2d(kernel_size=2),

            nn.Conv2d(32,64,kernel_size=4,stride=1, padding=2),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.2,inplace=True),
            nn.MaxPool2d(kernel_size=2),

            nn.Conv2d(64,64,kernel_size=3,stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.2,inplace=True),
            nn.MaxPool2d(kernel_size=2),  
        )
        self.fc1 = nn.Linear(64*7*20,512)
        self.fc2 = nn.Linear(512,40)

    def forward(self,x):
        x = self.conv(x)
        x = x.view(x.size(0),-1)
        x = self.fc1(x)
        x = F.leaky_relu(x,0.2)
        x = self.fc2(x)
        return x

def loss_function(output,label):
    loss = nn.CrossEntropyLoss()

    output = output.contiguous().view(-1,10)
    label = label.contiguous().view(-1)

    total_loss = loss(output,label)

    return total_loss

def open_log_file(file_name = None):
    file = open('../Result/'+ file_name,'w',encoding = 'utf-8')
    return file

def close_log_file(file=None):
    file.close()

def log(msg='',file=None,print_msg=True,end='\n'):
    if print_msg:
        print(msg)
    
    now = datetime.datetime.now()
    t=str(now.year)+'/'+str(now.month)+'/'+str(now.day)+' '+str(now.hour).zfill(2)+':'+str(now.minute).zfill(2)+':'+str(now.second).zfill(2)
    
    if isinstance(msg,str):
        lines = msg.split('\n')
    else:
        lines = [msg]

    for line in lines:
        if line ==lines[-1]:
            file.write('['+t+']'+str(line)+end)
        else:
            file.write('['+t+']'+str(line))

file_path = "../Dataset/image/"
label_path = "../Dataset/label.txt"
model_path = "../model/checkpoint/best_model.pkl"
batch_size = 128
epochs = 100
learning_rate = 0.003

dataset = MyDataset(file_path,label_path,transform=transforms.ToTensor())

dataloader = DataLoader(dataset,batch_size=batch_size,shuffle=True,drop_last=True)

dataset_size = len(dataset)

model = ConvNet()

optimizer = torch.optim.Adam(model.parameters(),lr=learning_rate)

best_model = model.state_dict()
best_acc = 0.0

file = open_log_file(file_name='ConvNet')

for epoch in range(epochs):
    epoch_acc=0
    epoch_count=0
    epoch_loss=0

    if epoch == 0:
        log('[模型结构]',file,False)
        log(model,file,False)
    
    for x,y in dataloader:
        optimizer.zero_grad()
        pred = model(x)
        loss = loss_function(pred,y.long())
        epoch_loss += loss.item()
        epoch_count += pred.contiguous().view(-1,10).argmax(axis = 1).eq(y.contiguous().view(-1)).sum().item()
       
        loss.backward()
        optimizer.step()

    epoch_acc = epoch_count / (len(y)*4*3)
    epoch_loss /= len(y)*4*3

    log("[EPOCH：%s]"%str(epoch+1),file,True)
    log("训练损失为：{:.4f}".format(epoch_loss)+'t'+"训练精度为：{:.4f}".format(epoch_acc),file,True)

    if epoch_acc > best_acc:
        best_acc = epoch_acc
        best_model = model.state_dict()

    if epoch == epochs-1:
        torch.save(best_model,model_path)

print("[finish training]")  

close_log_file(file)




[EPOCH：1]
训练损失为：0.0263t训练精度为：0.1081
[EPOCH：2]
训练损失为：0.0273t训练精度为：0.1120
[EPOCH：3]
训练损失为：0.0172t训练精度为：0.1126
[EPOCH：4]
训练损失为：0.0080t训练精度为：0.1217
[EPOCH：5]
训练损失为：0.0051t训练精度为：0.1328
[EPOCH：6]
训练损失为：0.0047t训练精度为：0.1354
[EPOCH：7]
训练损失为：0.0045t训练精度为：0.1452
[EPOCH：8]
训练损失为：0.0044t训练精度为：0.1823
[EPOCH：9]
训练损失为：0.0043t训练精度为：0.1986
[EPOCH：10]
训练损失为：0.0043t训练精度为：0.1999
[EPOCH：11]
训练损失为：0.0041t训练精度为：0.2435
[EPOCH：12]
训练损失为：0.0041t训练精度为：0.2520
[EPOCH：13]
训练损失为：0.0040t训练精度为：0.2741
[EPOCH：14]
训练损失为：0.0039t训练精度为：0.3118
[EPOCH：15]
训练损失为：0.0039t训练精度为：0.3177
[EPOCH：16]
训练损失为：0.0038t训练精度为：0.3307
[EPOCH：17]
训练损失为：0.0037t训练精度为：0.3730
[EPOCH：18]
训练损失为：0.0035t训练精度为：0.4134
[EPOCH：19]
训练损失为：0.0033t训练精度为：0.4570
[EPOCH：20]
训练损失为：0.0032t训练精度为：0.4798
[EPOCH：21]
训练损失为：0.0030t训练精度为：0.5052
[EPOCH：22]
训练损失为：0.0028t训练精度为：0.5456
[EPOCH：23]
训练损失为：0.0026t训练精度为：0.5781
[EPOCH：24]
训练损失为：0.0024t训练精度为：0.6315
[EPOCH：25]
训练损失为：0.0022t训练精度为：0.6693
[EPOCH：26]
训练损失为：0.0020t训练精度为：0.7005
[EPOCH：27]
训练损失为：0.0019t训练精度为：0.7188
[EPOCH：28]

### 3.predict label

In [26]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from torchvision import transforms

img_path = r'../Dataset/image/0457.jpg' # 测试验证码图片

img = Image.open(img_path) # 打开图片
img = transforms.ToTensor()(img) # 将其转换成tensor
img = torch.unsqueeze(img, dim=0) # 处理成模型输入格式 [batch_size, 3, 60, 160]

# 自定义卷积网络模型
class ConvNet(nn.Module):

    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=4, stride=1, padding=2),  # 验证码的大小为 [3, 60, 160]
            nn.BatchNorm2d(32),
            nn.LeakyReLU(0.2, inplace=True),
            nn.MaxPool2d(kernel_size=2),  # [batch_size, 32, 30, 80]

            nn.Conv2d(32, 64, kernel_size=4, stride=1, padding=2),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.2, inplace=True),
            nn.MaxPool2d(kernel_size=2),  # [batch_size, 64, 15, 40]

            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.2, inplace=True),
            nn.MaxPool2d(kernel_size=2)  # [batch_size, 64, 7, 20]
        )

        self.fc1 = nn.Linear(64 * 7 * 20, 512)
        self.fc2 = nn.Linear(512, 40)  # 每个图片中有4个数字，每个数字为10分类，所以为40个输出

    def forward(self, x):
        # 使用卷积提取特征
        x = self.conv(x) # [batch_size, 64, 7, 20]
        
        # 将特征图拉伸
        x = x.view(x.size(0), -1)  # [batch_size, 64 * 7 * 30] 或 [batch_size, 8960]
        
        # 使用输出层进行分类
        x = self.fc1(x) # [batch_size, 512]
        x = F.leaky_relu(x, 0.2)
        x = self.fc2(x) # [batch_size, 40]

        return x

# 加载模型权重
model = ConvNet()
model.load_state_dict(torch.load('../model/checkpoint/best_model.pkl'))


# 验证码识别
pred = model(img)
predict_captcha = pred.contiguous().view(-1, 10).argmax(axis=1).numpy().tolist()
print('验证码: ', predict_captcha)


验证码:  [7, 9, 7, 3]
