In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import os 
from PIL import Image
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import string
import torchvision.transforms as transforms


def get_mean_std(dataset_class, names_train, path, is_train, transform):
    dataset = dataset_class(names_train, PATH, is_train, transform)
    train_tensors = [dataset[i][0] for i in range(len(names_train))]
    train_tensors_stack = torch.stack(train_tensors)
    return train_tensors_stack.mean(), train_tensors_stack.std()


class CaptchaDataset(Dataset):
    def __init__(self, dataframe, root_dir, is_train, transform=None):
        self.dataframe = dataframe
        self.root_dir = root_dir
        self.is_train = is_train
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)
    
    def __getitem__(self, index):
        if torch.is_tensor(index):
            index = index.tolist()
            
        symbols = list(map(str, range(10))) + list(string.ascii_lowercase) 
        symbol_to_id = {key: value for value, key in enumerate(symbols)}
        
        img_name = os.path.join(self.root_dir, self.dataframe['filename'][index])
        image = Image.open(img_name).convert('L')
        
        
        if self.is_train:
            label = torch.tensor(np.array(
                [symbol_to_id[idx] for idx in self.dataframe['label'][index]]))   
        else:
            label = torch.tensor(np.zeros(5))
        
        if self.transform:
            img_tensor = self.transform(image)
#             img_tensor = img_tensor[:,0,:,:]
#             img_tensor = img_tensor[:, : :, 30:145]
            
        return img_tensor, label[0]
    
    

def get_train_test_names(root_dir, test_size=0.2):
    picture_names = [[picture_name, picture_name[: -4]] for picture_name
                     in os.listdir(root_dir)]

    names_dataframe = pd.DataFrame(picture_names, columns=('filename', 'label'))
    names_train, names_test = train_test_split(names_dataframe, 
                                   test_size=test_size, 
                                   random_state=42)
    return names_train.reset_index(drop=True), names_test.reset_index(drop=True)




In [2]:
PATH = r'captcha_dataset/samples'

transform_to_tensor = transforms.Compose([
    transforms.ToTensor()
])

names_train, names_test = get_train_test_names(PATH)

sample_mean, sample_std = get_mean_std(CaptchaDataset, names_train, PATH, 1, transform_to_tensor)

transform_to_tensor_and_norm = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(sample_mean, sample_std)
])

trainset = CaptchaDataset(names_train, PATH, 1, transform_to_tensor_and_norm)
testset = CaptchaDataset(names_test, PATH, 1, transform_to_tensor_and_norm)

batch_size = 5
num_workers = 1

trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=num_workers)

testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=num_workers)

# for i, data in enumerate(trainloader):
#     p, l = data
# #     p = p[:,0,:,:]
#     print(p.size())
#     break
    
# trainset[0][0]

In [3]:
def train_model(net, criterion, optimizer, trainloader, num_epochs=5):
    for epoch in range(num_epochs):
        running_loss = 0.0
        for i, data in enumerate(trainloader):
#             print(f'{data=}')
            inputs, labels = data
#             print(f'{inputs.size()=}')
            
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            

            running_loss += loss.item()
            if i % 200 == 199:
                print('Epoch {0}/{1}, iteration {2}, loss: {3:.3f}'.format(
                    epoch + 1, num_epochs, i + 1, running_loss / 2000))
                running_loss = 0.0
        print()

    print('Finished Training')
    
    return net

In [4]:
def all_accuracy(net, testloader):
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            outputs = net(images)
            _, predicted = torch.max(outputs, 1)
            
            
            
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: {} %'.format(
        100 * correct / total))

In [5]:
import torch.nn as nn
import torch.nn.functional as F


class FeedForwardNet(nn.Module):
    def __init__(self):
        super(FeedForwardNet, self).__init__()
        self.fc1 = nn.Linear(in_features=50 * 200, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=36)
        
    def forward(self, x):
        
#         print('before flatten', x.size())
        x = torch.flatten(x, 1)
#         print(x.size())
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        
        return x
    
net = FeedForwardNet()

In [6]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [7]:
net = train_model(net, criterion, optimizer, trainloader, num_epochs=5)






Finished Training


In [9]:
all_accuracy(net, testloader)

Accuracy of the network on the 10000 test images: 97.66355140186916 %


In [46]:
t = iter(testloader)
p = next(t)
p = next(t)
m = nn.Softmax()
print(torch.argmax(m(net.forward(p[0])[3])))
transform = transforms.ToPILImage()
img = transform(p[0][3])
img.show()



tensor(23)


  print(torch.argmax(m(net.forward(p[0])[3])))


In [47]:
symbols = list(map(str, range(10))) + list(string.ascii_lowercase) 
symbol_to_id = {key: value for value, key in enumerate(symbols)}
symbol_to_id['n']

23