<a href="https://colab.research.google.com/github/carbonkat/Coding-Projects/blob/main/Captcha_Final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [23]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
import matplotlib.pylab as plt
from torch.utils.data import Dataset, DataLoader
from torch.distributions import normal
import time
import os
import numpy as np
import matplotlib.pyplot as plt
import skimage.data as data
import skimage.segmentation as seg
import skimage.filters as filters
import skimage.draw as draw
import skimage.color as color
from skimage import io

In [24]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [25]:
import cv2
from google.colab.patches import cv2_imshow

def image_processing(image):
  #first convert image to grayscale
  gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  (thresh, blackAndWhiteImage) = cv2.threshold(gray_image, 127, 255, cv2.THRESH_BINARY_INV)

  #close holes in characters to make continuous lines
  kernel = np.ones((2,3),np.uint8)
  close = cv2.morphologyEx(blackAndWhiteImage, cv2.MORPH_CLOSE, kernel)

  #open up spaces between defined lines
  opening = cv2.morphologyEx(close, cv2.MORPH_OPEN, kernel)

  #thin lines to remove occluding line
  kernel = np.ones((2,2),np.uint8)
  erosion = cv2.erode(opening,kernel,iterations = 1)

  opening2 = cv2.morphologyEx(erosion, cv2.MORPH_OPEN, np.ones((2,1),np.uint8))

  return opening2

In [26]:
def get_letters(image):
  x, y, w, h = 30, 12, 21, 38
  letters = []
  for  i in range(5):
    # get the bounding rectangle
    #image = cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 1)
    letter = image[y:y+h,x:x+w]
    letter = cv2.resize(letter, (28, 28))
    letters.append(letter)
    x += w
  return letters

In [27]:
import glob
import cv2
from google.colab.patches import cv2_imshow

download_path = '/content/gdrive/My Drive/Colab_Notebooks/input/samples'
#obtain all images using recursive downloading through glob module
images = glob.glob(download_path + '/*.png')

def get_name(image):
  path = image.split('samples/')
  path = path[1].split('.')
  name = path[0]
  if len(name) > 5:
    print(name)
  return name

def build_character_list(image_paths):
  indiv_characters = []
  indiv_labels = []
  for image_path in image_paths:
    #read in captcha image
    image = io.imread(image_path)
    image_processed = image_processing(image)
    letters = get_letters(image_processed)
    #get name
    name = get_name(image_path)
    for i in range(5):
      indiv_characters.append(letters[i])
      indiv_labels.append(str_to_one_hot(name[i]))
    #print("done!")
  return indiv_characters, indiv_labels

#one hot encoding of labels
def str_to_one_hot(str):
  vals = ['1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
          'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
  #get the index of the string---this will be its encoded value
  ind = vals.index(str)
  return ind

In [28]:
from collections import Counter
from random import shuffle

def explore(labels):
  tally = Counter(labels)
  return tally

def oversample(images, labels, tally):
  random.seed(42)
  max = tally.most_common()[0][1]
  print(max)
  for i in tally.items():
    count = i[1]
    indices = [j for j, x in enumerate(labels) if x == i[0]]
    while count < max:
      #reshuffle each time to ensure randomness
      shuffle(indices)
      labels.append(labels[indices[0]])
      images.append(np.copy(images[indices[0]]))
      count += 1
  print(Counter(labels))

In [29]:
#takes in a list of images and converts to characters and labels
class CaptchaDataset(Dataset):
    """Captcha dataset."""

    def __init__(self, image_list, transform=None):
        """
        Args:
            image_list (list): list of paths to images
            transforms (list): list of transforms to be applied to the data
        """
        self.characters, self.labels = build_character_list(image_list)
        oversample(self.characters, self.labels, explore(self.labels))
        self.transform = transform
        self.count = 0

    def __len__(self):
        return len(self.characters)

    def __getitem__(self, idx):
        #if the index is a tensor, convert to a list
        if torch.is_tensor(idx):
            idx = idx.tolist()

        x, y = self.characters[idx], self.labels[idx]
        
        if self.transform:
            x, y = self.transform(x,y)
        return x, y

In [30]:
#helper class to convert everything to tensors
class ToTensor(object):
    """Convert ndarrays in sample to Tensors."""

    def __call__(self, x, y):
        letter, solution = x, y
        letter = np.expand_dims(letter, axis=0)
        transp_letter = torch.Tensor(letter)
        # the solution also needs to be converted to a tensor
        solution = torch.tensor(solution,dtype=torch.float32) 
        return transp_letter, solution

In [31]:
import random
#randomly shuffle images to ensure datasets are unbiased
random.seed(42)
random.shuffle(images)
train_dataset = CaptchaDataset(images[:900], transform=ToTensor())
test_dataset = CaptchaDataset(images[900:], transform=ToTensor())
print("length of training dataset: ", len(train_dataset))
print("length of testing dataset: ", len(test_dataset))

#check that the datatypes and shapes of the first four datapoints are correct
for i in range(4):
    x, y = train_dataset[i]
    print(i, y)
    print(x.size())
    print(type(x))

452
Counter({2: 452, 11: 452, 24: 452, 31: 452, 10: 452, 7: 452, 33: 452, 5: 452, 14: 452, 6: 452, 12: 452, 15: 452, 13: 452, 3: 452, 22: 452, 4: 452, 21: 452, 1: 452, 32: 452})
73
Counter({7: 73, 11: 73, 21: 73, 3: 73, 5: 73, 14: 73, 22: 73, 10: 73, 15: 73, 32: 73, 2: 73, 1: 73, 24: 73, 12: 73, 31: 73, 4: 73, 13: 73, 33: 73, 6: 73})
length of training dataset:  8588
length of testing dataset:  1387
0 tensor(2.)
torch.Size([1, 28, 28])
<class 'torch.Tensor'>
1 tensor(11.)
torch.Size([1, 28, 28])
<class 'torch.Tensor'>
2 tensor(24.)
torch.Size([1, 28, 28])
<class 'torch.Tensor'>
3 tensor(31.)
torch.Size([1, 28, 28])
<class 'torch.Tensor'>


In [32]:
train_dataloader = DataLoader(train_dataset, batch_size=256,
                        shuffle=True, num_workers=2)
test_dataloader = DataLoader(test_dataset, batch_size=256, shuffle=False, num_workers=2)

In [33]:
class Flatten(torch.nn.Module):
    def forward(self, x):
        #print(x.shape)
        #print(x.view(x.shape[0],-1))
        return x.view(x.shape[0], -1)

class Reshape(torch.nn.Module):
    def forward(self, x):
        return x.view(-1,1,28,28)

In [34]:
le_net = torch.nn.Sequential(
    Reshape(),
    nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2),
    nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
    nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    Flatten(),
    nn.Linear(in_features=16*5*5, out_features=120),
    nn.Sigmoid(),
    nn.Linear(120, 84),
    nn.Sigmoid(),
    nn.Linear(84, 35)
)

In [35]:
def evaluate_accuracy_updated(data_iter, net,device=torch.device('cpu')):
    """Evaluate accuracy of a model on the given data set."""
    acc_sum,n = torch.tensor([0],dtype=torch.float32,device=device),0
    for X,y in data_iter:
        # If device is the GPU, copy the data to the GPU.
        X,y = X.to(device),y.to(device)
        net.eval()
        with torch.no_grad():
            y = y.long()
            acc_sum += torch.sum((torch.argmax(net(X), dim=1) == y))
            n += y.shape[0]
    return acc_sum.item()/n

In [36]:
def try_gpu():
    """If GPU is available, return torch.device as cuda:0; else return torch.device as cpu."""
    if torch.cuda.is_available():
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')
    return device

device = try_gpu()

In [37]:
def train_model_lenet(net, train_iter, test_iter,criterion, num_epochs, batch_size, device,lr=None):
    """Train and evaluate a model with CPU or GPU."""
    print('training on', device)
    net.to(device)
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    for epoch in range(num_epochs):
        train_l_sum = torch.tensor([0.0],dtype=torch.float32,device=device)
        train_acc_sum = torch.tensor([0.0],dtype=torch.float32,device=device)
        n, start = 0, time.time()
        for X, y in train_iter:
            net.train()
            #print(X.type)
            #print(y.type)
            
            optimizer.zero_grad()
            X,y = X.to(device),y.to(device) 
            #print(X)
            #print(y)
            y_hat = net(X)
            y = y.type(torch.LongTensor)
            #print(y_hat.dtype)
            #print(y.dtype)
            loss = criterion(y_hat, y)
            loss.backward()
            optimizer.step()
            
            with torch.no_grad():
                y = y.long()
                train_l_sum += loss.float()
                train_acc_sum += (torch.sum((torch.argmax(y_hat, dim=1) == y))).float()
                n += y.shape[0]
        test_acc = evaluate_accuracy_updated(test_iter, net,device)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum/n, train_acc_sum/n, test_acc,
                 time.time() - start))

In [38]:
lr, num_epochs = 0.9, 30
batch_size = 256

def init_weights(m):
    if type(m) == nn.Linear or type(m) == nn.Conv2d:
        torch.nn.init.xavier_uniform_(m.weight)

le_net.apply(init_weights)
le_net = le_net.to(device)

criterion = nn.CrossEntropyLoss()

train_model_lenet(le_net, train_dataloader, test_dataloader, criterion,num_epochs, batch_size,device, lr)

training on cpu
epoch 1, loss 0.0121, train acc 0.060, test acc 0.053, time 5.2 sec
epoch 2, loss 0.0118, train acc 0.061, test acc 0.053, time 4.3 sec
epoch 3, loss 0.0117, train acc 0.068, test acc 0.125, time 4.2 sec
epoch 4, loss 0.0110, train acc 0.121, test acc 0.222, time 3.5 sec
epoch 5, loss 0.0090, train acc 0.290, test acc 0.363, time 3.1 sec
epoch 6, loss 0.0073, train acc 0.445, test acc 0.420, time 3.1 sec
epoch 7, loss 0.0063, train acc 0.510, test acc 0.539, time 3.0 sec
epoch 8, loss 0.0054, train acc 0.605, test acc 0.637, time 3.0 sec
epoch 9, loss 0.0048, train acc 0.647, test acc 0.635, time 2.9 sec
epoch 10, loss 0.0044, train acc 0.681, test acc 0.663, time 3.0 sec
epoch 11, loss 0.0040, train acc 0.707, test acc 0.690, time 3.0 sec
epoch 12, loss 0.0037, train acc 0.731, test acc 0.712, time 4.1 sec
epoch 13, loss 0.0033, train acc 0.766, test acc 0.730, time 3.2 sec
epoch 14, loss 0.0032, train acc 0.773, test acc 0.766, time 2.9 sec
epoch 15, loss 0.0029, trai

In [40]:
for X, y in test_dataloader:
    break

def get_labels(label):
  vals = ['1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
          'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
  #get the index of the string---this will be its encoded value
  return [vals[int(i)] for i in label]

#numpy array of predictions
print(X.shape)
true_labels = get_labels(y.numpy())
pred_labels = get_labels(le_net(X).argmax(dim=1).numpy())
print()
for i in range(len(true_labels)):
  print("True:", true_labels[i], "Predicted:", pred_labels[i])

torch.Size([256, 1, 28, 28])

True: 8 Predicted: 8
True: c Predicted: c
True: m Predicted: m
True: 4 Predicted: 4
True: 6 Predicted: 6
True: f Predicted: f
True: n Predicted: n
True: c Predicted: c
True: n Predicted: f
True: b Predicted: n
True: g Predicted: g
True: 8 Predicted: 8
True: 8 Predicted: 8
True: 8 Predicted: 8
True: x Predicted: 6
True: n Predicted: n
True: x Predicted: x
True: c Predicted: c
True: m Predicted: n
True: n Predicted: m
True: 8 Predicted: 8
True: 3 Predicted: 3
True: 2 Predicted: 2
True: f Predicted: f
True: 3 Predicted: 3
True: 6 Predicted: 6
True: 4 Predicted: 4
True: b Predicted: b
True: 3 Predicted: 3
True: p Predicted: p
True: 8 Predicted: 8
True: d Predicted: d
True: 4 Predicted: 4
True: w Predicted: w
True: m Predicted: m
True: 6 Predicted: 6
True: 5 Predicted: 5
True: m Predicted: n
True: 8 Predicted: 8
True: 5 Predicted: 5
True: e Predicted: e
True: 2 Predicted: 2
True: m Predicted: n
True: g Predicted: m
True: 2 Predicted: x
True: m Predicted: m
True