In [9]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt
import cv2
import os
from matplotlib import pyplot as plt
import numpy as np
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.optimizers import SGD

# This part tries to use the entire captcha (27 different classes)

In [2]:
def getImages():
    path = "imagedata/"
    listOfImages= []
    kernel = np.ones((3,3),np.uint8)
    for imageFileName in sorted(os.listdir(path)):
        img = cv2.imread(path+imageFileName,cv2.IMREAD_UNCHANGED)
        eroded = cv2.dilate(img, kernel, iterations = 1)
        cropped= eroded[50:140,70:200]
        listOfImages.append(np.array(cropped))
    listOfImages = np.array(listOfImages)
    listOfImages = listOfImages.reshape(list(listOfImages.shape)+[1])
    return listOfImages

listOfImages = getImages()

In [3]:
def getLabels():
    listOfLabels = []
    with open('labels.txt') as fp:
        line = fp.readline()
        while line:
            listOfLabels.append(np.array(float(line.replace(',', '').replace(" ", "").strip())))
            line=fp.readline()
    listOfLabels = np.array(listOfLabels)
    return listOfLabels

listOfLabels = getLabels()

In [4]:
def createCategorical(listOfLabels):
    uniqueValuesCategories = np.array(list(range(0,27)))
    unique = np.unique(listOfLabels)
    dict = {}
    for key,value in zip(unique,uniqueValuesCategories):
        dict[key] = value
    newLabels = listOfLabels
    newLabels = list(map(dict.get,newLabels))
    categories = tf.keras.utils.to_categorical(newLabels)
    return categories, dict

y,dict = createCategorical(listOfLabels)

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    listOfImages, y, test_size=0.33, random_state=42)


In [20]:
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(50, 35, 1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(100, activation='relu',kernel_initializer='he_uniform'))
model.add(layers.Dense(3, activation='softmax'))


In [21]:
opt = SGD(lr=0.01, momentum=0.9)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

history = model.fit(X_train, y_train
                    , batch_size = 90, epochs=10, shuffle = True,
                    validation_data=(X_test, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [25]:
a= model.predict_classes(X_test)

In [27]:
print(a[:10])

[0 1 1 2 0 1 0 1 2 0]


In [219]:
newX=[]
newY=[]
for i in range(27):
    count = 0
    for j in range(len(X_train)):
        
        if y_train[j][i]==1:
            count += 1
    print("class {}: {}".format(i,count))

class 0: 28
class 1: 25
class 2: 24
class 3: 34
class 4: 32
class 5: 30
class 6: 39
class 7: 34
class 8: 29
class 9: 26
class 10: 31
class 11: 28
class 12: 26
class 13: 38
class 14: 25
class 15: 23
class 16: 47
class 17: 33
class 18: 21
class 19: 25
class 20: 28
class 21: 27
class 22: 25
class 23: 28
class 24: 34
class 25: 26
class 26: 38


# This part only uses the digits cropped 

In [16]:
def getImages2():
    path = "imagedata/"
    listOfImages= []
    kernel = np.ones((3,3),np.uint8)
    for imageFileName in sorted(os.listdir(path)):
        img = cv2.imread(path+imageFileName,cv2.IMREAD_UNCHANGED)
        eroded = cv2.dilate(img, kernel, iterations = 1)
#         thresholded = cv2.THRESH_BINARY
        cropped= eroded[50:140,70:200]
        first = cropped [30:80,15:50]
        second = cropped [30:80,15+30:50+30]
        third = cropped [30:80,15+60:50+60]
        listOfImages.extend([first,second,third])
    listOfImages = np.array(listOfImages)
    listOfImages = listOfImages.reshape(listOfImages.shape[0],50,35,1)
    listOfimagesNorm = listOfImages.astype('float32')
    listOfimagesNorm = listOfimagesNorm  / 255.0
    return listOfimagesNorm

listOfImages = getImages2()

In [17]:
def getLabels2():
    listOfLabels = []
    with open('labels.txt') as fp:
        line = fp.readline()
        while line:
            parsedLine = line.replace(',', '').replace(" ", "").strip()
            listOfLabels.extend([int(i) for i in list(parsedLine)])
            line=fp.readline()
                    
    listOfLabels = np.array(listOfLabels)
    return listOfLabels

listOfLabels = getLabels2()

In [18]:
categories = tf.keras.utils.to_categorical(listOfLabels)

In [19]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    listOfImages, categories, test_size=0.33, random_state=42)


In [38]:

def define_model():
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(50, 35, 1)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(100, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(3, activation='softmax'))
    # compile model
    opt = SGD(lr=0.01, momentum=0.9)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

model2 = define_model()

In [39]:
history = model2.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test), verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [42]:
model2.predict_classes(X_test)

array([0, 1, 1, ..., 0, 1, 2])