In [1]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from torchvision import transforms

from tqdm import tqdm
import pickle

import numpy as np
import math
import skimage
from skimage import io
import torch
import os

In [3]:
class CaptchaDataloader(Dataset):
    def __init__(self, root_dir, split='treinamento', transform=None,label_dir = 'labels10k'):
        self.root_dir = root_dir
        self.split = split
        self.transform = transform
        self.label_dir = label_dir
        
        self.img_dir = os.path.join(root_dir,split)
        self.lbl_dir = os.path.join(root_dir,label_dir)

        if(split == 'teste'):
            self.imgs_files = sorted(os.listdir(self.img_dir), key=lambda x: int(x.split('.')[0]))
        else:
             self.imgs_files = os.listdir(self.img_dir)
             
        self.lbls_files = [f.replace('.jpg', '.txt') for f in self.imgs_files]
        
    def __len__(self):
        return(len(self.imgs_files))

    def extract_hog_features(self,img):
        # image = skimage.io.imread('teste/treinamento/000001.jpg',as_gray=True)
        img = skimage.transform.resize(img, (128,64))
        # print(len(img[1]))
        mag = []
        theta = []

        for i in range(128):
            magnitudeArray = []
            angleArray = []

            for j in range(64):
                # Condition for axis 0
                if j-1 <= 0 or j+1 >= 64:
                    if j-1 <= 0:
                        # Condition if the first element
                        Gx = img[i, j+1] - 0
                    elif j + 1 >= 64:
                        Gx = 0 - img[i, j-1]
                # Condition for the first element
                else:
                    Gx = img[i, j+1] - img[i, j-1]

                # Condition for axis 1
                if i-1 <= 0 or i+1 >= 128:
                    if i-1 <= 0:
                        Gy = 0 - img[i+1, j]
                    elif i +1 >= 128:
                        Gy = img[i-1, j] - 0
                else:
                    # print(i, j)
                    Gy = img[i-1, j] - img[i+1, j]

                # Calculating magnitude
                # print(Gx)
                magnitude = math.sqrt(pow(Gx, 2) + pow(Gy, 2))
                magnitudeArray.append(round(magnitude, 9))

                # Calculating angle
                if Gx == 0:
                    angle = np.degrees(0.0)
                else:
                    angle = np.degrees(np.abs(np.arctan(Gy / Gx)))
                angleArray.append(round(angle, 9))

            mag.append(magnitudeArray)
            theta.append(angleArray)

        mag = np.array(mag)
        theta = np.array(theta)

        row, col = mag.shape
        features = []

        for i in range(0, row, 8):
            for j in range(0, col, 8):
                curr_mag = mag[i:i+8, j:j+8]
                curr_theta = theta[i:i+8, j:j+8]
                window_row, window_col = curr_mag.shape
                histogram = np.zeros(9)

                for k in range (window_row):
                    for l in range (window_col):
                        first_bin = int(curr_theta[k, l] / 20)
                        second_bin = (math.ceil(curr_theta[k, l] / 20)) % 9

                        if first_bin == second_bin:
                            histogram[first_bin] += curr_mag[k, l]
                        else:
                            histogram[first_bin] += curr_mag[k, l] * (1 - (curr_theta[k, l] - first_bin * 20) / 20)
                            histogram[second_bin] += curr_mag[k, l] * ((curr_theta[k, l] - first_bin * 20) / 20)

                features += histogram.tolist()

        return features


    def __getitem__(self, idx) :
        
        img_name = os.path.join(self.img_dir, self.imgs_files[idx])
        lbl_name = os.path.join(self.lbl_dir, self.lbls_files[idx])

        image = io.imread(img_name,as_gray=True)
        
        with open(lbl_name,'r') as file:
            label_str = file.read()
                    
        hog_image = self.extract_hog_features(image)
        image = torch.from_numpy(image)
        image = image.to(torch.float32)
        image = image[None,:,:]


        if self.transform:
            image = self.transform(image)



        label_str = str(label_str)
        label_str = label_str.replace('\n', '')
        
        label = [ord(char) - 27 if ord(char) == 63 else ord(char) - 48 if 48 <= ord(char) <= 57 else ord(char) - 55 for char in label_str]
        label = int(label[0])

        #CrossEntropy nao usa hot
        label_hot = np.zeros(37)
        label_hot[label] = 1
        label_hot = torch.tensor(label_hot)
        return hog_image,label

In [4]:
train_data = CaptchaDataloader(split='treinamento',transform= None,root_dir='/scratch/diogochaves/Projetos/ICV/Dataset/Cortado')
val_data = CaptchaDataloader(split='validacao',transform= None,root_dir='/scratch/diogochaves/Projetos/ICV/Dataset/Cortado')
test_data = CaptchaDataloader(split='teste',transform= None,root_dir='/scratch/diogochaves/Projetos/ICV/Dataset/Cortado')
    

In [5]:
x_train = []
y_train = []

for features,label in tqdm(train_data, desc=f'Criando vetor',dynamic_ncols=True,colour="BLUE"):
    x_train.append(features)
    y_train.append(label)
    
with open('x_train.pkl', 'wb') as f:
    pickle.dump(x_train, f)
    
with open('y_train.pkl', 'wb') as f:
    pickle.dump(y_train, f)

Criando vetor:  34%|[34m███▍      [0m| 16428/48000 [00:04<00:08, 3850.93it/s]


KeyboardInterrupt: 

In [None]:
x_test = []
y_test = []

for features,label in tqdm(test_data, desc=f'Criando vetor',dynamic_ncols=True,colour="BLUE"):
    x_test.append(features)
    y_test.append(label)

with open('x_test.pkl', 'wb') as f:
    pickle.dump(x_test, f)
    
with open('y_test.pkl', 'wb') as f:
    pickle.dump(y_test, f)


Criando vetor: 100%|[34m██████████[0m| 6000/6000 [05:09<00:00, 19.40it/s]


In [None]:
x_valid = []
y_valid  = []

for features,label in tqdm(val_data, desc=f'Criando vetor',dynamic_ncols=True,colour="BLUE"):
    x_valid.append(features)
    y_valid.append(label)

with open('x_valid.pkl', 'wb') as f:
    pickle.dump(x_valid, f)
    
with open('y_valid.pkl', 'wb') as f:
    pickle.dump(y_valid, f)


Criando vetor: 100%|[34m██████████[0m| 6000/6000 [05:11<00:00, 19.29it/s]


In [None]:
with open('x_test.pkl', 'rb') as f:
    x_test = pickle.load(f)
    
with open('y_test.pkl', 'rb') as f:
    y_test = pickle.load(f)

with open('x_train.pkl', 'rb') as f:
    x_train = pickle.load(f)

with open('y_train.pkl', 'rb') as f:
    y_train = pickle.load(f)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

import os
import torch
import sklearn.metrics as metrics
import pandas as pd

class Test:
    def __init__(self,model,test_loader,model_name,path_metric,path_n):
        self.model = model
        self.test_loader = test_loader
        self.model_name = model_name
        self.path_metric = path_metric
        self.path_n = path_n

    
    def metric(self,y_true, y_pred):
        accuracy = metrics.accuracy_score(y_true, y_pred)
        precision = metrics.precision_score(y_true, y_pred , average= 'macro')
        recall = metrics.recall_score(y_true, y_pred, average= 'macro')
        f1 = metrics.f1_score(y_true, y_pred,average= 'macro')
        report_table = {
            'Accuracy': [accuracy],
            'Precision': [precision],
            'Recall': [recall],
            'F1 Score': [f1]
        }

        report_table_df = pd.DataFrame(report_table)

        fig, ax = plt.subplots(figsize=(20, 1))
        ax.axis('tight')
        ax.axis('off')

        table = ax.table(cellText=report_table_df.values, colLabels=report_table_df.columns, cellLoc='center', loc='center')
        table.scale(1,2)

        for key, cell in table.get_celld().items():
            if key[0] == 0:
                cell.set_text_props(weight='bold')
        name = f"Metrics_" + self.model_name
        save_path = os.path.join(self.path_metric, name)
        plt.savefig(save_path)
        

    def Get_Y(self,device = "cpu"):
        y_true = [labels for _ , labels in self.test_loader]
        y_pred = []
        with torch.no_grad():
                for img, labels in self.test_loader:

                    img, labels = img.to(device), labels.to(device)

                    pred = self.model(img)
                    _, pred = torch.max(self.model(img), 1)
                    y_pred.append(pred)
                    

                    
                    

        y_true = torch.cat(y_true).to('cpu')
        y_pred = torch.cat(y_pred).to('cpu')
        print(type(y_true))
        print(type(y_pred))
        return y_true,y_pred

    
    def Get_Accuracy_per_size(self,y_true,y_pred):
        prev =(y_pred == y_true)
        PM_1 = 0
        PM_2 = 0
        PM_3 = 0
        PM_4 = 0
        PM_5 = 0
        PM_6 = 0
        for i in range(0,len(prev),6):
            acertos = 0
            for j in range(6):
                if prev[i+j]:
                    acertos = acertos + 1
                    
            if(acertos == 6):
                PM_1 = PM_1 + 1
                PM_2 = PM_2 + 1
                PM_3 = PM_3 + 1
                PM_4 = PM_4 + 1
                PM_5 = PM_5 + 1
                PM_6 = PM_6 + 1
                
            if(acertos == 5):
                PM_1 = PM_1 + 1
                PM_2 = PM_2 + 1
                PM_3 = PM_3 + 1
                PM_4 = PM_4 + 1
                PM_5 = PM_5 + 1
            
            if(acertos == 4):
                PM_1 = PM_1 + 1
                PM_2 = PM_2 + 1
                PM_3 = PM_3 + 1
                PM_4 = PM_4 + 1
                
            if(acertos == 3):
                PM_1 = PM_1 + 1
                PM_2 = PM_2 + 1
                PM_3 = PM_3 + 1
                
            if(acertos == 2):
                PM_1 = PM_1 + 1
                PM_2 = PM_2 + 1
                
            if(acertos == 1):
                PM_1 = PM_1 + 1
                
        size = (len(prev)/6)
        y = [(PM_1/size),(PM_2/size),(PM_3/size),(PM_4/size),(PM_5/size),(PM_6/size)]
        x = [1,2,3,4,5,6]
        
        plt.figure(figsize=(14, 5)) 
        plt.plot(x, y, color="Blue", linewidth=2, marker='o', markersize=8)
        
        plt.grid(True, alpha=0.6)
        plt.gca().spines['top'].set_linewidth(0)
        plt.gca().spines['bottom'].set_linewidth(0.4)
        plt.gca().spines['left'].set_linewidth(0.4)
        plt.gca().spines['right'].set_linewidth(0)
        
        plt.xlabel("\nNúmero mínimo de caracteres reconhecidos por captcha", fontsize=9)
        plt.ylabel("Taxa de Reconhecimento\n", fontsize=9)
        plt.xticks(fontsize=9)
        plt.yticks(fontsize=9)

        plt.title("Resultado "+ self.model_name + "\n\n", fontsize=10)
        plt.gca().set_axisbelow(True)

        name = f"Acertos_por_quantidade_" + self.model_name
        save_path = os.path.join(self.path_n, name + ".png")
        plt.savefig(save_path, bbox_inches='tight')
        plt.show()
    
    
    def fit(self,device):
        self.model.eval()
        y_true,y_pred = self.Get_Y(device)
        self.Get_Accuracy_per_size(y_true,y_pred)
        self.metric(y_true=y_true, y_pred=y_pred)
        classification_report = metrics.classification_report(y_true, y_pred,target_names=None)
        return classification_report
    

## MODELOS

In [None]:
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler


scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)

clf = SVC(kernel='linear', C=1.0)

clf.fit(x_train, y_train)

In [None]:
from sklearn.metrics import accuracy_score


x_test = scaler.transform(x_test)

y_pred = clf.predict(x_test)

accuracy = accuracy_score(y_test, y_pred)

print(accuracy)


0.749


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier
import sklearn.metrics as metrics

models = {
    'Logistic Regression': LogisticRegression(random_state=42, max_iter=1500, solver='newton-cg', penalty='l2'),
    'KNN': KNeighborsClassifier(n_neighbors=10, weights='distance', algorithm='kd_tree', metric='minkowski', p=1),
    'Random Forest': RandomForestClassifier(criterion='gini', n_estimators=50, random_state=42),
    'Decision Tree': DecisionTreeClassifier(criterion='gini', random_state=42),
    'SVM': SVC(random_state=42),
    'SGDC': SGDClassifier(loss='perceptron', random_state=42)
}

for model_name, model in models.items():
    print(model_name)
    model.fit(x_train, y_train)
    y_pred_train = model.predict(x_train)
    y_pred_test = model.predict(x_test)
    print("Accuracy on train set: ", metrics.accuracy_score(y_train, y_pred_train))
    print("Accuracy on test set: ", metrics.accuracy_score(x_test, y_pred_test))
    print("\n")

Logistic Regression


KeyboardInterrupt: 