# Setup

In [125]:
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, hinge_loss

import random

import torch
import pandas as pd
from matplotlib import pyplot as plt

from torchvision import transforms
import torchvision
from torch.utils.data import DataLoader

from conex.helpers.filters import DoGFilter
from conex.helpers.transforms.misc import Conv2dFilter

from tqdm import tqdm

from PIL import Image


# Visualize

In [18]:
def load_image(path,size = None):
    img = cv2.imread(path)
    if(size):
        img = cv2.resize(img,size)
    return torch.tensor(img[:,:,0],dtype=torch.float32)

def show_image(image,normal=False):
    plt.axis("off")
    if(normal):
        plt.imshow(image,cmap='gray',vmin=0,vmax=255)
    else:
        plt.imshow(image,cmap='gray')
    plt.show()
    

def show_filters(weight):
    fig,axes = plt.subplots(1,weight.shape[0])
    fig.set_size_inches(5*weight.shape[0], 5)
    # fig.suptitle(f'plots of synaptic share weights for d = {weight.shape[0]}')
    for i in range(weight.shape[0]):
        axes[i].imshow(weight[i][0],cmap='gray')
        axes[i].axis('off')
        
        
def show_images(imgs,title,count):
    fig,axes = plt.subplots(1,count)
    fig.set_size_inches(5*count, 5)
    plt.text(x=0.5, y=0.94, s=title, fontsize=28, ha="center", transform=fig.transFigure)
    for i in range(count):
        axes[i].imshow(imgs[i][0][0],cmap='gray')
        axes[i].axis('off')


def confidence_crop_interspace(inp_width, inp_height, window_width, window_height):
    x1 = window_width//2
    x2 = (inp_width - 1) - (window_width//2)
    y1 = window_height//2 
    y2 = (inp_height - 1) - (window_height//2)

    center_x = random.randint(x1, x2)
    center_y = random.randint(y1, y2)
    center_coordinates = [center_x, center_y]
    top_left_x = center_x - (window_width//2)
    top_left_y = center_y - (window_height//2)
    top_left_coordinates = [top_left_x, top_left_y]
    coordinates = [center_coordinates, top_left_coordinates]

    return coordinates


# CONFIG

In [81]:
Input_Width = 28
Input_Height = 28
Crop_Window_Width = 21
Crop_Window_Height = 21
DoG_SIZE = 5

IMAGE_WIDTH = 28
IMAGE_HEIGHT = 28

OUT_CHANNEL = 8
IN_CHANNEL = 1
KERNEL_WIDTH = 13
KERNEL_HEIGHT = 13

# INPUT_WIDTH = IMAGE_WIDTH - DoG_SIZE + 1
# INPUT_HEIGHT = IMAGE_HEIGHT - DoG_SIZE + 1
INPUT_WIDTH = Crop_Window_Width - DoG_SIZE + 1
INPUT_HEIGHT = Crop_Window_Height - DoG_SIZE + 1

L4_WIDTH = INPUT_WIDTH - KERNEL_WIDTH + 1
L4_HEIGHT = INPUT_HEIGHT - KERNEL_HEIGHT + 1

L23_WIDTH = L4_WIDTH//2
L23_HEIGHT = L4_HEIGHT//2

J_0 = 300
p = 0.8

# DataLoader (MNIST)

In [82]:
from torchvision.datasets import MNIST
MNIST_ROOT = "./MNIST"

In [166]:
def DataLoadMNIST(C1 = 4, C2 = 9, sz = 100, crop_iteration = 3) :
    time_window = 100

    dataset_directory_path = "./first_step"

    transformation = transforms.Compose([
        transforms.ToTensor(),
        transforms.Grayscale(num_output_channels = 1), # not necessary
        Conv2dFilter(DoGFilter(size = 5, sigma_1 = 4, sigma_2 = 1, zero_mean=True, one_sum=True).unsqueeze(0).unsqueeze(0)),
        # SqueezeTransform(dim = 0),
        # SimplePoisson(time_window = time_window , ratio = 2),
    ])


    dataset = MNIST(root=MNIST_ROOT, train=True, download=False, transform=transformation)
    first_class = dataset.data[dataset.targets == C1][:sz]
    second_class = dataset.data[dataset.targets == C2][:sz]

    target = [0] * len(first_class) * crop_iteration + [1] * len(second_class) * crop_iteration
    target = torch.Tensor(target)

    two_class_dataset = torch.cat((first_class, second_class), dim=0)
    new_dataset_size = first_class.shape[0] + second_class.shape[0]

    new_dataset = torch.empty(0,Crop_Window_Width - DoG_SIZE + 1, Crop_Window_Height - DoG_SIZE + 1)
    centers = []


    for i in tqdm(range(0, new_dataset_size)):
        for j in range (0, crop_iteration):
            img = two_class_dataset[i]  # 4 in range [0, 5842) ; 9 in range [5842, 11791)
            img = Image.fromarray(img.numpy(), mode="L")
            a = confidence_crop_interspace(Input_Width, Input_Height, Crop_Window_Width, Crop_Window_Height)
            centers.append((a[0][0], a[0][1]))
            cropped_image = torchvision.transforms.functional.crop(img, a[1][1], a[1][0], Crop_Window_Width, Crop_Window_Height)
            # cropped_image = Image.fromarray(cropped_image.numpy(), mode="L")
            # cropped_image = img
            cropped_image = transformation(cropped_image)
            cropped_image = cropped_image.view(1, Crop_Window_Width - DoG_SIZE + 1, Crop_Window_Height - DoG_SIZE + 1)
            new_dataset = torch.cat((new_dataset.data, cropped_image.data), dim=0)

    print(new_dataset.shape)
    dl = DataLoader(new_dataset,shuffle=True)

    new_dataset = new_dataset.view(new_dataset_size * crop_iteration, -1)
    np_data = new_dataset.numpy()
    np_target = target.numpy()

    return np_data, np_target

In [167]:
np_data, np_target = DataLoadMNIST(4, 9, 100)

100%|██████████| 200/200 [00:00<00:00, 334.37it/s]

torch.Size([600, 17, 17])





# ML-Based Dataset

In [169]:
np_target.shape

(600,)

In [159]:
new_dataset = new_dataset.view(new_dataset_size * crop_iteration, -1)
new_dataset.shape

torch.Size([360, 289])

In [160]:
np_data = new_dataset.numpy()
np_target = target.numpy()

In [161]:
X_train, X_test, y_train, y_test = train_test_split(np_data, np_target, test_size=0.33, random_state=42)

# Modeling

In [162]:
from sklearn.linear_model import  SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier

In [163]:
classifiers = [
    SGDClassifier(max_iter = 1000, tol = 1e-3),
    RandomForestClassifier(max_depth = 3),
    KNeighborsClassifier(n_neighbors = 3),
    SVC(gamma = 'auto'),
    LogisticRegression(),
    DecisionTreeClassifier(max_depth = 3),
    AdaBoostClassifier(algorithm="SAMME"),
    MLPClassifier(alpha=1, max_iter=1000),
    GaussianProcessClassifier(1.0 * RBF(1.0)),
    GaussianNB(),
]

In [164]:
res = []

for clf in classifiers : 
    clf.fit(X_train, y_train)

    y_train_pred = clf.predict(X_train)
    y_test_pred = clf.predict(X_test)

    train_acc = accuracy_score(y_train, y_train_pred)
    test_acc = accuracy_score(y_test, y_test_pred)

    res.append({
        "classifier" : type(clf).__name__,
        "train_acc" : train_acc,
        "test_acc" : test_acc
    })

# Final Results

In [165]:
df = pd.DataFrame(res)
df.sort_values(by = ['test_acc'], ascending = False)

Unnamed: 0,classifier,train_acc,test_acc
2,KNeighborsClassifier,0.896266,0.739496
1,RandomForestClassifier,0.937759,0.731092
8,GaussianProcessClassifier,1.0,0.705882
6,AdaBoostClassifier,0.995851,0.697479
7,MLPClassifier,0.991701,0.689076
5,DecisionTreeClassifier,0.842324,0.663866
0,SGDClassifier,0.908714,0.655462
9,GaussianNB,0.780083,0.655462
4,LogisticRegression,0.854772,0.613445
3,SVC,0.510373,0.478992


# Results on 60 * 2 * 3 images

In [171]:
np_data, np_target = DataLoadMNIST(4, 9, 60)
X_train, X_test, y_train, y_test = train_test_split(np_data, np_target, test_size=0.33, random_state=42)

res = []

for clf in classifiers : 
    clf.fit(X_train, y_train)

    y_train_pred = clf.predict(X_train)
    y_test_pred = clf.predict(X_test)

    train_acc = accuracy_score(y_train, y_train_pred)
    test_acc = accuracy_score(y_test, y_test_pred)

    res.append({
        "classifier" : type(clf).__name__,
        "train_acc" : train_acc,
        "test_acc" : test_acc
    })

df = pd.DataFrame(res)
df.sort_values(by = ['test_acc'], ascending = False)

100%|██████████| 120/120 [00:00<00:00, 319.45it/s]


torch.Size([360, 17, 17])


Unnamed: 0,classifier,train_acc,test_acc
2,KNeighborsClassifier,0.900415,0.773109
6,AdaBoostClassifier,0.995851,0.739496
8,GaussianProcessClassifier,1.0,0.739496
7,MLPClassifier,0.983402,0.705882
1,RandomForestClassifier,0.937759,0.689076
4,LogisticRegression,0.863071,0.647059
5,DecisionTreeClassifier,0.80083,0.621849
0,SGDClassifier,0.863071,0.605042
9,GaussianNB,0.738589,0.579832
3,SVC,0.510373,0.478992


# Results on 100 * 2 * 3 images

In [172]:
np_data, np_target = DataLoadMNIST(4, 9, 100)
X_train, X_test, y_train, y_test = train_test_split(np_data, np_target, test_size=0.33, random_state=42)

res = []

for clf in classifiers : 
    clf.fit(X_train, y_train)

    y_train_pred = clf.predict(X_train)
    y_test_pred = clf.predict(X_test)

    train_acc = accuracy_score(y_train, y_train_pred)
    test_acc = accuracy_score(y_test, y_test_pred)

    res.append({
        "classifier" : type(clf).__name__,
        "train_acc" : train_acc,
        "test_acc" : test_acc
    })

df = pd.DataFrame(res)
df.sort_values(by = ['test_acc'], ascending = False)

100%|██████████| 200/200 [00:00<00:00, 331.30it/s]


torch.Size([600, 17, 17])


Unnamed: 0,classifier,train_acc,test_acc
8,GaussianProcessClassifier,1.0,0.792929
1,RandomForestClassifier,0.905473,0.772727
2,KNeighborsClassifier,0.915423,0.767677
6,AdaBoostClassifier,0.880597,0.717172
0,SGDClassifier,0.781095,0.666667
4,LogisticRegression,0.743781,0.656566
5,DecisionTreeClassifier,0.766169,0.641414
9,GaussianNB,0.691542,0.626263
3,SVC,0.512438,0.474747
7,MLPClassifier,0.514925,0.464646


# Results on 500 * 2 * 3 images

In [173]:
np_data, np_target = DataLoadMNIST(4, 9, 500)
X_train, X_test, y_train, y_test = train_test_split(np_data, np_target, test_size=0.33, random_state=42)

res = []

for clf in classifiers : 
    clf.fit(X_train, y_train)

    y_train_pred = clf.predict(X_train)
    y_test_pred = clf.predict(X_test)

    train_acc = accuracy_score(y_train, y_train_pred)
    test_acc = accuracy_score(y_test, y_test_pred)

    res.append({
        "classifier" : type(clf).__name__,
        "train_acc" : train_acc,
        "test_acc" : test_acc
    })

df = pd.DataFrame(res)
df.sort_values(by = ['test_acc'], ascending = False)

100%|██████████| 1000/1000 [00:04<00:00, 213.69it/s]


torch.Size([3000, 17, 17])


Unnamed: 0,classifier,train_acc,test_acc
8,GaussianProcessClassifier,1.0,0.889899
7,MLPClassifier,0.882587,0.833333
2,KNeighborsClassifier,0.926368,0.807071
1,RandomForestClassifier,0.841791,0.80303
6,AdaBoostClassifier,0.8,0.738384
5,DecisionTreeClassifier,0.750249,0.734343
0,SGDClassifier,0.765174,0.70101
4,LogisticRegression,0.757711,0.69697
9,GaussianNB,0.666667,0.636364
3,SVC,0.518905,0.493939
