In [1]:
# Step up one directory so that the `src` / `utils` packages imported below and
# the relative `data/...` paths used later resolve from the working directory.
# NOTE(review): this is not idempotent -- re-running the cell climbs one more
# level each time; restart the kernel before re-executing it.
%cd ..

import numpy as np
import pandas as pd
from src.convolution import *
from src.linear import *
from src.loss import *
from src.activation import *
from src.encapsulation import *
from utils.mltools import *


def load_usps(fn):
    """Read a whitespace-separated USPS text file.

    The first line of the file is skipped. Every following line that splits
    into more than two tokens is parsed as floats; shorter lines are ignored.

    Parameters
    ----------
    fn : str
        Path of the text file to read.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``(features, labels)`` where ``labels`` is the first column cast to
        ``int`` and ``features`` holds all remaining columns.
    """
    rows = []
    with open(fn, "r") as handle:
        handle.readline()  # discard the first line
        for line in handle:
            tokens = line.split()
            if len(tokens) > 2:
                rows.append([float(tok) for tok in tokens])
    table = np.array(rows)
    features = table[:, 1:]
    labels = table[:, 0].astype(int)
    return features, labels


def get_usps(l, X, y):
    """Select the samples whose label is ``l`` (or one of the labels in ``l``).

    Parameters
    ----------
    l : scalar, list or tuple
        A single label, or a (possibly nested) list/tuple of labels. For a
        sequence, the selections are concatenated in the order the labels are
        given, not in the original sample order.
    X : numpy.ndarray
        2-D sample matrix, one row per sample.
    y : numpy.ndarray
        1-D label vector aligned with the rows of ``X``.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        The selected rows of ``X`` and the matching entries of ``y``.
        An empty sequence of labels yields empty arrays.
    """
    # isinstance (rather than an exact type comparison) lets tuples and list
    # subclasses take the multi-label path instead of being compared to y as
    # if they were a single label.
    if not isinstance(l, (list, tuple)):
        mask = y == l
        return X[mask, :], y[mask]

    # Nothing requested: return empty selections that keep X's column count,
    # instead of failing on an empty zip result.
    if len(l) == 0:
        return X[:0], y[:0]

    parts = [get_usps(label, X, y) for label in l]
    stacked_x = np.vstack([part_x for part_x, _ in parts])
    stacked_y = np.hstack([part_y for _, part_y in parts])
    return stacked_x, stacked_y


# Load the full USPS train and test splits once; the helper functions below
# read these module-level arrays.
(alltrainx, alltrainy), (alltestx, alltesty) = (
    load_usps("data/USPS_train.txt"),
    load_usps("data/USPS_test.txt"),
)


def load_one_class(number):
    """Extract the train/test samples of a single digit class.

    Reads the module-level ``alltrainx``/``alltrainy``/``alltestx``/``alltesty``
    arrays, so it expects the label arrays to still be 1-D integer vectors.

    Parameters
    ----------
    number : int
        Label of the class to extract.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)``; the label arrays are column
        vectors of shape ``(n, 1)``.

    Notes
    -----
    NOTE(review): for a scalar ``number`` only rows labelled ``number`` are
    selected, so every returned label is ``-1`` -- confirm this is intended.
    """

    def as_signed_column(labels):
        # -1 where the label equals `number`, +1 elsewhere, as an (n, 1) column.
        return np.where(labels == number, -1, 1).reshape(-1, 1)

    X_train, train_labels = get_usps(number, alltrainx, alltrainy)
    X_test, test_labels = get_usps(number, alltestx, alltesty)
    return (
        X_train,
        as_signed_column(train_labels),
        X_test,
        as_signed_column(test_labels),
    )


def load_two_classes(neg, pos):
    """Build a binary problem from two digit classes.

    Reads the module-level ``alltrainx``/``alltrainy``/``alltestx``/``alltesty``
    arrays, so it expects the label arrays to still be 1-D integer vectors.

    Parameters
    ----------
    neg : int
        Label of the class mapped to ``-1``.
    pos : int
        Label of the class mapped to ``+1``.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)``; the label arrays are column
        vectors of shape ``(n, 1)`` with values in ``{-1, +1}``.
    """
    wanted = [neg, pos]

    def as_signed_column(labels):
        # `neg` becomes -1; every other selected label (i.e. `pos`) becomes +1.
        return np.where(labels == neg, -1, 1).reshape(-1, 1)

    X_train, train_labels = get_usps(wanted, alltrainx, alltrainy)
    X_test, test_labels = get_usps(wanted, alltestx, alltesty)
    return (
        X_train,
        as_signed_column(train_labels),
        X_test,
        as_signed_column(test_labels),
    )


d:\~Perso\Etudes\DAC\NeuralNetworksDIY


In [2]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# One-hot encode the training labels for the multi-class networks below.
# The encoding overwrites `alltrainy` in place, so it is guarded on the array
# still being the 1-D integer label vector: re-running this cell is then a
# no-op instead of re-encoding the already one-hot matrix.
# NOTE: once this cell has run, `alltrainy` is no longer the integer label
# vector that `load_one_class` / `load_two_classes` read.
if alltrainy.ndim == 1:
    alltrainy = OneHotEncoder().fit_transform(alltrainy.reshape(-1, 1)).toarray()
alltrainy.shape

(7291, 10)

In [7]:
# Experiment: one convolution + max-pool stage followed by a two-layer
# perceptron head with 10 outputs.
# NOTE(review): Linear's 4064 inputs would equal 32 channels x 127 positions
# if each sample has 256 features and Conv1D/MaxPool1D use no padding -- confirm.
xavier = {"init_type": "xavier_normal"}
net = Sequential(
    *[
        Conv1D(3, 1, 32, 1, **xavier),
        MaxPool1D(2, 2),
        Flatten(),
        Linear(4064, 100, **xavier),
        ReLU(),
        Linear(100, 10, **xavier),
    ]
)
optimizer = Optim(net, CrossEntropyLoss(), eps=0.001)
# A trailing axis is appended so each sample is passed as (length, 1).
# The positional 1000 is presumably the batch size (TODO confirm); 25 matches
# the progress-bar total of the recorded run.
result_df = optimizer.SGD_eval(
    alltrainx[..., np.newaxis],
    alltrainy,
    1000,
    25,
    test_size=0.33,
    return_dataframe=True,
)


100%|██████████| 25/25 [09:04<00:00, 21.77s/it]


In [8]:
# Display the per-epoch metrics returned by SGD_eval.
# NOTE(review): the saved output below lists 10 epochs while the training cell
# above requests 25 -- the table looks stale; re-run both cells to refresh it.
result_df

Unnamed: 0,epoch,loss_test,loss_train,score_train,score_test
0,0,0.297328,0.187851,0.941032,0.919817
1,1,0.174563,0.156314,0.95905,0.939759
2,2,0.144112,0.142294,0.968468,0.946406
3,3,0.127571,0.137057,0.976249,0.950976
4,4,0.11614,0.130009,0.979934,0.955962
5,5,0.107822,0.124893,0.980753,0.960532
6,6,0.10116,0.122228,0.983415,0.962194
7,7,0.095764,0.118208,0.985463,0.962609
8,8,0.091532,0.115878,0.986896,0.963025
9,9,0.087558,0.11756,0.98751,0.965517


In [6]:
# Experiment: two convolution + ReLU + max-pool stages feeding a single linear
# layer with 10 outputs.
# NOTE(review): Linear's 1984 inputs would equal 32 channels x 62 positions
# if each sample has 256 features and no padding is applied -- confirm.
xavier = {"init_type": "xavier_normal"}
net = Sequential(
    *[
        Conv1D(3, 1, 32, 1, **xavier),
        ReLU(),
        MaxPool1D(2, 2),
        Conv1D(3, 32, 32, **xavier),
        ReLU(),
        MaxPool1D(2, 2),
        Flatten(),
        Linear(1984, 10, **xavier),
    ]
)
optimizer = Optim(net, CrossEntropyLoss(), eps=0.001)
# A trailing axis is appended so each sample is passed as (length, 1).
# The positional 1000 is presumably the batch size (TODO confirm); 10 matches
# the progress-bar total of the recorded run.
result_df = optimizer.SGD_eval(
    alltrainx[..., np.newaxis],
    alltrainy,
    1000,
    10,
    test_size=0.33,
    return_dataframe=True,
)
result_df


100%|██████████| 10/10 [03:51<00:00, 23.13s/it]


Unnamed: 0,epoch,loss_test,loss_train,score_train,score_test
0,0,0.335803,0.266174,0.903767,0.886996
1,1,0.261286,0.262794,0.929156,0.905692
2,2,0.24462,0.258797,0.937756,0.917324
3,3,0.230382,0.248892,0.943079,0.921894
4,4,0.21989,0.241982,0.949222,0.923972
5,5,0.214915,0.237482,0.951269,0.925218
6,6,0.211465,0.232766,0.956388,0.927711
7,7,0.208981,0.232017,0.958231,0.929788
8,8,0.207168,0.228514,0.959664,0.931034
9,9,0.203146,0.224748,0.961097,0.932281


In [26]:
# Experiment: four identical convolution + ReLU + max-pool stages (64 filters
# each) feeding one linear layer with 10 outputs.
# NOTE(review): unlike every other Linear layer in this notebook, the output
# layer here is built without init_type="xavier_normal"; the recorded run
# stays around 17-23% accuracy. Confirm whether the default init is intended.
xavier = {"init_type": "xavier_normal"}
conv_stages = [Conv1D(3, 1, 64, 1, **xavier), ReLU(), MaxPool1D(8, 2)]
for _stage in range(3):
    # The three remaining stages share the same 64 -> 64 configuration.
    conv_stages.extend([Conv1D(3, 64, 64, **xavier), ReLU(), MaxPool1D(8, 2)])
net = Sequential(*conv_stages, Flatten(), Linear(512, 10))
optimizer = Optim(net, CrossEntropyLoss(), eps=0.001)
# A trailing axis is appended so each sample is passed as (length, 1).
# The positional 256 is presumably the batch size (TODO confirm); 10 matches
# the progress-bar total of the recorded run.
result_df = optimizer.SGD_eval(
    alltrainx[..., np.newaxis],
    alltrainy,
    256,
    10,
    test_size=0.2,
    return_dataframe=True,
)
result_df

100%|██████████| 10/10 [14:28<00:00, 86.89s/it]


Unnamed: 0,epoch,loss_test,loss_train,score_train,score_test
0,0,0.692634,0.646282,0.217593,0.227553
1,1,0.6959,0.724752,0.1869,0.194654
2,2,0.7596,0.697904,0.228052,0.234407
3,3,0.869712,0.892116,0.166667,0.169979
4,4,0.88855,0.890678,0.167867,0.170665
5,5,0.888837,0.890506,0.168381,0.170665
6,6,0.878123,0.890381,0.167867,0.170665
7,7,0.888729,0.88967,0.167181,0.169979
8,8,0.88097,0.889733,0.16821,0.17135
9,9,0.882063,0.889832,0.168381,0.170665


In [27]:
# Experiment: the same four convolution + ReLU + max-pool stages (64 filters
# each), this time followed by a 512 -> 256 -> 10 head with a Sigmoid between
# the two linear layers.
xavier = {"init_type": "xavier_normal"}
conv_stages = [Conv1D(3, 1, 64, 1, **xavier), ReLU(), MaxPool1D(8, 2)]
for _stage in range(3):
    # The three remaining stages share the same 64 -> 64 configuration.
    conv_stages.extend([Conv1D(3, 64, 64, **xavier), ReLU(), MaxPool1D(8, 2)])
head = [
    Flatten(),
    Linear(512, 256, **xavier),
    Sigmoid(),
    Linear(256, 10, **xavier),
]
net = Sequential(*conv_stages, *head)
optimizer = Optim(net, CrossEntropyLoss(), eps=0.001)
# A trailing axis is appended so each sample is passed as (length, 1).
# The positional 256 is presumably the batch size (TODO confirm); 10 matches
# the progress-bar total of the recorded run.
result_df = optimizer.SGD_eval(
    alltrainx[..., np.newaxis],
    alltrainy,
    256,
    10,
    test_size=0.2,
    return_dataframe=True,
)
result_df

100%|██████████| 10/10 [16:01<00:00, 96.19s/it]


Unnamed: 0,epoch,loss_test,loss_train,score_train,score_test
0,0,0.769695,0.709095,0.294582,0.309801
1,1,0.752502,0.803353,0.280521,0.289925
2,2,0.787683,0.830319,0.297154,0.297464
3,3,0.817779,0.841485,0.251029,0.250171
4,4,0.841477,0.852569,0.188443,0.191227
5,5,0.856036,0.877574,0.219136,0.22207
6,6,0.861767,0.865203,0.218621,0.228924
7,7,0.870662,0.867484,0.183642,0.160384
8,8,0.870994,0.871871,0.244684,0.25634
9,9,0.872767,0.877386,0.195816,0.183002


In [2]:
import os
import pandas as pd
from skimage import io, transform
from PIL import Image


def load_data(root_dir, size=(540, 420)):
    """Load the images stored under ``root_dir/X`` and ``root_dir/y``.

    Every file of each sub-folder is read with ``skimage.io.imread`` and
    resized to ``size`` with ``skimage.transform.resize``.

    Parameters
    ----------
    root_dir : str
        Directory containing the ``X`` and ``y`` sub-folders.
    size : tuple of int, optional
        Output shape handed to ``transform.resize``. Defaults to
        ``(540, 420)``, the value that was previously hard-coded.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``(X, y)``: one stacked array per sub-folder, images ordered by
        sorted file name.
    """

    def _load_folder(subdir):
        # Read, resize and stack every image of one sub-folder.
        folder = os.path.join(root_dir, subdir)
        # os.listdir returns entries in arbitrary order. Sorting makes the
        # result deterministic and -- assuming matching files share the same
        # name in both folders -- keeps X[i] and y[i] paired.
        names = sorted(os.listdir(folder))
        return np.array(
            [
                transform.resize(io.imread(os.path.join(folder, name)), size)
                for name in names
            ]
        )

    return _load_folder("X"), _load_folder("y")


# Load the image arrays from the X/ and y/ sub-folders of the dataset directory.
X, y = load_data("./data/dirty_documents")


In [3]:
# Shape check: insert a singleton axis right after the sample axis.
X[:, np.newaxis].shape


(144, 1, 540, 420)

In [4]:
# Shape check: append a singleton axis after the two image axes.
X[..., np.newaxis].shape


(144, 540, 420, 1)