In [1]:
%cd ..

import numpy as np
import pandas as pd
from src.convolution import *
from src.linear import *
from src.loss import *
from src.activation import *
from src.encapsulation import *
from utils.mltools import *


def load_usps(fn):
    """Parse a whitespace-separated USPS text file.

    The first line of the file is skipped. Every following line that holds
    more than two tokens is converted to floats; shorter lines are ignored.
    In each kept row the first value is the label and the rest are features.

    Args:
        fn: path of the text file to read.

    Returns:
        A ``(features, labels)`` pair: a 2-D float array holding every column
        but the first, and a 1-D int array built from the first column.
    """
    rows = []
    with open(fn, "r") as handle:
        handle.readline()  # discard the first line
        for line in handle:
            tokens = line.split()
            if len(tokens) <= 2:
                continue
            rows.append(list(map(float, tokens)))
    table = np.array(rows)
    features = table[:, 1:]
    labels = table[:, 0].astype(int)
    return features, labels


def get_usps(l, X, y):
    """Select the samples whose label matches ``l``.

    Args:
        l: a single label, or a list/tuple of labels (possibly nested).
            For a sequence, the per-label selections are concatenated in the
            order the labels are given.
        X: 2-D array of samples, one row per sample.
        y: 1-D array of labels aligned with the rows of ``X``.

    Returns:
        A ``(X_selected, y_selected)`` pair. An empty sequence of labels
        yields empty arrays (zero rows of ``X``, zero entries of ``y``).
    """
    if not isinstance(l, (list, tuple)):
        # Single label: build the boolean mask once and reuse it.
        mask = y == l
        return X[mask, :], y[mask]
    if len(l) == 0:
        # Nothing requested: return empty selections instead of failing
        # when unpacking an empty zip.
        return X[:0], y[:0]
    selected_x, selected_y = zip(*(get_usps(label, X, y) for label in l))
    return np.vstack(selected_x), np.hstack(selected_y)


# Read both USPS splits once, train first and then test; the paths are
# relative to the current working directory.
(alltrainx, alltrainy), (alltestx, alltesty) = [
    load_usps(usps_path)
    for usps_path in ("data/USPS_train.txt", "data/USPS_test.txt")
]


def load_one_class(number):
    """Extract the train and test samples of a single digit class.

    Samples are taken from the module-level ``alltrainx/alltrainy`` and
    ``alltestx/alltesty`` arrays through ``get_usps``.

    Labels equal to ``number`` are mapped to -1 and anything else to +1.
    For a scalar ``number`` the selection already keeps only that class, so
    every returned label is -1.

    Returns:
        ``(X_train, y_train, X_test, y_test)`` where both label arrays are
        column vectors of shape ``(n, 1)``.
    """

    def _to_signed_column(labels):
        # -1 where the label equals `number`, +1 elsewhere, as a column vector.
        return np.where(labels == number, -1, 1).reshape(-1, 1)

    train_x, train_labels = get_usps(number, alltrainx, alltrainy)
    test_x, test_labels = get_usps(number, alltestx, alltesty)
    return (
        train_x,
        _to_signed_column(train_labels),
        test_x,
        _to_signed_column(test_labels),
    )


def load_two_classes(neg, pos):
    """Build a binary problem from two digit classes.

    Samples labelled ``neg`` or ``pos`` are pulled from the module-level
    train and test arrays through ``get_usps`` (``neg`` samples first).
    Labels equal to ``neg`` become -1 and all remaining labels become +1.

    Returns:
        ``(X_train, y_train, X_test, y_test)`` where both label arrays are
        column vectors of shape ``(n, 1)``.
    """
    wanted = [neg, pos]
    pieces = []
    for features, labels in ((alltrainx, alltrainy), (alltestx, alltesty)):
        kept_x, kept_y = get_usps(wanted, features, labels)
        signed = np.where(kept_y == neg, -1, 1).reshape(-1, 1)
        pieces.extend((kept_x, signed))
    return tuple(pieces)


d:\~Perso\Etudes\DAC\NeuralNetworksDIY


In [None]:
import os
import pandas as pd
from skimage import io, transform
from PIL import Image


def _load_image_dir(dir_path, size):
    """Read every file of ``dir_path`` as an image resized to ``size`` and stack them."""
    images = []
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order deterministic and identical across directories that share names.
    for filename in sorted(os.listdir(dir_path)):
        image = io.imread(os.path.join(dir_path, filename))
        images.append(transform.resize(image, size))
    return np.array(images)


def load_data(root_dir, size=(540, 420)):
    """Load the image stacks stored under ``root_dir/X`` and ``root_dir/y``.

    Every file in each sub-directory is read with ``skimage.io.imread`` and
    resized to ``size``. Files are visited in sorted name order so that
    ``X[i]`` and ``y[i]`` line up.
    NOTE(review): this pairing assumes both directories use matching file
    names — confirm against the dataset layout.

    Args:
        root_dir: directory containing the ``X`` and ``y`` sub-directories.
        size: output shape passed to ``skimage.transform.resize`` for each
            image; defaults to ``(540, 420)``.

    Returns:
        ``(X, y)``: two arrays holding the resized images, stacked along a
        new first axis.
    """
    X = _load_image_dir(os.path.join(root_dir, "X"), size)
    y = _load_image_dir(os.path.join(root_dir, "y"), size)
    return X, y


# Load the image stacks from the X/ and y/ sub-folders of ./data/dirty_documents.
# The path is relative to the working directory (changed by `%cd ..` in the first cell).
X, y = load_data("./data/dirty_documents")


In [None]:
# Insert a singleton axis right after the first (sample) axis and show the
# resulting shape; the recorded output is (144, 1, 540, 420).
X[:, np.newaxis, :, :].shape


(144, 1, 540, 420)

In [None]:
# Append a singleton axis as the last dimension and show the resulting shape;
# the recorded output is (144, 540, 420, 1).
X[:, :, :, np.newaxis].shape


(144, 540, 420, 1)

In [2]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# One-hot encode the integer training labels (one column per class).
# Guarded on ndim so that re-running this cell does not re-encode the matrix
# that is already one-hot (which would flatten it and encode its 0/1 values).
# NOTE: this rebinds the global `alltrainy`; load_one_class / load_two_classes
# read that global and compare it against integer labels, so they must be
# called before this cell runs.
if alltrainy.ndim == 1:
    alltrainy = OneHotEncoder().fit_transform(alltrainy.reshape(-1, 1)).toarray()
alltrainy.shape

(7291, 10)

In [None]:
# 1-D convolutional classifier trained on the one-hot USPS labels.
# NOTE(review): the layer signatures live in src.*; the shape comments below
# presume Conv1D(kernel_size, in_channels, out_channels[, stride]),
# MaxPool1D(kernel_size, stride) and 256-feature inputs — TODO confirm.
net = Sequential(
    Conv1D(3, 1, 32, 1, init_type="xavier_normal"),  # presumably length 256 -> 254, 32 channels
    ReLU(),
    MaxPool1D(2, 2),  # presumably 254 -> 127
    Conv1D(3, 32, 32),  # presumably 127 -> 125 (if the default stride is 1)
    ReLU(),
    MaxPool1D(2, 2),  # presumably 125 -> 62
    Flatten(),
    Linear(1984, 10),  # 1984 = 62 * 32 under the assumptions above; 10 = one-hot label width
)
# eps is presumably the learning rate — TODO confirm against Optim.
optimizer = Optim(net, CrossEntropyLoss(), eps=0.001)
# A trailing axis is added to the inputs; the positional 1000 and 10 are
# presumably batch size and number of epochs — TODO confirm against Optim.SGD.
lossList = optimizer.SGD(alltrainx[:,:,np.newaxis], alltrainy, 1000, 10)
print(lossList)
pd.Series(lossList).plot()


In [4]:
# Smaller network: one convolution + pooling followed by a two-layer MLP head.
# NOTE(review): the shape comments presume Conv1D(kernel_size, in_channels,
# out_channels, stride), MaxPool1D(kernel_size, stride) and 256-feature
# inputs — TODO confirm against src.convolution.
net = Sequential(
    Conv1D(3, 1, 32, 1),  # presumably length 256 -> 254, 32 channels; no activation follows this layer
    MaxPool1D(2, 2),  # presumably 254 -> 127
    Flatten(),
    Linear(4064, 100),  # 4064 = 127 * 32 under the assumptions above
    ReLU(),
    Linear(100, 10),  # 10 = one-hot label width
)
# eps is presumably the learning rate — TODO confirm against Optim.
optimizer = Optim(net, CrossEntropyLoss(), eps=0.001)
# Train while evaluating on a held-out split (test_size=0.33) and collect the
# metrics as a DataFrame. The recorded progress bar counts 5 iterations, which
# matches the positional 5; the positional 1000 is presumably the batch size.
result_df = optimizer.SGD_eval(alltrainx[:,:,np.newaxis], alltrainy, 1000, 5, test_size=0.33, return_dataframe=True)


100%|██████████| 5/5 [01:35<00:00, 19.05s/it]


In [5]:
# NOTE(review): in the recorded output, loss_test and loss_train are missing
# (NaN) and both scores are identical on every one of the 5 epochs, so the
# model did not learn in that run — presumably the loss diverged. Investigate
# before relying on these numbers.
result_df

Unnamed: 0,epoch,loss_test,loss_train,score_train,score_test
0,0,,,0.160319,0.170752
1,1,,,0.160319,0.170752
2,2,,,0.160319,0.170752
3,3,,,0.160319,0.170752
4,4,,,0.160319,0.170752


## Vectorisation


In [6]:
# Demo: expose every kernel_size x kernel_size window of a 2-D array as a
# strided view, without copying any data.
# NOTE: `input` shadows the built-in input() for the rest of the session.
input = np.arange(25).reshape(5, 5)

kernel_size = 4
layer_stride = 1

height, width = input.shape
rows_stride, columns_strides = input.strides  # byte steps along rows / columns

# Number of window positions per axis: floor((n - k) / s) + 1, computed with
# integer division to avoid a float round-trip.
out_height = (height - kernel_size) // layer_stride + 1
out_width = (width - kernel_size) // layer_stride + 1

# Axes 0-1 index the window position, axes 2-3 index inside the window.
new_shape = (out_height, out_width, kernel_size, kernel_size)
new_strides = (
    rows_stride * layer_stride,
    columns_strides * layer_stride,
    rows_stride,
    columns_strides,
)

# The windows overlap in memory, so the view is made read-only: writing
# through it would silently modify several windows at once.
windowed_input = np.lib.stride_tricks.as_strided(
    input, shape=new_shape, strides=new_strides, writeable=False
)
print(
    windowed_input,
    f"\nShape: {windowed_input.shape}, \tStrides: {windowed_input.strides}",
)


[[[[ 0  1  2  3]
   [ 5  6  7  8]
   [10 11 12 13]
   [15 16 17 18]]

  [[ 1  2  3  4]
   [ 6  7  8  9]
   [11 12 13 14]
   [16 17 18 19]]]


 [[[ 5  6  7  8]
   [10 11 12 13]
   [15 16 17 18]
   [20 21 22 23]]

  [[ 6  7  8  9]
   [11 12 13 14]
   [16 17 18 19]
   [21 22 23 24]]]] 
Shape: (2, 2, 4, 4), 	Strides: (20, 4, 20, 4)


In [7]:
# Demo: the same sliding-window view for a 1-D array, with a stride larger
# than the kernel (elements between windows are skipped).
# NOTE: `input` shadows the built-in input() for the rest of the session.
input = np.arange(5)

kernel_size = 2
layer_stride = 3

length = input.shape[0]
rows_stride = input.strides[0]  # byte step between consecutive elements

# Number of window positions: floor((n - k) / s) + 1, computed with integer
# division to avoid a float round-trip. The name `out_height` is kept from
# the 2-D demo; here it is the number of windows.
out_height = (length - kernel_size) // layer_stride + 1

# Axis 0 indexes the window position, axis 1 indexes inside the window.
new_shape = (
    out_height,
    kernel_size,
)
new_strides = (
    rows_stride * layer_stride,
    rows_stride,
)

# Read-only view: as_strided performs no bounds or overlap checks, so writes
# through the view are disabled.
windowed_input = np.lib.stride_tricks.as_strided(
    input, shape=new_shape, strides=new_strides, writeable=False
)
print(
    windowed_input,
    f"\nShape: {windowed_input.shape}, \tStrides: {windowed_input.strides}",
)


[[0 1]
 [3 4]] 
Shape: (2, 2), 	Strides: (12, 4)
