In [1]:
from sklearn import preprocessing
from sklearn.decomposition import PCA 
import numpy as np
import pandas as pd 
import math
import time 
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Seed NumPy's global RNG so that later np.random calls (e.g. the row shuffle)
# give the same result on every Restart & Run All.
np.random.seed(22995386)

# Data loading
# Forward slashes are accepted on Windows, Linux and macOS alike; the previous
# 'dataset\\LP1\\LP1_DP.csv' spelling only resolved on Windows.
df = pd.read_csv('dataset/LP1/LP1_DP.csv')

# One output unit per distinct value of the `Class` column.
ylabels = df.Class.unique()
output_dim = len(ylabels)

# Drop the first CSV column (presumably a row index -- TODO confirm) so that
# column 0 of `full_data` is what the next cell reads as the class id.
full_data = df.to_numpy()[:, 1:]

In [2]:
# Shuffle the samples (rows) in place; the order is fixed by the NumPy seed set
# earlier.  NOTE: re-running this cell alone reshuffles already shuffled data.
np.random.shuffle(full_data)

# Column 0 holds the class id, the remaining columns are the features.
train_data = [row[1:] for row in full_data]
# Cast explicitly: df.to_numpy() yields float/object values as soon as any CSV
# column is non-integer, and a float cannot be used as an array index below.
lp_tmp = [int(row[0]) for row in full_data]

# oneHot Encoding
# Class ids are assumed to be 1..output_dim; subtracting 1 maps them to 0-based columns.
train_label = np.zeros((len(train_data), output_dim))
train_label[np.arange(len(lp_tmp)), np.asarray(lp_tmp, dtype=int) - 1] = 1

In [3]:
# Data preprocessing.
# PCA object is created but not applied (see the disabled line further down).
pca = PCA(whiten=True)

# Step 1: standardise the features with sklearn's `scale`.
x_scale = preprocessing.scale(train_data)
# Step 2: rescale with sklearn's `normalize` using the L2 norm.
x_normalized = preprocessing.normalize(x_scale, norm='l2')

# Disabled experiment: shift/stretch the values into the 0..255 range.
# if x_normalized.min() < 0:
#     x_normalized = x_normalized - x_normalized.min()
#     x_normalized = x_normalized * 255 / x_normalized.max()

# Disabled experiment: PCA-whitened features instead of the normalised ones.
# newData=pca.fit_transform(x_normalized)
newData = x_normalized


In [4]:
def Minmax_scale(w):
    """Linearly rescale `w` so its minimum maps to 0 and its maximum to 1.

    Parameters
    ----------
    w : array-like of numbers.

    Returns
    -------
    numpy.ndarray with the same shape as `w`.  A constant input (max == min)
    returns all zeros instead of dividing by zero, and an empty input returns
    an empty float array instead of raising.
    """
    w = np.asarray(w)
    if w.size == 0:
        return np.array(w, dtype=float)
    low = w.min()
    span = w.max() - low
    if span == 0:
        # Every value is identical: there is no range to stretch.
        return np.zeros(w.shape, dtype=float)
    return np.array((w - low) / span)
# Module-level switch read by Partition(): set to 1 to min-max scale every
# feature column before the split.
minmax = 0
def Partition(data, label, ratio):
    """Split samples and labels into a leading training part and a trailing
    verification part.

    Parameters
    ----------
    data : sequence / array of samples (one row per sample).
    label : sequence / array of labels, same length as `data`.
    ratio : fraction of rows that go to the training part; the split index is
        int(len(data) * ratio).

    Returns
    -------
    (t_data, t_label, verify_data, verify_label)

    Raises
    ------
    ValueError
        If `data` and `label` differ in length (they would otherwise be split
        at different positions and end up misaligned).

    Notes
    -----
    When the global `minmax` is 1, each column of `data` is rescaled with
    Minmax_scale before splitting, so the scaling range also includes the
    verification rows.
    """
    if len(data) != len(label):
        raise ValueError(
            "data and label must have the same length, got %d and %d"
            % (len(data), len(label))
        )
    if minmax == 1:
        columns = np.transpose(data).astype(np.float64)
        for i in range(len(columns)):
            columns[i] = Minmax_scale(columns[i])
        data = np.transpose(columns)
    # Compute the cut once so data and labels are always split at the same row.
    split = int(len(data) * ratio)
    return data[:split], label[:split], data[split:], label[split:]

def Prediction(a):
    """Return the position of the largest entry of `a`.

    Ties resolve to the earliest position, and an empty `a` yields 0.
    """
    # max() keeps the first candidate among equals, matching a strict '>' scan.
    return max(range(len(a)), key=lambda pos: a[pos], default=0)



# Leading 80% of the (already shuffled) rows train the model, the rest verify it.
train_ratio = 0.8
(t_data, t_label,
 verify_data, verify_label) = Partition(data=newData, label=train_label, ratio=train_ratio)

In [5]:
# Hand-written convolution stage.
# Re-seed NumPy so the random filters drawn below are reproducible.
np.random.seed(225)
def create_filter(in_ch, out_ch, keneral):
    """Draw `out_ch` random filters with entries in {-1, 0, 1}.

    `keneral` is the (rows, cols) kernel size.  `in_ch` is accepted but not
    used.  Returns an integer array of shape (out_ch, keneral[0], keneral[1])
    drawn from NumPy's global RNG.
    """
    filter_shape = (out_ch, keneral[0], keneral[1])
    raw = np.random.randint(0, 3, size=filter_shape)   # values 0, 1, 2
    return raw - 1

def conv(data, l1_filter, stride, padding):
    """Slide every filter over a 2-D array and sum the element-wise products.

    Parameters
    ----------
    data : 2-D array-like input.
    l1_filter : sequence of 2-D filters that all share one kernel size.
    stride : step (>= 1) between neighbouring window positions, used for both
        rows and columns.
    padding : number of zero rings added around `data` before filtering.

    Returns
    -------
    list with one 2-D numpy array per filter, each of shape
    ((H - kh) // stride + 1, (W - kw) // stride + 1), where H, W are the padded
    input size and kh, kw the kernel size.

    Raises
    ------
    ValueError
        If `stride` is smaller than 1.
    """
    if stride < 1:
        raise ValueError("stride must be >= 1, got %r" % (stride,))
    if len(l1_filter) == 0:
        return []

    data = np.asarray(data)
    if padding > 0:
        data = np.pad(data, padding, mode='constant', constant_values=0)

    k_rows, k_cols = len(l1_filter[0]), len(l1_filter[0][0])
    # Window count per axis; this is where the stride has to enter, otherwise
    # any stride > 1 yields a map whose size does not match the values computed.
    out_rows = max((data.shape[0] - k_rows) // stride + 1, 0)
    out_cols = max((data.shape[1] - k_cols) // stride + 1, 0)

    result = []
    for filt in l1_filter:
        filt = np.asarray(filt)
        fmap = np.zeros((out_rows, out_cols), dtype=np.result_type(data, filt))
        for r in range(out_rows):
            top = r * stride
            for c in range(out_cols):
                left = c * stride
                window = data[top:top + k_rows, left:left + k_cols]
                fmap[r, c] = np.sum(window * filt)
        result.append(fmap)
    return result

def pooling(data, fuct, size):
    """Reduce every 2-D layer over non-overlapping `size` x `size` windows.

    Parameters
    ----------
    data : sequence of 2-D layers (e.g. the list returned by conv()).
    fuct : reduction applied first to each row slice of a window and then to
        the list of those per-row results (e.g. the builtin `max`).
    size : window edge length and step, must be >= 1.

    Returns
    -------
    list with one 2-D numpy array per layer, of shape
    (rows // size, cols // size).  Trailing rows/columns that do not fill a
    complete window are dropped instead of raising.

    Raises
    ------
    ValueError
        If `size` is smaller than 1.
    """
    if size < 1:
        raise ValueError("size must be >= 1, got %r" % (size,))

    result = []
    for layer in data:
        n_rows = len(layer)
        n_cols = len(layer[0]) if n_rows else 0
        out_rows, out_cols = n_rows // size, n_cols // size
        pooled = []
        for r in range(out_rows):
            top = r * size
            for c in range(out_cols):
                left = c * size
                per_row = [fuct(layer[k][left:left + size]) for k in range(top, top + size)]
                pooled.append(fuct(per_row))
        result.append(np.reshape(pooled, (out_rows, out_cols)))
    return result

def _conv_pool_features(samples, image_shape, filters):
    """Reshape each sample to `image_shape`, run conv (stride 1, padding 1)
    followed by 3x3 max pooling, and flatten the result to one feature vector."""
    features = []
    for sample in samples:
        image = np.reshape(np.asarray(sample), image_shape)
        pooled = pooling(conv(image, filters, 1, 1), max, 3)
        features.append(np.asarray(pooled).flatten())
    return features

# Stage 1: 9x10 input, padded to 11x12, 3x4 kernels -> 9x9, pooled -> 3x3,
# times 16 filters = 144 values per sample.
l1_filter = create_filter(1, 16, [3, 4])
cnn_t_data = _conv_pool_features(t_data, (9, 10), l1_filter)
cnn_verify_data = _conv_pool_features(verify_data, (9, 10), l1_filter)

# Stage 2: the 144 values are viewed as one 12x12 map (the 16 stage-1 maps are
# not kept as separate channels), padded to 14x14, 3x3 kernels -> 12x12,
# pooled -> 4x4, times 16 filters = 256 values per sample.
l2_filter = create_filter(1, 16, [3, 3])
cnn_t_data = _conv_pool_features(cnn_t_data, (12, 12), l2_filter)
cnn_verify_data = _conv_pool_features(cnn_verify_data, (12, 12), l2_filter)
np.shape(cnn_t_data)

(70, 256)

In [6]:

# Fix torch's RNG so the weight initialisation below is reproducible.
torch.manual_seed(35948766)
class MLP(nn.Module):
    """Five-stage fully connected classifier.

    Stages (attribute names layer1..layer5):
      layer1: Linear(input, 120) -> BatchNorm1d(120) -> ReLU6
      layer2: Linear(120, 60)    -> ReLU6
      layer3: Linear(60, 30)     (no activation)
      layer4: Linear(30, 15)     (no activation)
      layer5: Linear(15, output) -> Softmax over dim 1

    `forward` therefore returns one probability row per input row.
    """

    def __init__(self, input, output):
        super().__init__()
        self.layer1 = nn.Sequential(nn.Linear(input, 120), nn.BatchNorm1d(120), nn.ReLU6())
        self.layer2 = nn.Sequential(nn.Linear(120, 60), nn.ReLU6())
        self.layer3 = nn.Sequential(nn.Linear(60, 30))
        self.layer4 = nn.Sequential(nn.Linear(30, 15))
        self.layer5 = nn.Sequential(nn.Linear(15, output), nn.Softmax(dim=1))

    def forward(self, x):
        """Feed `x` through layer1 .. layer5 in order and return the result."""
        stages = (self.layer1, self.layer2, self.layer3, self.layer4, self.layer5)
        for stage in stages:
            x = stage(x)
        return x
def _count_hits(scores, onehot):
    """Count rows whose highest-scoring column is marked 1 in `onehot`."""
    picked = scores.argmax(dim=1, keepdim=True)
    return int(onehot.gather(1, picked).eq(1).sum().item())

def _report(epoch, loss_value, train_acc, verify_acc, train_tag):
    """Print one progress record (epoch, loss, training and testing accuracy)."""
    print("Epoch:" + str(epoch))
    print(loss_value)
    print(train_tag + str(train_acc))
    print("testing acc: " + str(verify_acc))

model = MLP(len(cnn_t_data[0]), output_dim)
# NOTE(review): MLP already ends in a Softmax, while MultiLabelSoftMarginLoss
# applies its own sigmoid to the input and therefore expects raw scores.  The
# pairing is kept to preserve the experiment; consider feeding logits to a
# loss that matches single-label classification.
criterion = nn.MultiLabelSoftMarginLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0095)
Epoch = 6400

# The data never changes between epochs, so build the tensors once.  float32
# everywhere keeps targets and model outputs in the same dtype.
inputs = torch.as_tensor(np.asarray(cnn_t_data), dtype=torch.float32)
targets = torch.as_tensor(np.asarray(t_label), dtype=torch.float32)
var_in = torch.as_tensor(np.asarray(cnn_verify_data), dtype=torch.float32)
var_targets = torch.as_tensor(np.asarray(verify_label), dtype=torch.float32)

maxt = 0
maxv = 0
minl = 10000
for epoch in range(Epoch):
    tmp1, tmp2, tmp3 = maxt, maxv, minl

    # ---- one full-batch optimisation step --------------------------------
    model.train()
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()
    # The criterion already averages over the batch, so no further division.
    mean_loss = loss.item()
    acc_t = _count_hits(outputs.detach(), targets)

    # ---- verification ----------------------------------------------------
    # eval() makes BatchNorm use its running statistics instead of the
    # statistics of the held-out batch (and stops it from updating them);
    # no_grad() avoids building an autograd graph that is never used.
    model.eval()
    with torch.no_grad():
        var_out = model(var_in)
    acc_v = _count_hits(var_out, var_targets)

    # if there exist better loss or training accuracy or testing accuracy
    minl = min(minl, mean_loss)
    maxt = max(maxt, acc_t / len(t_data))
    maxv = max(maxv, acc_v / len(verify_data))
    if tmp1 != maxt or tmp2 != maxv or tmp3 != minl:
        _report(epoch, mean_loss, acc_t / len(t_data), acc_v / len(verify_data), "training acc: ")
    elif epoch % 100 == 0:
        # Periodic heartbeat, skipped when this epoch was already reported above.
        _report(epoch, mean_loss, acc_t / len(t_data), acc_v / len(verify_data), "acc: ")

RuntimeError: mat1 and mat2 shapes cannot be multiplied (18x144 and 256x120)

In [49]:
# torch.manual_seed(1)
# class CNN(nn.Module):
#     def __init__(self):
#         super(CNN, self).__init__()
#         self.conv1 = nn.Sequential( # 1, 9, 10
#             nn.Conv2d(in_channels=1, out_channels=16, kernel_size=(3,4) , stride=1, padding=2, ) , # 16, 9, 9
#             nn.MaxPool2d(kernel_size=3 ),  # 16, 3, 3
#             nn.ReLU()
#         )
#         self.out = nn.Linear(144, output_dim)

#     def forward(self, x):
#         x = self.conv1(x)
#         return self.out(x)

# cnn = CNN()

# criterion = nn.MultiLabelSoftMarginLoss()
# optimizer = torch.optim.Adam(cnn.parameters(), lr=0.01)
# Epoch = 6400

# maxt = 0
# maxv = 0
# minl = 10000
# for epoch in range(Epoch):
#     acc_t = 0
#     acc_v = 0
#     mean_loss = 0
#     tmp1, tmp2, tmp3 = maxt, maxv, minl
#     for i in range(len(t_data)):
#         optimizer.zero_grad()
#         inputs = torch.autograd.Variable( torch.FloatTensor(  np.reshape(np.asarray(t_data[i:i+16]), (16,1,9,10)) )  )
#         outputs = cnn(inputs)
#         loss = criterion( outputs, torch.tensor(t_label[i]) )
#         mean_loss += loss.item()
#         loss.backward()
#         optimizer.step()

# # if there exist better loss or training accuracy or testing accuracy 
#     minl = min(minl, mean_loss/len(t_data))
#     for i in range (len(outputs)):
#         p = Prediction(outputs[i])
#         if t_label[i][p] == 1:
#             acc_t += 1
#     var_in = torch.autograd.Variable( torch.FloatTensor(verify_data))
#     var_out = cnn(var_in)
#     for i in range (len(var_out)):
#         p = Prediction(var_out[i])
#         if verify_label[i][p] == 1:
#             acc_v += 1
#     maxt = max(maxt, acc_t / len(t_data))
#     maxv = max(maxv, acc_v / len(verify_data))
#     if tmp1 != maxt or tmp2 != maxv or tmp3 != minl :
#         print("Epoch:" + str(epoch))
#         print( mean_loss/len(t_data) )
#         print("training acc: " + str(acc_t / len(t_data)))
#         print("testing acc: " + str(acc_v / len(verify_data)))


#     if epoch % 100 == 0:
#         print("Epoch:" + str(epoch))
#         print( mean_loss/len(t_data) )
#         print("acc: " + str(acc_t / len(t_data)))
#         print("testing acc: " + str(acc_v / len(verify_data)))


ValueError: cannot reshape array of size 1350 into shape (16,1,9,10)