In [1]:
import os
import math
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

In [2]:
#to read points from txt files
def readTXTpoints(filepath):
    filepath = os.getcwd() + filepath
    f = open(filepath,'r')
    temp=[]
    for i in f.readlines():
        x,y=[float(t) for t in i.split()]
        temp.append([x,y])
    X=np.array(temp)
    return X # array of all points

#to make y as K dimensional classfication output (k = number of classes, here 3)
def clas(y):
    y = np.fromiter(map(int, y), dtype = np.int)
    y_temp = []
    for i in range(len(y)):
        temp = [0,0,0] #3 classes
        temp[y[i]] = 1
        y_temp.append(temp)
    return np.array(y_temp)

#to seperate X and y features
def xy(data):
    X=[]
    y=[]
    for i,j in enumerate(data):
        for c in j:
            y.append(i)
            X.append(c)
    return np.array(X),clas(np.array(y)) #X and y features

#to read points from csv files and get X and y features
def readCSVpoints(filepath):
    filepath = os.getcwd() + filepath
    df = pd.read_csv(filepath)
    mylist = df.to_numpy()
    X=[]
    y=[]
    for i in range(len(mylist)):
        X.append(mylist[i][:-1])
        y.append(mylist[i][-1])    
    return [X,y] # X and y features

def TrainValidateTestSplit(x, Y):
    """
    Splits into TrainData, ValidateData, TestData
    Output: 6 arrays i.e. X_train, X_validate, X_test, y_train, y_validate, y_test
    """
    class_points = len(Y)//3 #points per class
    index = 0
    X_train, X_validate, X_test, y_train, y_validate, y_test = [],[],[],[],[],[]
    for i in range(3):
        X = x[index:index+class_points]
        y = Y[index:index+class_points]
        index = index+class_points
        # randomly shuffled data in each class
        temp = list(zip(X, y))  
        random.shuffle(temp)
        X, y = zip(*temp)
        #split train-validate-test data as 60:20:20
        X_train += X[0:int(.6*len(X))]
        X_validate += X[int(.6*len(X)):int(.8*len(X))]
        X_test += X[int(.8*len(X)):int(len(X))]
        y_train += y[0:int(.6*len(y))]
        y_validate += y[int(.6*len(y)):int(.8*len(y))]
        y_test += y[int(.8*len(y)):int(len(y))]   
    return np.array(X_train), np.array(X_validate), np.array(X_test), np.array(y_train), np.array(y_validate), np.array(y_test)

def TrainValidateSplit(x, Y):
    """
    Splits into TrainData, ValidateData
    Output: 4 arrays i.e. X_train, X_validate, y_train, y_validate
    """
    Y = clas(Y)
    class_points = len(Y)//3 #points per class
    #print(class_points)
    index = 0
    X_train, X_validate, y_train, y_validate = [],[],[],[]

    for i in range(3):
        X = x[index:index+class_points]
        y = Y[index:index+class_points]
        index = index+class_points
        # randomly shuffled data in each class
        temp = list(zip(X, y))  
        random.shuffle(temp)
        X, y = zip(*temp)
        #split train-validate image data as 40:10 i.e. 80:20
        X_train += X[0:int(.8*len(X))]
        X_validate += X[int(.8*len(X)):int(len(X))]
        y_train += y[0:int(.8*len(y))]
        y_validate += y[int(.8*len(y)):int(len(y))]
    return np.array(X_train), np.array(X_validate), np.array(y_train), np.array(y_validate)

In [3]:
# Load All Data
# Code to generate TrainData, ValidateData, TestData in comments under their respective dataset

ls_c1 = readTXTpoints("/Group20/Classification/LS_Group20/Class1.txt")
ls_c2 = readTXTpoints("/Group20/Classification/LS_Group20/Class2.txt")
ls_c3 = readTXTpoints("/Group20/Classification/LS_Group20/Class3.txt")
ls_data = [ls_c1, ls_c2, ls_c3]
# X_train, X_validate, X_test, y_train, y_validate, y_test = TrainValidateTestSplit(xy(ls_data)[0],xy(ls_data)[1])

nls = readTXTpoints("/Group20/Classification/NLS_Group20.txt")
nls_data = [nls[i:i + 500] for i in range(0, len(nls), 500)]
# X_train, X_validate, X_test, y_train, y_validate, y_test = TrainValidateTestSplit(xy(nls_data)[0],xy(nls_data)[1])

img_data_train = readCSVpoints("/Group20/Classification/Image_Group20/trainBOVW.csv")
img_data_test = readCSVpoints("/Group20/Classification/Image_Group20/testBOVW.csv")
# [X_train, X_validate, y_train, y_validate],  X_test, y_test = TrainValidateSplit(img_data_train[0], img_data_train[1]), img_data_test[0], clas(img_data_test[1])

uni_data = readCSVpoints("/Group20/Regression/UnivariateData/20.csv")
# X_train, X_validate, X_test, y_train, y_validate, y_test = TrainValidateTestSplit(uni_data[0],uni_data[1])

bi_data = readCSVpoints("/Group20/Regression/BivariateData/20.csv")
# X_train, X_validate, X_test, y_train, y_validate, y_test = TrainValidateTestSplit(bi_data[0],bi_data[1])

In [4]:
#train each node for training tuples
def node_trainer(X, Y, epochs, l_rate, activation_function ):
    w = np.random.rand(X.shape[1]+1)
    
  
    for epoch in range(epochs):
        for x, label in zip(X, Y):
            x = np.insert(x,0,1)
            y = np.dot(w, x.T)
            if activation_function == "sigmoidal":
                target = 1.0/(1.0 +math.exp(-y))
                #binary cross entropy delta
                delta = -(label/target - (1-label)/(1-target))
                w -= l_rate*(delta*target)*(1-target)*x
            elif activation_function == "linear":
                target = float(y)
                delta = -(label-target)
                w -= l_rate*(delta*x)
    return w

#predict output of a node for one tuple
def node_predict(w, row, activation_function):
    row = np.insert(row,0,1)
    y = np.dot(w, row.T)
    if activation_function == "sigmoidal":
        target = round(1.0/(1.0 +math.exp(-float(y))))
    elif activation_function == "linear":
        target = float(y)
    return target

# Calculate accuracy percentage
def classification_accuracy_metric(actual, predicted):
	correct = 0
	for i in range(len(actual)):
		if np.array_equal(np.array(actual[i]), np.array(predicted[i])):
			correct += 1
	return correct / float(len(actual)) * 100.0

def model_train(X_train, y_train, epochs, l_rate, activation_function):
    if(activation_function == "sigmoidal"):
        
        y_train0 = [y_train[i][0] for i in range(len(y_train))]
        y_train1 = [y_train[i][1] for i in range(len(y_train))]
        y_train2 = [y_train[i][2] for i in range(len(y_train))]

        w = []
        w.append(node_trainer(X_train, y_train[0], epochs, l_rate, activation_function))
        w.append(node_trainer(X_train, y_train[1], epochs, l_rate, activation_function))
        w.append(node_trainer(X_train, y_train[2], epochs, l_rate, activation_function))
    
    elif (activation_function == "linear"):
        w = node_trainer(X_train, y_train, epochs, l_rate, activation_function)
        
    return w

def model_predict(w, X, y, activation_function):
    if(activation_function == "sigmoidal"):
        predicted = [[int(node_predict(w[0], X[i], activation_function)), int(node_predict(w[1], X[i], activation_function)), int(node_predict(w[2], X[i], activation_function))] for i in range(len(X))]
        return classification_accuracy_metric(y, predicted), predicted
    elif (activation_function == "linear"):
        predicted = [node_predict(w, X[i], activation_function) for i in range(len(X))]
        return mean_squared_error(y, predicted), predicted


def grid_search(X_train, y_train, X_validate, y_validate, l_rate, epochs_all, activation_function):
    model_accuracy = []
    model_complexity = []
    for epochs in epochs_all:
        w = model_train(X_train, y_train, epochs, l_rate, activation_function)
        #validating model
        model_accuracy.append(model_predict(w, X_validate, y_validate, activation_function)[0])
        model_complexity.append(epochs)
        #print(classification_model_predict(w, X_validate, y_validate, activation_function)[1])
    model_accuracy, model_complexity = zip(*sorted(zip(model_accuracy,model_complexity), reverse=True))
    
    return model_complexity, model_accuracy

In [5]:
X_train, X_validate, X_test, y_train, y_validate, y_test = TrainValidateTestSplit(xy(ls_data)[0],xy(ls_data)[1])
l_rate = 0.01
epochs_all = [10,50,100]
activation_function = "sigmoidal"
#print(y_validate)
print(grid_search(X_train, y_train, X_validate, y_validate, l_rate, epochs_all, activation_function))


((10, 100, 50), (30.0, 3.3333333333333335, 0.6666666666666667))


In [6]:
X_train, X_validate, X_test, y_train, y_validate, y_test = TrainValidateTestSplit(bi_data[0],bi_data[1])
l_rate = 0.01
epochs_all = [10,50,100]
activation_function = "linear"
#print(y_validate)
print(grid_search(X_train, y_train, X_validate, y_validate, l_rate, epochs_all, activation_function))

((100, 50, 10), (8.188511305018457, 8.188511305018457, 8.188511305018457))
