In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score
from numpy.linalg import inv
from numpy.linalg import norm
from numpy.linalg import pinv
import math

def read_data(datafile):
    with open(datafile, 'r', encoding='utf-8-sig') as data_file:
        data = []
        for line in data_file:
            lines = line.split()
            lines = [float(i) for i in lines]
            data.append(lines)
        data = np.array(data)
    return data

def split_data(data):
    train = data[0:np.min(np.where(data[:,0]>0)),:]
    test = data[np.min(np.where(data[:,0]>0)):len(data),:]
    num_classes_train = int(np.max(train[:,len(train.T)-1])+1)
    num_classes_test = int(np.max(test[:,len(train.T)-1])+1)
    t_train = list(train[:,len(train.T)-1].astype(int))
    t_test = list(test[:,len(train.T)-1].astype(int))
    train = train[:,3:len(train.T)-1]
    test = test[:,3:len(test.T)-1]
    return train.T, test.T, num_classes_train, num_classes_test, t_train, t_test

def split_training_data(train, t_train, i, subset_train):
    validation_set = train[:,i*subset_train:(i+1)*subset_train]
    t_train_valid = t_train[i*subset_train:(i+1)*subset_train]
    train = np.delete(train, np.arange(i*subset_train,(i+1)*subset_train), 1)
    t_train = np.delete(t_train, np.arange(i*subset_train,(i+1)*subset_train))
    return validation_set, t_train_valid, train, t_train
    
def indices_to_one_hot(data, nb_classes):
    return (np.eye(nb_classes)[np.array(data)]).T

def compute_H_train(train):
    samples = train.shape[1]
    feature_length = train.shape[0] 
    nodes = 1000
    W = np.random.normal(size=[feature_length, nodes])
    B = np.random.normal(size=[nodes])
    H_train = np.zeros((samples, nodes))
    for i in range(nodes):
        for j in range(samples):
            H_train[j,i] = 1 / (1 + math.exp(-(np.matmul(W[:,i], train[:,j]) + B[i])))
    return H_train, samples, nodes, W, B

def compute_H_test(test, nodes, W, B):
    samples = test.shape[1]
    feature_length = test.shape[0]
    H_test = np.zeros((samples, nodes))
    for i in range(nodes):
        for j in range(samples):
            H_test[j,i] = 1 / (1 + math.exp(-(np.matmul(W[:,i], test[:,j]) + B[i])))
    return H_test, samples

def o_star_train(T, H, samples, nodes, num_classes):
    min_lambda = 1
    max_lambda = 10
    length = int(max_lambda/min_lambda)
    lambda_vec = np.linspace(min_lambda,max_lambda,length)
    error = np.zeros((length))
    o_star = np.zeros((nodes, num_classes, length))
    W1 = np.random.normal(size=[nodes])
    B1 = np.random.normal(size=[nodes])
    H1 = np.zeros((samples,nodes))
    W2 = np.random.normal(size=[nodes])
    B2 = np.random.normal(size=[nodes])
    H3 = np.zeros((samples,nodes))
    if nodes >= samples:
        for lam in range(length):
            lam_mat = lambda_vec[lam]*np.eye(samples, samples)
            for i in range(nodes):
                H1[:,i] = 1 / (1 + np.exp(-(W1[i]*H[:,i] + B1[i])))
            W_he = np.matmul(np.log(H1/(1-H1)), pinv(H))
            H2 = 1 / (1 + np.exp(-np.matmul(W_he, H)))
            for i in range(nodes):
                H3[:,i] = 1 / (1 + np.exp(-(W2[i]*H2[:,i] + B2[i])))
            W_he1 = np.matmul(np.log(H3/(1-H3)), pinv(H2))
            H4 = 1 / (1 + np.exp(-np.matmul(W_he1, H2)))
            o_star[:,:,lam] = np.matmul(np.matmul(H4.T, inv(np.matmul(H4, H4.T) + lam_mat)), T.T)
            T_hat = (np.matmul(H4, o_star[:,:,lam])).T
            error[lam] = compute_accuracy(T_hat, T, samples)
        print("Training error =", error[np.argmin(error)], "Lambda =", lambda_vec[np.argmin(error)])
    if nodes < samples:
        for lam in range(length):
            lam_mat = lambda_vec[lam]*np.eye(nodes, nodes)
            for i in range(nodes):
                H1[:,i] = 1 / (1 + np.exp(-(W1[i]*H[:,i] + B1[i])))
            W_he = np.matmul(np.log(H1/(1-H1)), pinv(H))
            H2 = 1 / (1 + np.exp(-np.matmul(W_he, H)))
            for i in range(nodes):
                H3[:,i] = 1 / (1 + np.exp(-(W2[i]*H2[:,i] + B2[i])))
            W_he1 = np.matmul(np.log(H3/(1-H3)), pinv(H2))
            H4 = 1 / (1 + np.exp(-np.matmul(W_he1, H2)))
            o_star[:,:,lam] = np.matmul(np.matmul(inv(np.matmul(H4.T, H4) + lam_mat), H4.T), T.T)
            T_hat = (np.matmul(H4, o_star[:,:,lam])).T
            error[lam] = compute_accuracy(T_hat, T, samples)
        print("Training error =", error[np.argmin(error)], "Lambda =", lambda_vec[np.argmin(error)])
    return o_star[:,:,np.argmin(error)]

def T_hat_test(H, o_star, T, samples):
    T_hat_test = np.matmul(H, o_star).T
    error = compute_accuracy(T_hat_test, T, samples)
    print("Test error =", error, "\n")
    return error

def compute_accuracy(T_hat, T, samples):
    class_matrix = np.eye(T.shape[0],T.shape[0])
    for i in np.arange(samples):
        normi = []
        for k in np.arange(len(class_matrix)):
            norma = norm(class_matrix[:,k]-T_hat[:,i], ord=2)**2
            normi.append(norma)
        normi = np.array([normi])
        classi = np.argmin(normi)
        T_hat[:,i] = class_matrix[:,classi]
    wrongly_classified = T - T_hat
    count = 0
    for i in np.arange(T_hat.shape[0]):
        if sum(wrongly_classified[:,i] > 0) > 0:
            count = count + 1
    error = count/samples
    return error
    
if __name__=="__main__":
    data = read_data('vowel-context.txt')
    train, test, num_classes_train, num_classes_test, t_train, t_test = split_data(data)
    
    """"
    T_train = indices_to_one_hot(t_train, num_classes_train)
    T_test = indices_to_one_hot(t_test, num_classes_train)
    H_train, samples, nodes, W, B = compute_H_train(train)
    o_star_train = o_star_train(T_train, H_train, samples, nodes, num_classes_train)
    H_test, samples = compute_H_test(test, nodes, W, B)
    T_hat_test(H_test, o_star_train, T_test, samples)
    """
    

    k_folds = 4
    subset_train = int(train.shape[1]/k_folds)
    error = np.zeros((k_folds))
    o_star_mat = []
    for i in range(k_folds):
        validation_set, t_train_valid, train_train, t_train_train = split_training_data(train, t_train, i, subset_train)
        T_train = indices_to_one_hot(t_train_train, num_classes_train)
        T_test = indices_to_one_hot(t_train_valid, num_classes_test)
        H_train, samples, nodes, W, B = compute_H_train(train_train)
        o_star = o_star_train(T_train, H_train, samples, nodes, num_classes_train)
        o_star_mat.append(o_star)
        H_test, samples = compute_H_test(validation_set, nodes, W, B)
        error[i] = T_hat_test(H_test, o_star, T_test, samples)
    o_star = o_star_mat[np.argmin(error)]
    H_test, samples = compute_H_test(test, nodes, W, B)
    T_test = indices_to_one_hot(t_test, num_classes_test)
    T_hat_test(H_test, o_star, T_test, samples)
    

Training error = 0.010101010101 Lambda = 1.0
Test error = 0.08333333333333333 

Training error = 0.00505050505051 Lambda = 4.0
Test error = 0.06818181818181818 

Training error = 0.0126262626263 Lambda = 1.0
Test error = 0.07575757575757576 

Training error = 0.00757575757576 Lambda = 1.0
Test error = 0.07575757575757576 

Test error = 0.021645021645021644 

