In [24]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score
from numpy.linalg import inv
from numpy.linalg import norm

def read_data(datafile):
    with open(datafile, 'r', encoding='utf-8-sig') as data_file:
        data = []
        for line in data_file:
            lines = line.split()
            lines = [float(i) for i in lines]
            data.append(lines)
        data = np.array(data)
    return data

def split_data(data):
    train = data[0:np.min(np.where(data[:,0]>0)),:]
    test = data[np.min(np.where(data[:,0]>0)):len(data),:]
    train_classes_t = int(np.max(train[:,len(train.T)-1]))
    test_classes_t = int(np.max(test[:,len(train.T)-1]))
    traindata_coly = list(train[:,len(train.T)-1].astype(int))
    testdata_coly = list(test[:,len(train.T)-1].astype(int))
    train_features = train[:,3:len(train.T)-1]
    test_features = test[:,3:len(train.T)-1]
    train_features = train_features.T
    test_features = test_features.T
    return train, test, train_classes_t, test_classes_t, traindata_coly, testdata_coly, train_features, test_features

def indices_to_one_hot(data, nb_classes):
    return np.eye(nb_classes+1)[np.array(data)]

def training(train_features, train_T):
    error_vec = []
    max_lambda = 10
    min_lambda = 0.1
    length = int(max_lambda/min_lambda)
    lambda_vec = np.linspace(min_lambda,max_lambda,length)
    A_fit_vec = []

    for lambda1 in lambda_vec:
        lambda_mat = lambda1*np.ones((len(train_features), len(train_features)))
        A_fit = np.matmul(np.matmul(inv(np.matmul(train_features, train_features.T) + lambda_mat), train_features), train_T.T)
        A_fit = A_fit.T
        A_fit_vec.append(A_fit)
        t_tilde = np.matmul(A_fit,train_features)
        e = np.eye(11,11)
        for i in np.arange(len(t_tilde.T)):
            normi = []
            for k in np.arange(len(e)):
                norma = norm(e[:,k]-t_tilde[:,i], ord=2)**2
                normi.append(norma)
            normi = np.array([normi])
            classi = np.argmin(normi)
            t_tilde[:,i] = e[:,classi]
        wrongly_classified = train_T - t_tilde
        count = 0
        for i in np.arange(len(train_features.T)):
            if sum(wrongly_classified[:,i] > 0) > 0:
                count = count + 1
        train_error = count/len(train_features.T)
        error_vec.append(train_error)
    lam = lambda_vec[np.argmin(error_vec)]
    A_fit = A_fit_vec[np.argmin(error_vec)]
    print("Training error =", error_vec[np.argmin(error_vec)], "Lambda =", lam)
    return lam, A_fit
    
def testing(lam, A_fit):
    lam_mat = lam*np.ones((len(train_features), len(train_features)))
    t_tilde_test = np.matmul(A_fit, test_features)
    e2 = np.eye(11,11)

    for p in np.arange(len(t_tilde_test.T)):
        normi2 = []
        for m in np.arange(len(e2)):
            norma2 = norm(e2[:,m]-t_tilde_test[:,p], ord=2)**2
            normi2.append(norma2)
        normi2 = np.array([normi2])
        classi2 = np.argmin(normi2)
        t_tilde_test[:,p] = e[:,classi2]
    
    wrongly_classified2 = test_T - t_tilde_test
    count = 0
    for i in np.arange(len(test_features.T)):
        if sum(wrongly_classified2[:,i] > 0) > 0:
            count = count + 1
    test_error = count/len(test_features.T)
    print("test error =", test_error)

if __name__=="__main__":
    data = read_data('vowel-context.txt')
    train, test, train_classes_t, test_classes_t, traindata_coly, testdata_coly, train_features, test_features = split_data(data)
    train_T = indices_to_one_hot(traindata_coly, train_classes_t)
    test_T = indices_to_one_hot(testdata_coly, test_classes_t)
    train_T = train_T.T
    test_T = test_T.T
    lam, A_fit = training(train_features, train_T)
    testing(lam, A_fit)


Training error = 0.5416666666666666 Lambda = 0.5
test error = 0.7186147186147186


In [26]:
np.linspace(0.1,10,100)

array([ 0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9,  1. ,  1.1,
        1.2,  1.3,  1.4,  1.5,  1.6,  1.7,  1.8,  1.9,  2. ,  2.1,  2.2,
        2.3,  2.4,  2.5,  2.6,  2.7,  2.8,  2.9,  3. ,  3.1,  3.2,  3.3,
        3.4,  3.5,  3.6,  3.7,  3.8,  3.9,  4. ,  4.1,  4.2,  4.3,  4.4,
        4.5,  4.6,  4.7,  4.8,  4.9,  5. ,  5.1,  5.2,  5.3,  5.4,  5.5,
        5.6,  5.7,  5.8,  5.9,  6. ,  6.1,  6.2,  6.3,  6.4,  6.5,  6.6,
        6.7,  6.8,  6.9,  7. ,  7.1,  7.2,  7.3,  7.4,  7.5,  7.6,  7.7,
        7.8,  7.9,  8. ,  8.1,  8.2,  8.3,  8.4,  8.5,  8.6,  8.7,  8.8,
        8.9,  9. ,  9.1,  9.2,  9.3,  9.4,  9.5,  9.6,  9.7,  9.8,  9.9,
       10. ])