In [1]:
import numpy as np
import pandas
import math
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score

In [2]:
def read_data(file_name):
    trainY=[]
    trainX=[]
    with open(file_name) as f:
        temp = f.read().splitlines()
    temp = list(filter(None, temp))

    for line in temp:
        if (len(line))==1:
            y=map(int, line)
            trainY.append(int(line))
        else:
            trainX.append(np.array(list(map(int, line))))

    trainX=np.array(trainX).flatten()
    trainX=trainX.reshape((len(trainY),256))
    return trainX,np.array(trainY)

def shuffle_dataset(x,y):
    """
    Shuffles two arrays in the same order.
    """
    randomize = np.arange(len(x))
    randomize=np.random.shuffle(randomize)
    return np.squeeze(x[randomize]),np.squeeze(y[randomize])
trainX,trainY=read_data("traicom.txt")
testX,testY=read_data("testcom.txt")
shape_of_inputs=len(trainX[0])+1 #+1 bias term
shape_of_outputs=len(set(trainY))


In [44]:
weights=np.random.uniform(-0.001,0.001,(shape_of_outputs,shape_of_inputs)) #initialize weights
def one_hot_code(y):
    """
    Converts instance or array to one hot code encoding.
    e.g. 5->0000010000 (lenght of array is shape of output which is 10 in this case)
    """
    if isinstance(y,list) or isinstance(y,np.ndarray): #if it is a list, iterate over it and return Nx10 array
        coded=[]
        for instance in y:
            a=np.zeros(shape_of_outputs,dtype=int)
            a[instance]=1
            coded.append(a)
        return coded
    else: #if its a scalar, return 1x10 array
        a=np.zeros(shape_of_outputs,dtype=int)
        a[y]=1
        return a
def predict(testX):
    """
    Predicts given dataset inputs. Returns as class numbers(0,1,2,3) (not one hot codes)
    """
    predictions=[]
    for x in testX:
        x=np.insert(x,0,1)# +1 bias
        y=predict_an_instance(x)
        y=np.argmax(y)
        predictions.append(y)
    return predictions
def predict_without_one_hot_code(testX):
    predictions=[]
    for x in testX:
        x=np.insert(x,0,1)# +1 bias
        y=predict_an_instance(x)
        predictions.append(y)
    return predictions
def calculate_accuracy(testX,testY):
    """
    Calculates accuracy on given dataset. 
    """
    return accuracy_score(testY,predict(testX))
def calculate_recall(testX,testY):
    """
    Calculates recall on given dataset.
    """
    return recall_score(testY,predict(testX),average=None)
def predict_an_instance(x):            
    """
    Predicts given input instance using global out scope weights 2d matrix and softmax function. 
    Bias term should be added two instance before calling this function
    """
    o=np.zeros(shape_of_outputs,dtype=np.float64)
    for i  in range(shape_of_outputs):
        for j in range(shape_of_inputs):
             o[i]+=((weights[i][j]*x[j])+weights[i][0])
            #o[i]=np.exp(o[i])
        y=np.zeros(shape_of_outputs,dtype=np.float64)
        
    o -= np.max(o) # prevent overflow on taking exponent
    y = np.exp(o) / np.sum(np.exp(o)) 
    return y
def cross_entropy(predictions, targets, epsilon=1e-12):
    """
    Computes cross entropy between targets (encoded as one-hot vectors)
    """
    
    predictions = np.clip(predictions, epsilon, 1. - epsilon)
    N = predictions.shape[0]
    ce = -np.sum(np.sum(targets*np.log(predictions+1e-9)))/N
    return ce

epochs=20
learning_rate=0.01

In [45]:

for m in range(epochs):     
    shuffled_X,shuffled_Y=shuffle_dataset(trainX,trainY)
    for x,r in zip(shuffled_X,shuffled_Y):
        x=np.insert(x,0,1)# +1 bias
        y=predict_an_instance(x)
        for i in range(shape_of_outputs):
            for j in range(shape_of_inputs):
                weights[i][j]+=learning_rate*(one_hot_code(r)[i]-y[i])*x[j] #update weights            
    acc_batch=calculate_accuracy(shuffled_X,shuffled_Y)
    acc_test=calculate_accuracy(testX,testY)
    recall_batch=calculate_recall(shuffled_X,shuffled_Y)
    recall_test=calculate_recall(testX,testY)
    ce=cross_entropy(predict_without_one_hot_code(shuffled_X),one_hot_code(shuffled_Y))

    print("Epoch: "+str(m+1)+" Error: "+str(ce) + " Training Accuracy: "+ str(acc_batch) +" Test Accuracy: "+ 
          str(acc_test) +" UAR Training:" +str(np.mean(recall_batch))+ " UAR Test: "+ str(np.mean(recall_test)))
    
                    
    learning_rate+=0.001


KeyboardInterrupt: 

In [19]:

for m in range(epochs):     
    shuffled_X,shuffled_Y=shuffle_dataset(trainX,trainY)
    batch_X=np.array_split(shuffled_X,20)#20 batch
    batch_Y=np.array_split(shuffled_Y,20)
    for b_X,b_Y in zip(batch_X,batch_Y):
        for x,r in zip(b_X,b_Y):
            x=np.insert(x,0,1)# +1 bias
            y=predict_an_instance(x)
            for i in range(shape_of_outputs):
                for j in range(shape_of_inputs):
                    weights[i][j]+=learning_rate*(one_hot_code(r)[i]-y[i])*x[j] #update weights            
        acc_batch=calculate_accuracy(shuffled_X,shuffled_Y)
        acc_test=calculate_accuracy(testX,testY)
        recall_batch=calculate_recall(shuffled_X,shuffled_Y)
        recall_test=calculate_recall(testX,testY)
        ce=cross_entropy(predict_without_one_hot_code(shuffled_X),one_hot_code(shuffled_Y))

        print("Epoch: "+str(m+1)+" Error: "+str(ce) + " Training Accuracy: "+ str(acc_batch) +" Test Accuracy: "+ 
              str(acc_test) +" UAR Training:" +str(np.mean(recall_batch))+ " UAR Test: "+ str(np.mean(recall_test)))


    learning_rate+=0.001


(600, 256)

In [14]:
weights.shape

(10, 257)

In [40]:
predictions=[]
for x,r in zip(testX,testY):
    x=np.insert(x,0,1)# +1 bias
    y=predict(x)
    y=np.argmax(y)
    predictions.append(y)

In [47]:
accuracy_score(testY,one_hot_code(predictions))

IndexError: arrays used as indices must be of integer (or boolean) type

In [17]:
shuffled_X.shape

(600, 256)

In [27]:
cross_entropy(predict(trainX),one_hot_code(trainY))

ValueError: operands could not be broadcast together with shapes (600,10) (600,) 

In [36]:
epsilon=1e-12
predictions = np.clip(predict_without_one_hot_code(trainX), epsilon, 1. - epsilon)


In [39]:
predictions.shape

(600, 10)