In [170]:
#import statements
import numpy as np
from sklearn.cross_validation import KFold
from sklearn import datasets
import math
import pandas as pd

In [171]:
'''
Function desc: load the data 
returns: numpy array containing all features and classes (Iris-versicolor and Iris-virginica)
'''
def load_data():
    digits = datasets.load_digits()
    x = digits['data']
    y = digits['target'] 
    return x,y

In [172]:
x,y=load_data()
print np.shape(x)[0]

1797


In [173]:
'''
Function descriotion: Function to initialize the network parameters

'''

def build_MLP(iunits, hunits, ounits):
    w = np.random.normal(0.1,0.9, (hunits, iunits)) #Different distributions to initialize weights
    v = np.random.normal(0.1, 0.9, (ounits, hunits))  #Different distributions to initialize weights
    return w,v


In [174]:
# initializing the network with number of input units, output units and hidden units
iunits = np.shape(x)[1]
print iunits
ounits = len(np.unique(y))
hunits = int(math.ceil((iunits+ounits)/2))
hunits=69
w,v=build_MLP(iunits,hunits,ounits)

64


In [175]:
'''
Function Desc: Calculte softmax function

'''
def softmax(ip):
    deno=np.sum(np.exp(ip))
    return np.array(np.exp(ip)/deno)

In [176]:
'''
Function Desc: Calculte sigmoid function

'''
def sigmoid(ip):
    return 1./(1.+np.exp(-ip))

In [177]:
'''
Function Desc: Update the parameters

'''
def update_params(w,v,ip, op,step):
    beta = 0.2
    ip = np.array(ip, ndmin=2).T #convert thr input into two dimensional data
    yhats = np.array(op, ndmin=2).T #convert thr output into two dimensional data
    hunit_ip = np.dot(w, ip) #weighted inputs
    hunit_op = sigmoid(hunit_ip) #calculate 'z' hidden unit output using sigmoid funtion
    ounit_ip = np.dot(v, hunit_op) #weighted outputs
    ounit_op = softmax(ounit_ip) # calculate 'yhat using softmax funtion
    error = ounit_op-yhats #calculate error 
    v -= step * np.dot(error, np.transpose(hunit_op)) #update v
    curr_w=w
    if whist:
        prev_w = whist[0]
    else:
        prev_w = curr_w
    w -= (step * (np.dot((np.dot(v.T, error)  * hunit_op * (1.0 - hunit_op)), np.transpose(ip))))+beta*(curr_w-prev_w)
    whist.append(w)

In [178]:

no_iterations = 10
step_size = 0.05
whist=[]
for it in range(no_iterations):
    for i in range(np.shape(x)[0]):
        yhats = np.zeros(ounits)
        yhats[int(y[i])] = 1
        update_params(w,v,x[i],yhats,step_size)

In [179]:
print np.shape(w)
print np.shape(v)

(69L, 64L)
(10L, 69L)


In [180]:
'''
Function Desc: Predict the labels of the test samples. This function is similar to update_params, the only difference 
is weights will not be updated

'''
def predict(ip):
    ip = np.array(ip, ndmin=2).T
    hunit_ip = np.dot(w, ip)
    hunit_op = sigmoid(hunit_ip)
    ounit_ip = np.dot(v, hunit_op)
    predicts = softmax(ounit_ip)
    return predicts

In [182]:
k=0
for i in range(np.shape(x)[0]):
    op=predict(x[i])
    pred = np.argmax(op)
    if y[i] == pred:
        k+=1
print  1.0*k/np.shape(x)[0]

0.87367835281


In [183]:
'''
Function desc: To calculate the model parameters suh as precision, recall and f-measure from the confusion-matrix
Inuput: Actual and predicted values
Output: model parameters


Classification accuracy
(TP + TN) / (TP + TN + FP + FN)
Error rate
(FP + FN) / (TP + TN + FP + FN)

Precision: (or Positive predictive value)
proportion of predicted positives which
are actual positive
TP / (TP + FP)
Recall: proportion of actual positives
which are predicted positive
TP / (TP + FN)

'''

def model_eval(actual,predicted):
    Truth= pd.Series(actual,name = 'Truth' )
    Predicted = pd.Series(predicted,name='Predicted')
    confusion_matrix = pd.crosstab(Truth, Predicted)
    arr_cm = confusion_matrix.as_matrix()
    diag = arr_cm.diagonal()
    accuracy = float(sum(diag))/np.sum(arr_cm)
    precision = np.divide(diag,1.0*(np.sum(arr_cm,axis=0))) 
    recall = np.divide(diag,1.0*(np.sum(arr_cm,axis=0)))
    fmeasure = 2*((precision * recall)/(precision + recall))
    return confusion_matrix,accuracy,precision,recall,fmeasure

In [194]:
'''Function to perform 10-fold cross validation. In this method the test and traparams in indices are split using using the inbuit
   'KFold' function.
    Input 1: 'x' 
    Input 2: True predicted values
    Input 3: No of folds (10 by default)
    
    Performance measures such as accuracy, precision, f-measure
'''

def x_fold_validation(x,y,nfolds=5,shuffle=True,random_state=23):
    confusion_matrix_list=[]
    accuracy_list=[]
    precision_list=[]
    recall_list=[]
    fmeasure_list=[]
    mom_wei=[]
    step_size = 0.05
    cv = KFold(len(y), nfolds,shuffle=True,random_state=23) #inbuilt function to split the indices
    for train_idx, test_idx in cv:
        x_train = x[train_idx]
        y_train = y[train_idx]
        x_test = x[test_idx]
        y_test = y[test_idx]
        for it in range(100):
            for i in xrange(len(x_train)) :
                yhats = np.zeros(len(np.unique(y)))
                yhats[int(y_train[i])] = 1
                update_params(w,v,x_train[i],yhats,step_size)
            pred=[]
        for i in x_test:
            outputs = predict(i)
            label = np.argmax(outputs)
            pred.append(label)
        conf_matrix,accuracy,precision,recall,fmeasure = model_eval(y_test.tolist(),pred)
        print conf_matrix
        confusion_matrix_list.append(conf_matrix)
        accuracy_list.append(accuracy)
        precision_list.append(precision.tolist())
        recall_list.append(recall.tolist())
        fmeasure_list.append(fmeasure.tolist())
    print '################################'
    print 'Average of the model parameters'
    print '################################'
    print 'Error rate:', 1- np.mean(accuracy_list)
    print 'Accuracy:', np.mean(accuracy_list) 
    print 'Precision:', np.mean(precision_list,axis=0)
    print 'Recall:', np.mean(recall_list,axis=0)
    print 'F-measure:', np.mean(fmeasure_list,axis=0)
    
        

In [195]:

x_fold_validation(x,y)


Predicted   0   1   2   3   4   5   6   7   8   9
Truth                                            
0          30   0   0   0   0   0   0   0   0   0
1           0  31   0   0   0   0   2   0   3   4
2           0   0  29   9   0   0   0   0   0   0
3           0   0   0  29   0   0   0   0   0   0
4           0   3   0   0  37   0   0   0   0   0
5           0   0   0   7   0  35   0   1   1   0
6           0   2   1   0   0   0  28   0   0   0
7           0   0   0   0   0   0   0  28   0   0
8           0   1   0   1   0   0   0   0  35   1
9           0   0   0   3   0   0   0   1   0  38
Predicted   0   1   2   3   4   5   6   7   8   9
Truth                                            
0          34   0   0   0   0   0   0   0   0   0
1           0  35   0   0   0   0   0   0   3   1
2           0   0  34   5   0   0   0   0   0   0
3           0   0   0  48   0   0   0   0   0   0
4           0   0   0   0  25   0   0   1   0   1
5           0   0   0   2   0  26   0   0   0   0
