In [1]:
import numpy as np
import pandas as pd
import sklearn as sk
#hyper-parameters
INPUT_LAYER_SIZE=784
OUTPUT_LAYER_SIZE=10
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

In [2]:
train=pd.read_csv('./data/train.csv')
val=pd.read_csv('./data/val.csv')
test=pd.read_csv('./data/test.csv')

In [3]:
training_data_class =train.iloc[:,-1].copy().as_matrix()
training_data=train.iloc[:,1:785].copy().as_matrix()/255.0

In [4]:
validating_data_class =val.iloc[:,-1].copy().as_matrix()
validating_data=val.iloc[:,1:785].copy().as_matrix()/255.0

In [5]:
training_data=sk.preprocessing.scale(training_data)
validating_data=sk.preprocessing.scale(validating_data)

In [43]:
print (training_data[:,5])

[-0.07072376  0.10359759 -0.07072376 ..., -0.07072376 -0.07072376
  0.10359759]


In [27]:
clf = LinearDiscriminantAnalysis()
clf.fit(training_data,training_data_class)


LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None,
              solver='svd', store_covariance=False, tol=0.0001)

In [28]:
training_data=clf.transform(training_data)
validating_data=clf.transform(validating_data)

In [29]:
training_data=sk.preprocessing.normalize(training_data)
validating_data=sk.preprocessing.normalize(validating_data)

In [30]:
training_data=np.vstack((training_data,validating_data))
training_data_class=np.concatenate((training_data_class,validating_data_class))
training_data_class.shape

(60000,)

In [6]:
def create_output(class_labels):
    '''returns one-hot vectors'''
    output=np.zeros((len(class_labels),10))
    for i in range(len(class_labels)):
        output[i,class_labels[i]]=1
    return output
def sigmoid(value):
    return 1/(1+np.exp(-value))
def tanh(value):
    return np.tanh(value)
def sigmoid_dif(value):
    return sigmoid(value)*(1-sigmoid(value))
def tanh_dif(value):
    return 1-tanh(value)*tanh(value)
def softmax(vector):
    num= np.exp(vector)
    return num/np.sum(num)
def paraCopy(w,b,multiplier=1):
    return [i*multiplier for i in w],[i*multiplier for i in b]
def paraAdd(A,B):
    a,b=A
    c,d=B
    return [i+j for i,j in zip(a,c)],[i+j for i,j in zip(b,d)]

In [12]:
def shaper(lst):
    for i in lst:
        print 'shape -',i.shape
def loss(list1,list2,loss_type):
    if loss_type=='ce': #ce 
        return -np.sum([j*np.log2(i) for i,j in zip(list1,list2)])
    if loss_type=='sq': #sq
        return 0.5*np.sum([(i-j)**2 for i,j in zip(list1,list2)])
def dumpModel(model):
    kp=0
    for i,j in zip(model.weights,model.biases):
        np.save('./temp/weights_'+str(kp),i)
        np.save('./temp/biases_'+str(kp),j)
        kp+=1


In [17]:
class NN_Model:
    def __init__(self,lr=0.01,momentum=0,hidden_layers=(100,),activation='sigmoid',loss='sq',
                 opt='gd',batch_size=1,anneal=False,save_dir='./temp/',expt_dir='./temp/',max_iter=50):
        self.lr=lr
        self.momentum=momentum
        self.layers=(INPUT_LAYER_SIZE,)+hidden_layers+(OUTPUT_LAYER_SIZE,)
        if activation=='sigmoid':
            self.activation=sigmoid
            self.activation_diff=sigmoid_dif
        elif activation=='tanh':
            self.activation=tanh
            self.activation_diff=tanh_dif
        else:
            print 'Error : activation function not found'
                
        
        self.loss=loss
        self.opt=opt
        self.batch_size=batch_size
        self.anneal=anneal
        self.save_dir=save_dir
        self.expt_dir=expt_dir
        self.num_layers=len(self.layers)
        self.max_iter=max_iter
    
    def __forward_propagation(self):
        h_set=[]
        a_set=[]
        h=self.input_data[TRAINER]
        h_set.append(h) #experimental
        L=self.num_layers-2 # 2 are input and output layers
        for k in range(L): 
            a=self.biases[k]+np.matmul(self.weights[k],h)
            h=self.activation(a)
            a_set.append(a)
            h_set.append(h)
        a=self.biases[L]+np.matmul(self.weights[L],h)
        a_set.append(a)
        y=softmax(a)
        #h_set.append(y) #experimental
        #print h.shape
            
        return h_set,a_set,y
    
    def __forward_propagation_test(self,h):
        L=self.num_layers-2 # 2 are input and output layers
        for k in range(L): 
            a=self.biases[k]+np.matmul(self.weights[k],h)
            h=self.activation(a)
        a=self.biases[L]+np.matmul(self.weights[L],h)
        y=softmax(a)
        #h_set.append(y) #experimental
        #print h.shape
            
        return y
    
    
    def __back_propagation(self,h,a,y):
        L=self.num_layers-2
        dWeights=[]
        dBiases=[]
        if self.loss=='ce':
            daL_loss=-(self.output_data[TRAINER]-y) # for cross-entropy loss function
        elif self.loss=='sq':
            y_l=y[np.argmax(self.output_data[TRAINER])]
            s=np.sum(y)
            daL_loss=(y_l**2)*np.ones(OUTPUT_LAYER_SIZE)+(y_l-s)*y-y_l*create_output([np.argmax(self.output_data[TRAINER])])[0]
            print daL_loss
        else :
            print 'Wrong loss function'
            return
        dA_loss=daL_loss
        for k in range(L,-1,-1):
            #print 'k=',k
            dW_loss=np.outer(dA_loss,h[k]).T
            dB_loss=dA_loss
            
            if k!=0:
                dH_loss=np.matmul(self.weights[k].T,dA_loss)
            
                dA_loss=dH_loss*self.activation_diff(a[k-1])
            
            
            dWeights=[dW_loss.T]+dWeights
            dBiases=[dB_loss.T]+dBiases
            #print dA_loss.shape
            #return _,_
            
            
        return dWeights,dBiases
    
    def __update(self,dWeights,dBiases):
        for i in range(len(self.weights)):
            self.weights[i]=self.weights[i]-dWeights[i]
            self.biases[i]=self.biases[i]-dBiases[i]

    
    
    
    def __train(self):
        global TRAINER
        for i in range(self.max_iter):
            if i%5==0 and i!=0 and self.anneal==True:
                self.lr/=2
            
            accuracy=0
            if self.batch_size==1:
                self.batch_size=len(training_data)
            
            sets=len(training_data)/self.batch_size # number of batches
                
            oldDWeights,oldDBiases=paraCopy(self.weights,self.biases,0) # for momentum
            for j in range(sets):
                #print 'SET=',j
                los=0
                dWeights,dBiases=paraCopy(self.weights,self.biases,0)
                for TRAINER in range(j*self.batch_size,(j+1)*self.batch_size):
                    #print 'TRAINER=',TRAINER
                    h,a,y=self.__forward_propagation()

                    
                    dWeights,dBiases=paraAdd(self.__back_propagation(h,a,y),
                                         paraCopy(dWeights,dBiases,1))
                    los+=loss(y,self.output_data[TRAINER],self.loss)

                dWeights,dBiases=paraAdd(paraCopy(oldDWeights,oldDBiases,self.momentum),
                                         paraCopy(dWeights,dBiases,self.lr))

                self.__update(dWeights,dBiases)


                
                oldDWeights,oldDBiases=paraCopy(dWeights,dBiases,1)

                #print np.argmax(y),self.raw_class_labels[TRAINER]
                #if np.argmax(y)==self.raw_class_labels[TRAINER]:

                 #   accuracy+=1
                print 'Epoch : ',i,'Step : ',j,'loss : ',los
                #print 'acc=',accuracy*1.0/sets

    def fit(self,X,Y):
        self.input_data=X
        self.output_data=create_output(Y)
        self.weights=[]
        self.biases=[]
        self.raw_class_labels=Y
        for i in range(self.num_layers-1):
            self.weights.append(np.random.uniform(low=-1.0,high=1.0,size=(self.layers[i+1],self.layers[i])))
            self.biases.append(np.random.uniform(low=-1.0,high=1.0,size=(self.layers[i+1])))
            
            #print self.biases[-1].shape
        self.__train()
        
    def resume(self,X,Y):
        self.input_data=X
        self.output_data=create_output(Y)
        self.weights=[]
        self.biases=[]
        self.raw_class_labels=Y
        for i in range(self.num_layers-1):
            self.weights.append(np.load('./temp/weights_'+str(i)+'.npy'))
            self.biases.append(np.load('./temp/biases_'+str(i)+'.npy'))
            #print self.weights[-1].shape
        self.__train()
        
        
    def predict(self,X):
        return [np.argmax(self.__forward_propagation_test(i)) for i in X]    
        
    

In [22]:
model=NN_Model(hidden_layers=(128,128),lr=0.01,max_iter=20,momentum=0,batch_size=250,anneal=True)
training_data=np.array(training_data)

In [23]:
model.fit(training_data,training_data_class)
#model.buildModel()

Epoch :  0 Step :  0 loss :  181.600622529
Epoch :  0 Step :  1 loss :  223.999999957
Epoch :  0 Step :  2 loss :  229.0
Epoch :  0 Step :  3 loss :  221.0
Epoch :  0 Step :  4 loss :  222.0


  from ipykernel import kernelapp as app
  app.launch_new_instance()


Epoch :  0 Step :  5 loss :  nan
Epoch :  0 Step :  6 loss :  nan
Epoch :  0 Step :  7 loss :  nan
Epoch :  0 Step :  8 loss :  nan
Epoch :  0 Step :  9 loss :  nan
Epoch :  0 Step :  10 loss :  nan
Epoch :  0 Step :  11 loss :  nan
Epoch :  0 Step :  12 loss :  nan
Epoch :  0 Step :  13 loss :  nan
Epoch :  0 Step :  14 loss :  nan
Epoch :  0 Step :  15 loss :  nan
Epoch :  0 Step :  16 loss :  nan
Epoch :  0 Step :  17 loss :  nan
Epoch :  0 Step :  18 loss :  nan
Epoch :  0 Step :  19 loss :  nan
Epoch :  0 Step :  20 loss :  nan


KeyboardInterrupt: 

In [33]:
(len(validating_data)-np.count_nonzero(model.predict(validating_data)-validating_data_class))/5000.0

0.8536

In [31]:
dumpModel(model)

In [12]:
testing_data=test.iloc[:,1:785].copy().as_matrix()/255.0
#testing_data=clf.transform(testing_data)

In [13]:
test_results=model.predict(testing_data)

In [14]:
tmp=[i for i in range(10000)]

In [15]:
csv_result=np.array(zip(tmp,test_results))

In [16]:
print csv_result

[[   0    4]
 [   1    4]
 [   2    2]
 ..., 
 [9997    3]
 [9998    7]
 [9999    3]]


In [17]:
np.savetxt('./res.csv',csv_result,delimiter=',',fmt='%d')