In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from math import erf
import os

In [None]:
def softmax2(x):
    """Row-wise softmax of a 2-D array.

    Parameters
    ----------
    x : array-like of shape (n_rows, n_cols)
        Scores; each row is normalised independently.

    Returns
    -------
    numpy.ndarray of the same shape whose rows each sum to 1.
    """
    x = np.asarray(x, dtype=float)
    # Subtracting the row maximum leaves the result unchanged mathematically
    # but keeps np.exp from overflowing on large scores.
    shifted = np.exp(x - np.max(x, axis=1, keepdims=True))
    # keepdims=True keeps the sums as a column so the division is per row,
    # also when there is more than one row.
    return shifted / np.sum(shifted, axis=1, keepdims=True)

class perceptron():
    """Single-layer, multi-output perceptron with a three-level activation.

    The net input ``x . W + bias`` is passed through ``2 * softmax2(.) - 1``
    (values in [-1, 1]) and then thresholded with ``theta`` into -1 / 0 / +1.

    Parameters
    ----------
    W : numpy.ndarray of shape (n_inputs, n_outputs), or None if set later
        with ``set_weight``.
    bias : numpy.ndarray with n_outputs elements (any shape).
    learning_rate : float, step size of each weight update.
    theta : float, half-width of the undecided band around zero.
    """

    def __init__(self ,W ,bias , learning_rate = 0.1 , theta = 0.5):
        self.learning_rate = learning_rate
        self.theta = theta
        self.W = W
        self.bias = bias

    def set_weight(self , weight):
        """Replace the weight matrix."""
        self.W = weight

    def set_theta(self ,theta):
        """Replace the activation threshold."""
        self.theta = theta

    def set_learningrate(self , lr):
        """Replace the learning rate."""
        self.learning_rate = lr

    def activation(self , Y):
        """Threshold ``Y`` element-wise: +1 above theta, -1 below -theta, else 0.

        Values exactly equal to +theta or -theta fall in the undecided band
        and map to 0.
        """
        res = np.zeros_like(Y)
        res[Y > self.theta] = 1
        res[Y < -self.theta] = -1
        return res

    def _squashed_net_input(self, x, w, b):
        """Return ``2 * softmax2(x . w + b) - 1`` for a single row vector ``x``."""
        return 2 * softmax2(np.dot(x, w) + b) - 1

    def fit(self , X , y , max_epochs = None):
        """Train until every sample is classified correctly.

        Parameters
        ----------
        X : dict mapping sample name -> 1-D feature array.
        y : dict mapping the first character of a sample name -> target
            vector of -1 / +1 values.
        max_epochs : int or None
            Optional cap on the number of passes over ``X``. ``None`` (the
            default) loops until convergence, which never terminates if the
            data cannot be fitted.

        Returns
        -------
        int
            Number of epochs run. Equals ``max_epochs`` when the cap stopped
            training before convergence.

        Side effects: stores the final weights in ``self.W`` and the final
        bias, as a column vector, in ``self.bias``.
        """
        w = self.W
        b = np.asarray(self.bias).reshape(1, -1)
        epoch_to_convergence = 0
        while True:
            wrong_classifications = 0
            epoch_to_convergence += 1
            for name, features in X.items():
                x = features.reshape(1, -1)
                result = self.activation(self._squashed_net_input(x, w, b))
                # The class label is the first character of the sample name.
                target = y[name[0]].reshape(1, -1)

                if (target != result).any():
                    wrong_classifications += 1
                    # All output units are updated whenever any of them is wrong.
                    w = w + self.learning_rate * np.dot(x.T, target)
                    b = b + self.learning_rate * target

            converged = wrong_classifications == 0
            capped = max_epochs is not None and epoch_to_convergence >= max_epochs
            if converged or capped:
                self.W = w
                self.bias = b.T
                return epoch_to_convergence

    def predict(self ,test_sample):
        """Classify one sample; returns a (1, n_outputs) array of -1 / 0 / +1.

        Uses the same net input as ``fit`` (weights and bias) so that a
        converged model reproduces its training targets.
        """
        x = test_sample.reshape(1, -1)
        # self.bias is a flat vector before fit() and a column vector after it.
        b = np.asarray(self.bias).reshape(1, -1)
        return self.activation(self._squashed_net_input(x, self.W, b))

In [None]:
def readfile(fpath,test = False) :
    """Load every character file in ``fpath`` as a flat 0/1 integer array.

    Parameters
    ----------
    fpath : str
        Directory containing one text file per character sample.
    test : bool
        When True, ``'@'`` is additionally read as 1 and ``'o'`` as 0.

    Returns
    -------
    dict mapping file name -> 1-D numpy array of 0/1 integers, in sorted
    file-name order.

    In every file ``'.'`` becomes 0, ``'#'`` becomes 1 and newlines are
    dropped; any other character raises ``ValueError`` from ``int``.
    """
    symbols = {".": "0", "#": "1", "\n": ""}
    if test:
        symbols.update({"@": "1", "o": "0"})
    table = str.maketrans(symbols)

    files_dic = {}
    # os.listdir returns names in arbitrary order; sorting keeps the sample
    # order (and therefore training) reproducible across machines.
    for file in sorted(os.listdir(fpath)):
        file_path = os.path.join(fpath, file)
        if not os.path.isfile(file_path):
            # Skip sub-directories such as .ipynb_checkpoints.
            continue
        with open(file_path, "r") as sample_file:
            content = sample_file.read()
        files_dic[file] = np.array([int(ch) for ch in content.translate(table)])
    return files_dic


In [None]:
# Load the flattened character bitmaps; the test set uses the extra '@' / 'o' symbols.
train_data = readfile("Characters-TrainSet/Characters-TrainSet")
test_data = readfile("Characters-TestSet/Characters-TestSet", True)

In [None]:
# One target vector per character class: +1 at the class position, -1 elsewhere.
labels = {
    letter: np.array([1 if column == position else -1 for column in range(7)])
    for position, letter in enumerate("ABCDEJK")
}

In [None]:
# Perceptron trained from all-zero weights and bias (63 inputs, 7 classes).
weights1 = np.zeros((63, 7))
bias = np.zeros((7,))
zero_weights = perceptron(W=weights1, bias=bias)
zepochs = zero_weights.fit(X=train_data, y=labels)
zepochs

In [None]:
# Test-set error rate of the zero-initialised perceptron: a sample counts as
# wrong when any output unit differs from its target.
predict_dic1 = {name: zero_weights.predict(sample) for name, sample in test_data.items()}
wrong_predictions1 = np.sum([
    1
    for name, output in predict_dic1.items()
    if (output != labels[name[0]]).any()
])
error_rate1 = wrong_predictions1 / len(predict_dic1)
error_rate1

In [None]:
# Perceptron trained from weights drawn uniformly from [-1, 1).
weights2 = np.random.uniform(-1.0, 1.0, (63, 7))
uniform_weights = perceptron(W=weights2, bias=bias)
uniepochs = uniform_weights.fit(X=train_data, y=labels)
uniepochs

In [None]:
# Test-set error rate of the uniform-initialised perceptron.
predict_dic2 = {name: uniform_weights.predict(sample) for name, sample in test_data.items()}
wrong_predictions2 = np.sum([
    1
    for name, output in predict_dic2.items()
    if (output != labels[name[0]]).any()
])
error_rate2 = wrong_predictions2 / len(predict_dic2)
error_rate2

In [None]:
# Perceptron trained from weights drawn from N(0, 0.5^2).
weights3 = np.random.normal(0.0, 0.5, (63, 7))
normal_weights = perceptron(W=weights3, bias=bias)
norepochs = normal_weights.fit(X=train_data, y=labels)
norepochs

In [None]:
# Test-set error rate of the normal-initialised perceptron.
predict_dic3 = {name: normal_weights.predict(sample) for name, sample in test_data.items()}
wrong_predictions3 = np.sum([
    1
    for name, output in predict_dic3.items()
    if (output != labels[name[0]]).any()
])
error_rate3 = wrong_predictions3 / len(predict_dic3)
error_rate3

In [None]:
# Perceptron trained from Xavier-style weights: N(0, 2 / (fan_in + fan_out))
# with fan_in = 63 and fan_out = 7.
std_d = np.sqrt(2 / (63 + 7))
weights4 = np.random.normal(0.0, std_d, (63, 7))
xavier_weights = perceptron(W=weights4, bias=bias)
xepochs = xavier_weights.fit(X=train_data, y=labels)
xepochs


In [None]:
# Test-set error rate of the Xavier-initialised perceptron.
predict_dic4 = {name: xavier_weights.predict(sample) for name, sample in test_data.items()}
wrong_predictions4 = np.sum([
    1
    for name, output in predict_dic4.items()
    if (output != labels[name[0]]).any()
])
error_rate4 = wrong_predictions4 / len(predict_dic4)
error_rate4

In [None]:

# Perceptron trained from Kaiming-style weights: N(0, 2 / fan_in) with fan_in = 63.
std_d1 = np.sqrt(2 / 63)
weights5 = np.random.normal(0.0, std_d1, (63, 7))
kaiming_weights = perceptron(W=weights5, bias=bias)
kepochs = kaiming_weights.fit(X=train_data, y=labels)
kepochs


In [None]:
# Test-set error rate of the Kaiming-initialised perceptron.
# The predictions must come from kaiming_weights; evaluating xavier_weights here
# would just repeat the Xavier result.
predict_dic5 = {}
for item in test_data:
    predict_dic5[item] = kaiming_weights.predict(test_data[item])
wrong_predictions5 = np.sum([1 for prediction in predict_dic5 if (predict_dic5[prediction] != labels[prediction[0]]).any()])
error_rate5 = wrong_predictions5 / len(predict_dic5)
error_rate5

In [None]:
# 50 independent perceptron runs from uniform [-1, 1) initial weights.
uni = perceptron( None , bias)
performance_uni = pd.DataFrame(columns = ['epoch_to_converge','error rate'])
for i in range(50):
    uni.set_weight(np.random.uniform(low = -1.0 , high = 1.0 , size = (63,7)))
    # fit() leaves the trained bias on the model; restore the initial bias so
    # every trial starts from the same state and only the weights vary.
    uni.bias = bias
    e1 = uni.fit(train_data , labels)
    predict_dic = {}
    for item in test_data:
        predict_dic[item] = uni.predict(test_data[item])
    wrong_predictions = np.sum([1 for prediction in predict_dic if (predict_dic[prediction] != labels[prediction[0]]).any()])
    error_rate = wrong_predictions / len(predict_dic)
    performance_uni.loc[i] = [e1, error_rate]

performance_uni['error rate'].plot(title= 'Error rate vs initial weights')

In [None]:
# 50 independent perceptron runs from N(0, 0.5^2) initial weights.
nor = perceptron( None , bias)
performance_nor = pd.DataFrame(columns = ['epoch_to_converge','error rate'])
for i in range(50):
    nor.set_weight(np.random.normal(loc = 0.0 , scale = 0.5 , size = (63,7)))
    # fit() leaves the trained bias on the model; restore the initial bias so
    # every trial starts from the same state and only the weights vary.
    nor.bias = bias
    e1 = nor.fit(train_data , labels)
    predict_dic = {}
    for item in test_data:
        predict_dic[item] = nor.predict(test_data[item])
    wrong_predictions = np.sum([1 for prediction in predict_dic if (predict_dic[prediction] != labels[prediction[0]]).any()])
    error_rate = wrong_predictions / len(predict_dic)
    performance_nor.loc[i] = [e1, error_rate]

performance_nor['error rate'].plot(title= 'Error rate vs initial weights')

In [None]:
# 50 independent perceptron runs from Xavier-style initial weights.
xa = perceptron( None , bias)
performance_xa = pd.DataFrame(columns = ['epoch_to_converge','error rate'])
for i in range(50):
    xa.set_weight(np.random.normal(loc = 0.0 , scale = np.sqrt(2 / (63 + 7)) , size = (63,7)))
    # fit() leaves the trained bias on the model; restore the initial bias so
    # every trial starts from the same state and only the weights vary.
    xa.bias = bias
    e1 = xa.fit(train_data , labels)
    predict_dic = {}
    for item in test_data:
        predict_dic[item] = xa.predict(test_data[item])
    wrong_predictions = np.sum([1 for prediction in predict_dic if (predict_dic[prediction] != labels[prediction[0]]).any()])
    error_rate = wrong_predictions / len(predict_dic)
    performance_xa.loc[i] = [e1, error_rate]

performance_xa['error rate'].plot(title= 'Error rate vs initial weights')

In [None]:
# 50 independent perceptron runs from Kaiming-style initial weights.
ke = perceptron( None , bias)
performance_ke = pd.DataFrame(columns = ['epoch_to_converge','error rate'])
for i in range(50):
    ke.set_weight(np.random.normal(loc = 0.0 , scale = np.sqrt(2 / 63 ) , size = (63,7)))
    # fit() leaves the trained bias on the model; restore the initial bias so
    # every trial starts from the same state and only the weights vary.
    ke.bias = bias
    e1 = ke.fit(train_data , labels)
    predict_dic = {}
    for item in test_data:
        predict_dic[item] = ke.predict(test_data[item])
    wrong_predictions = np.sum([1 for prediction in predict_dic if (predict_dic[prediction] != labels[prediction[0]]).any()])
    error_rate = wrong_predictions / len(predict_dic)
    performance_ke.loc[i] = [e1, error_rate]

performance_ke['error rate'].plot(title= 'Error rate vs initial weights')

In [None]:
# Print each perceptron sweep table (Kaiming, Xavier, normal, uniform) for a quick check.
for sweep_table in (performance_ke, performance_xa, performance_nor, performance_uni):
    print(sweep_table)


In [None]:
# Perceptron: effect of the activation threshold theta, always starting from
# the same Xavier-style weights and the same initial bias.
weights = np.random.normal(loc = 0.0 , scale = np.sqrt(2 / (63 + 7)) , size = (63,7))
test = perceptron(weights  , bias)
performance_theta = pd.DataFrame(columns = ['epoch_to_converge','error rate'])
for i in np.arange(0.1 , 1 , 0.05):
    test.set_weight(weights)
    # fit() leaves the trained bias on the model; restore the initial bias so
    # theta is the only thing that changes between runs.
    test.bias = bias
    test.set_theta(i)
    e1 = test.fit(train_data , labels)
    predict_dic = {}
    for item in test_data:
        predict_dic[item] = test.predict(test_data[item])
    wrong_predictions = np.sum([1 for prediction in predict_dic if (predict_dic[prediction] != labels[prediction[0]]).any()])
    error_rate = wrong_predictions / len(predict_dic)
    performance_theta.loc[i] = [e1, error_rate]

performance_theta['error rate'].plot(title= 'Error rate vs  theta')

In [None]:

# Perceptron: effect of the learning rate at a fixed theta of 0.3, always
# starting from the same weights and the same initial bias.
performance_LR = pd.DataFrame(columns = ['epoch_to_converge','error rate'])
test.set_theta(0.3)
for i in np.arange(0.1 , 1 , 0.1):
    test.set_weight(weights)
    # fit() leaves the trained bias on the model; restore the initial bias so
    # the learning rate is the only thing that changes between runs.
    test.bias = bias
    # The swept quantity is the learning rate; theta stays at 0.3.
    test.set_learningrate(i)
    e1 = test.fit(train_data , labels)
    predict_dic = {}
    for item in test_data:
        predict_dic[item] = test.predict(test_data[item])
    wrong_predictions = np.sum([1 for prediction in predict_dic if (predict_dic[prediction] != labels[prediction[0]]).any()])
    error_rate = wrong_predictions / len(predict_dic)
    performance_LR.loc[i] = [e1, error_rate]

performance_LR['error rate'].plot(title= 'Error rate vs  learning rate')

In [None]:
class adline():
    """Single-layer, multi-output classifier with a three-level activation.

    The net input ``x . W + bias`` is passed through ``2 * softmax2(.) - 1``
    (values in [-1, 1]) and then thresholded with ``theta`` into -1 / 0 / +1.

    NOTE(review): the weight update in ``fit`` is ``learning_rate * x^T . target``,
    i.e. the same rule as the ``perceptron`` class; it does not use the error
    ``target - y_in`` as an LMS / delta rule would. Confirm whether that is
    intended for this model.

    Parameters
    ----------
    W : numpy.ndarray of shape (n_inputs, n_outputs), or None if set later
        with ``set_weight``.
    bias : numpy.ndarray with n_outputs elements (any shape).
    learning_rate : float, step size of each weight update.
    theta : float, half-width of the undecided band around zero.
    """

    def __init__(self ,W ,bias , learning_rate = 0.1 , theta = 0.5):
        self.learning_rate = learning_rate
        self.theta = theta
        self.W = W
        self.bias = bias

    def set_weight(self , weight):
        """Replace the weight matrix."""
        self.W = weight

    def set_theta(self ,theta):
        """Replace the activation threshold."""
        self.theta = theta

    def set_learningrate(self , lr):
        """Replace the learning rate."""
        self.learning_rate = lr

    def activation(self , Y):
        """Threshold ``Y`` element-wise: +1 above theta, -1 below -theta, else 0.

        Values exactly equal to +theta or -theta fall in the undecided band
        and map to 0.
        """
        res = np.zeros_like(Y)
        res[Y > self.theta] = 1
        res[Y < -self.theta] = -1
        return res

    def _squashed_net_input(self, x, w, b):
        """Return ``2 * softmax2(x . w + b) - 1`` for a single row vector ``x``."""
        return 2 * softmax2(np.dot(x, w) + b) - 1

    def fit(self , X , y , max_epochs = None):
        """Train until every sample is classified correctly.

        Parameters
        ----------
        X : dict mapping sample name -> 1-D feature array.
        y : dict mapping the first character of a sample name -> target
            vector of -1 / +1 values.
        max_epochs : int or None
            Optional cap on the number of passes over ``X``. ``None`` (the
            default) loops until convergence, which never terminates if the
            data cannot be fitted.

        Returns
        -------
        int
            Number of epochs run. Equals ``max_epochs`` when the cap stopped
            training before convergence.

        Side effects: stores the final weights in ``self.W`` and the final
        bias, as a column vector, in ``self.bias``.
        """
        w = self.W
        b = np.asarray(self.bias).reshape(1, -1)
        time_to_convergence = 0
        while True:
            wrong_classifications = 0
            time_to_convergence += 1
            for name, features in X.items():
                x = features.reshape(1, -1)
                result = self.activation(self._squashed_net_input(x, w, b))
                # The class label is the first character of the sample name.
                target = y[name[0]].reshape(1, -1)

                if (target != result).any():
                    wrong_classifications += 1
                    delta_w = self.learning_rate * x.T.dot(target)
                    w = w + delta_w
                    b = b + self.learning_rate * target

            converged = wrong_classifications == 0
            capped = max_epochs is not None and time_to_convergence >= max_epochs
            if converged or capped:
                self.W = w
                self.bias = b.T
                return time_to_convergence

    def predict(self ,test_sample):
        """Classify one sample; returns a (1, n_outputs) array of -1 / 0 / +1.

        Uses the same net input as ``fit`` (weights and bias) so that a
        converged model reproduces its training targets.
        """
        x = test_sample.reshape(1, -1)
        # self.bias is a flat vector before fit() and a column vector after it.
        b = np.asarray(self.bias).reshape(1, -1)
        return self.activation(self._squashed_net_input(x, self.W, b))

In [None]:
# adline trained from all-zero weights.
weights5 = np.zeros((63, 7))
zero_weights2 = adline(W=weights5, bias=bias)
zepochs2 = zero_weights2.fit(X=train_data, y=labels)
zepochs2

In [None]:
# Test-set error rate of the zero-initialised adline model.
predict_dic6 = {name: zero_weights2.predict(sample) for name, sample in test_data.items()}
wrong_predictions6 = np.sum([
    1
    for name, output in predict_dic6.items()
    if (output != labels[name[0]]).any()
])
error_rate6 = wrong_predictions6 / len(predict_dic6)
error_rate6

In [None]:
# 50 independent adline runs from uniform [-1, 1) initial weights.
uni2 = adline( None , bias)
performance_uni2 = pd.DataFrame(columns = ['epoch_to_converge','error rate'])
for i in range(50):
    uni2.set_weight(np.random.uniform(low = -1.0 , high = 1.0 , size = (63,7)))
    # fit() leaves the trained bias on the model; restore the initial bias so
    # every trial starts from the same state and only the weights vary.
    uni2.bias = bias
    e2 = uni2.fit(train_data , labels)
    predict_dic7 = {}
    for item in test_data:
        predict_dic7[item] = uni2.predict(test_data[item])
    wrong_predictions7 = np.sum([1 for prediction in predict_dic7 if (predict_dic7[prediction] != labels[prediction[0]]).any()])
    error_rate7 = wrong_predictions7 / len(predict_dic7)
    performance_uni2.loc[i] = [e2, error_rate7]

performance_uni2['error rate'].plot(title= 'Error rate vs initial weights')

In [None]:
# 50 independent adline runs from N(0, 0.5^2) initial weights.
nor2 = adline( None , bias)
performance_nor2 = pd.DataFrame(columns = ['epoch_to_converge','error rate'])
for i in range(50):
    nor2.set_weight(np.random.normal(loc = 0.0 , scale = 0.5 , size = (63,7)))
    # fit() leaves the trained bias on the model; restore the initial bias so
    # every trial starts from the same state and only the weights vary.
    nor2.bias = bias
    e2 = nor2.fit(train_data , labels)
    predict_dic7 = {}
    for item in test_data:
        predict_dic7[item] = nor2.predict(test_data[item])
    wrong_predictions7 = np.sum([1 for prediction in predict_dic7 if (predict_dic7[prediction] != labels[prediction[0]]).any()])
    error_rate7 = wrong_predictions7 / len(predict_dic7)
    performance_nor2.loc[i] = [e2, error_rate7]

# Plot this cell's adline results (performance_nor2), not the perceptron table.
performance_nor2['error rate'].plot(title= 'Error rate vs initial weights')

In [None]:
# 50 independent adline runs from Xavier-style initial weights.
xa2 = adline( None , bias)
performance_xa2 = pd.DataFrame(columns = ['epoch_to_converge','error rate'])
for i in range(50):
    xa2.set_weight(np.random.normal(loc = 0.0 , scale = np.sqrt(2 / (63 + 7)) , size = (63,7)))
    # fit() leaves the trained bias on the model; restore the initial bias so
    # every trial starts from the same state and only the weights vary.
    xa2.bias = bias
    e2 = xa2.fit(train_data , labels)
    # Reset the dict that is filled below (predict_dic7); resetting predict_dic2
    # would clobber the earlier perceptron predictions and leave this one stale.
    predict_dic7 = {}
    for item in test_data:
        predict_dic7[item] = xa2.predict(test_data[item])
    wrong_predictions7 = np.sum([1 for prediction in predict_dic7 if (predict_dic7[prediction] != labels[prediction[0]]).any()])
    error_rate7 = wrong_predictions7 / len(predict_dic7)
    performance_xa2.loc[i] = [e2, error_rate7]

performance_xa2['error rate'].plot(title= 'Error rate vs initial weights')

In [None]:
# 50 independent adline runs from Kaiming-style initial weights.
ke2 = adline( None , bias)
performance_ke2 = pd.DataFrame(columns = ['epoch_to_converge','error rate'])
for i in range(50):
    ke2.set_weight(np.random.normal(loc = 0.0 , scale = np.sqrt(2 / 63 ) , size = (63,7)))
    # fit() leaves the trained bias on the model; restore the initial bias so
    # every trial starts from the same state and only the weights vary.
    ke2.bias = bias
    e2 = ke2.fit(train_data , labels)
    predict_dic7 = {}
    for item in test_data:
        predict_dic7[item] = ke2.predict(test_data[item])
    wrong_predictions7 = np.sum([1 for prediction in predict_dic7 if (predict_dic7[prediction] != labels[prediction[0]]).any()])
    error_rate7 = wrong_predictions7 / len(predict_dic7)
    performance_ke2.loc[i] = [e2, error_rate7]

performance_ke2['error rate'].plot(title= 'Error rate vs initial weights')

In [None]:
# adline: effect of the activation threshold theta, always starting from the
# same Xavier-style weights and the same initial bias.
weights7 = np.random.normal(loc = 0.0 , scale = np.sqrt(2 / (63 + 7)) , size = (63,7))
test2 = adline(weights7  , bias)
performance_theta2 = pd.DataFrame(columns = ['epoch_to_converge','error rate'])
for i in np.arange(0.1 , 1 , 0.05):
    test2.set_weight(weights7)
    # fit() leaves the trained bias on the model; restore the initial bias so
    # theta is the only thing that changes between runs.
    test2.bias = bias
    test2.set_theta(i)
    e2 = test2.fit(train_data , labels)
    predict_dic7 = {}
    for item in test_data:
        predict_dic7[item] = test2.predict(test_data[item])
    # Iterate the dict filled above (predict_dic7), not the perceptron's predict_dic.
    wrong_predictions7 = np.sum([1 for prediction in predict_dic7 if (predict_dic7[prediction] != labels[prediction[0]]).any()])
    error_rate7 = wrong_predictions7 / len(predict_dic7)
    performance_theta2.loc[i] = [e2, error_rate7]

performance_theta2['error rate'].plot(title= 'Error rate vs  theta')

In [None]:

# adline: effect of the learning rate at a fixed theta of 0.3, always starting
# from the same weights and the same initial bias.
performance_LR2 = pd.DataFrame(columns = ['epoch_to_converge','error rate'])
test2.set_theta(0.3)
for i in np.arange(0.1 , 1 , 0.1):
    test2.set_weight(weights7)
    # fit() leaves the trained bias on the model; restore the initial bias so
    # the learning rate is the only thing that changes between runs.
    test2.bias = bias
    # The swept quantity is the learning rate; theta stays at 0.3.
    test2.set_learningrate(i)
    e2 = test2.fit(train_data , labels)
    predict_dic7 = {}
    for item in test_data:
        predict_dic7[item] = test2.predict(test_data[item])
    wrong_predictions7 = np.sum([1 for prediction in predict_dic7 if (predict_dic7[prediction] != labels[prediction[0]]).any()])
    error_rate7 = wrong_predictions7 / len(predict_dic7)
    performance_LR2.loc[i] = [e2, error_rate7]

performance_LR2['error rate'].plot(title= 'Error rate vs  learning rate')

In [None]:
def find_best(df, target_df, method, initial_weight):
    """Append the best run of ``df`` to ``target_df`` and return the result.

    The best run is the one with the lowest ``'error rate'``; ties are broken
    by the smallest ``'epoch_to_converge'``.

    Parameters
    ----------
    df : pandas.DataFrame with columns ``'epoch_to_converge'`` and ``'error rate'``.
    target_df : pandas.DataFrame that collects one summary row per call.
    method : value stored in the ``'model'`` column.
    initial_weight : value stored in the ``'initial_weight'`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame: ``target_df`` plus one row with the keys ``model``,
        ``initial_weight``, ``error_rate`` and ``epochs``. ``target_df`` itself
        is not modified.

    Raises
    ------
    ValueError
        If ``df`` has no rows.
    """
    if len(df) == 0:
        raise ValueError("find_best needs at least one run in df")

    # Tables filled row by row can end up with object dtype; coerce so that
    # min / idxmin work regardless.
    errors = pd.to_numeric(df['error rate'])
    epochs = pd.to_numeric(df['epoch_to_converge'])
    best_label = epochs[errors == errors.min()].idxmin()
    min_epoch_row = df.loc[best_label]

    new_row = pd.DataFrame([{
        'model': method,
        'initial_weight': initial_weight,
        'error_rate': min_epoch_row['error rate'],
        'epochs': min_epoch_row['epoch_to_converge']
    }])

    if target_df.empty:
        # Keep the column order of the (still empty) target and avoid
        # concatenating an empty frame.
        ordered = list(target_df.columns) + [c for c in new_row.columns if c not in target_df.columns]
        return new_row.reindex(columns=ordered)

    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    return pd.concat([target_df, new_row], ignore_index=True)


In [None]:
# Summary table: best run (lowest error, then fewest epochs) per model and
# weight initialisation.
compare_df = pd.DataFrame(columns = ["model" , 'initial_weight' , 'epochs' , 'error_rate' ])

for sweep_table, model_name, init_name in [
    (performance_xa, "Perceptron", "Xavier"),
    (performance_ke, "Perceptron", "Kaiming"),
    (performance_nor, "Perceptron", "Random Normal"),
    (performance_xa2, "Adline", "Xavier"),
    (performance_ke2, "Adline", "Kaiming"),
    (performance_nor2, "Adline", "Random Normal"),
]:
    compare_df = find_best(sweep_table, compare_df, method=model_name, initial_weight=init_name)
compare_df

In [None]:
# Reads the character files as row/column projections instead of raw pixels.
def readfile2(fpath,test = False, shape = (9, 7)) :
    """Load every character file in ``fpath`` as row and column pixel counts.

    Parameters
    ----------
    fpath : str
        Directory containing one text file per character sample.
    test : bool
        When True, ``'@'`` is additionally read as 1 and ``'o'`` as 0.
    shape : tuple of int
        (rows, columns) of the character grid; defaults to 9 x 7.

    Returns
    -------
    dict mapping file name -> 1-D numpy array of length rows + columns: the
    number of set pixels in each row followed by the number in each column.
    Entries are in sorted file-name order.

    In every file ``'.'`` becomes 0, ``'#'`` becomes 1 and newlines are
    dropped; any other character raises ``ValueError`` from ``int``.
    """
    symbols = {".": "0", "#": "1", "\n": ""}
    if test:
        symbols.update({"@": "1", "o": "0"})
    table = str.maketrans(symbols)

    files_dic = {}
    # os.listdir returns names in arbitrary order; sorting keeps the sample
    # order (and therefore training) reproducible across machines.
    for file in sorted(os.listdir(fpath)):
        file_path = os.path.join(fpath, file)
        if not os.path.isfile(file_path):
            # Skip sub-directories such as .ipynb_checkpoints.
            continue
        with open(file_path, "r") as sample_file:
            content = sample_file.read()
        grid = np.array([int(ch) for ch in content.translate(table)]).reshape(shape)
        # Row sums first, then column sums.
        files_dic[file] = np.concatenate([grid.sum(axis=1), grid.sum(axis=0)])
    return files_dic


In [None]:
# Load the projection features from the same relative dataset folders that the
# raw-pixel loader uses, so the notebook does not depend on one user's machine.
train_data2 = readfile2('Characters-TrainSet/Characters-TrainSet')
test_data2 = readfile2('Characters-TestSet/Characters-TestSet',test = True)

In [None]:
# adline on the 16 row/column projection features, 50 independent runs.
# NOTE(review): despite the "xa3" naming, the initial weights come from
# np.random.random, i.e. uniform on [0, 1), not a Xavier distribution.
# Confirm which initialisation is intended.
xa3= adline( None , bias , learning_rate = 1)
performance_xa3 = pd.DataFrame(columns = ['epoch_to_converge','error rate'])
for i in range(50):
    xa3.set_weight(np.random.random([16,7]))
    # fit() leaves the trained bias on the model; restore the initial bias so
    # every trial starts from the same state and only the weights vary.
    xa3.bias = bias
    e2 = xa3.fit(train_data2 , labels)
    predict_dic7 = {}
    # Iterate the projection test set itself rather than the raw-pixel one.
    for item in test_data2:
        predict_dic7[item] = xa3.predict(test_data2[item])
    wrong_predictions7 = np.sum([1 for prediction in predict_dic7 if (predict_dic7[prediction] != labels[prediction[0]]).any()])
    error_rate7 = wrong_predictions7 / len(predict_dic7)
    performance_xa3.loc[i] = [e2, error_rate7]

performance_xa3['error rate'].plot(title= 'Error rate vs initial weights')