In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## 1a) and 1b) PERCEPTRON ALGORITHM

In [2]:
class Perceptron:
    """Linear perceptron classifier for labels in {-1, +1}.

    The decision function is ``sign(W . x)``; there is no bias term, so the
    separating hyperplane always passes through the origin.
    """

    def __init__(self):
        # Weight vector; stays None until train() is called.
        self.W = None

    def train(self, X, y, epoch=10000, seed=None):
        """Fit the weights with stochastic perceptron updates.

        Parameters
        ----------
        X : 2-D array, one sample per row.
        y : 1-D array of labels (+1 / -1), aligned with the rows of X.
        epoch : number of single-sample iterations (each iteration draws ONE
            random row with replacement; it is not a full pass over X).
        seed : optional seed for a private random generator. When None
            (default) the global ``np.random`` state is used, exactly as
            before, so results vary from run to run.
        """
        n_samples = X.shape[0]
        self.W = np.zeros(X.shape[1])    # start from the zero vector
        rng = None if seed is None else np.random.default_rng(seed)

        for _ in range(epoch):
            # Pick one random training row.
            if rng is None:
                pos = np.random.randint(0, n_samples)
            else:
                pos = rng.integers(0, n_samples)
            row_x = X[pos]
            row_y = y[pos]

            # Misclassified (or exactly on the boundary): move W toward y*x.
            if (np.dot(self.W, row_x) * row_y) <= 0:
                self.W += row_x * row_y

    def errors(self, X, y):
        """Return the fraction of rows of X whose prediction disagrees with y."""
        n_samples = X.shape[0]
        # A negative product means predicted and true labels have opposite signs.
        misclassified = sum(
            1 for i in range(n_samples) if self.predict(X[i]) * y[i] < 0
        )
        return misclassified / n_samples

    def predict(self, x):
        """Return -1 if ``W . x`` is negative, otherwise +1 (ties go to +1)."""
        if np.dot(self.W, x) < 0:
            return -1
        return 1
                    
        

### READING DATASET

In [3]:
# Load the banknote data; the column names are supplied explicitly via `names`.
column_names = ['Variance', 'Skewness', 'Curtosis', 'Entropy', 'Class']
df = pd.read_csv('data_banknote_authentication.txt', names=column_names)

#### DATA PREPROCESSING

In [4]:
# Missing-value check: show every row that has at least one NaN
# (an empty result means no row has a missing value).
df[df.isna().any(axis=1)]

Unnamed: 0,Variance,Skewness,Curtosis,Entropy,Class


NORMALIZING COLUMNS

In [5]:
# Min-max scale each feature column to [0, 1]: (x - min) / (max - min).
# Done column-wise in one step instead of four copy-pasted statements.
# NOTE: a constant column (max == min) would divide by zero.
feature_cols = ['Variance', 'Skewness', 'Curtosis', 'Entropy']
col_min = df[feature_cols].min()
col_max = df[feature_cols].max()
df[feature_cols] = (df[feature_cols] - col_min) / (col_max - col_min)

In [6]:
# Relabel class 0 as -1 so the labels can be used directly in the perceptron's
# sign-based update (assumes the raw labels are 0/1 -- confirm against the data).
df['Class']  = df['Class'].replace(0,-1)

In [7]:
# Preview the first rows after scaling and relabeling.
df.head()

Unnamed: 0,Variance,Skewness,Curtosis,Entropy,Class
0,0.769004,0.839643,0.106783,0.736628,-1
1,0.835659,0.820982,0.121804,0.644326,-1
2,0.786629,0.416648,0.310608,0.786951,-1
3,0.757105,0.871699,0.054921,0.45044,-1
4,0.531578,0.348662,0.424662,0.687362,-1


In [8]:
# Shuffle the rows so the contiguous cross-validation folds mix both classes.
# A fixed random_state makes the row order (and so the fold membership)
# identical on every fresh Restart & Run All.
df = df.sample(frac=1, random_state=42)

In [9]:
# Feature matrix: the first four columns; label vector: the last column.
features_frame = df.iloc[:, 0:4]
labels_series = df.iloc[:, -1]
X = features_frame.values
y = labels_series.values

In [10]:
# Fit a perceptron on the full dataset (default number of iterations).
# Training samples rows at random from the global np.random state, so the
# learned weights differ between runs.
p=Perceptron()
p.train(X,y)

In [11]:
# Learned weight vector: one weight per feature column (the model has no bias).
p.W

array([-5.89317369,  0.17318093, -0.67532938,  4.46973282])

In [12]:
# Fraction of rows misclassified on the same data the model was trained on.
p.errors(X,y)

0.12099125364431487

## 1c) CROSS-VALIDATION

In [13]:
# k-fold cross-validation: each fold is held out once as the test set while a
# fresh perceptron is trained on the remaining folds.
k=10
fold_size = X.shape[0]//k   # approximate rows per fold (informational only)

errors = []    # test-set ERROR RATE of each fold
obj = []       # trained Perceptron of each fold, same order as `errors`

# indices_arr = np.random.permutation(X.shape[0])   #SHUFFLING INDICES
indices_arr = np.arange(0,X.shape[0])
chunk=np.array_split(indices_arr,k)   # list of k index arrays (the folds)

for fold, test_idx in enumerate(chunk):
    # Training indices are all folds except the held-out one. Building them
    # from the other chunks (rather than deleting by position) stays correct
    # even if indices_arr is replaced by a permutation.
    train_idx = np.concatenate([c for j, c in enumerate(chunk) if j != fold])

    X_train = X[train_idx]
    y_train = y[train_idx]
    X_test = X[test_idx]
    y_test = y[test_idx]

    p = Perceptron()
    p.train(X_train, y_train)
    errors.append(p.errors(X_test, y_test))
    obj.append(p)


In [14]:
min(errors) # lowest test-set error rate among the folds

0.11678832116788321

In [15]:
# Cross-validated estimate: the mean of the per-fold test error rates.
print("cross-validated test set error is : ",np.mean(errors))

cross-validated test set error is :  0.21860784935999153


In [16]:
# Pick the fold whose model had the lowest test error (first one on ties),
# make that model the current `p`, and pull out that fold's held-out rows.
lowest_error = min(errors)
pos = errors.index(lowest_error)
p = obj[pos]
best_fold_idx = chunk[pos]
val_x, val_y = X[best_fold_idx], y[best_fold_idx]

In [17]:
# list(zip(val_x,val_y))

In [18]:
# val_x,val_y

## 1d) **F1 Score**

$F1 = \frac{TP}{TP+\frac{1}{2}(FP+FN)}$

In [19]:
def F1score(X,y,model=None):
    """Compute the F1 score of a classifier, treating label +1 as positive.

    F1 = TP / (TP + 0.5 * (FP + FN))

    Parameters
    ----------
    X : 2-D array, one sample per row.
    y : true labels aligned with the rows of X; a label equal to 1 is a
        positive, anything else is a negative.
    model : object with a ``predict(x)`` method returning 1 or -1. When None
        (default) the notebook-level perceptron ``p`` is used, which keeps
        existing ``F1score(X, y)`` calls working.

    Returns
    -------
    float : the F1 score, or 0.0 when there are no true positives, false
        positives or false negatives (the ratio would be 0/0).
    """
    if model is None:
        model = p   # fall back to the global perceptron

    TP = 0   # true positives
    FP = 0   # false positives
    FN = 0   # false negatives
    for i in range(X.shape[0]):
        prediction = model.predict(X[i])   # predicted class label
        if y[i] == 1:
            if prediction == 1:
                TP += 1
            else:
                FN += 1
        elif prediction == 1:
            FP += 1

    if TP == 0 and FP == 0 and FN == 0:
        return 0.0
    return TP / (TP + 0.5 * (FP + FN))

In [20]:
# F1 score on the held-out rows of the fold selected by `pos`.
# NOTE(review): `pos` was chosen as the fold with the lowest test error, so
# this score is measured on the same rows used for that selection and is
# likely optimistic.
x_val=X[chunk[pos]]
y_val=y[chunk[pos]]
print('F1SCORE: ',F1score(x_val,y_val))

F1SCORE:  0.8222222222222222


$$Bern(\frac{\pi_1 + \pi_2}{2})$$