# Importing libraries

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy

# Importing dataset

In [2]:
df = pd.read_csv("data_banknote_authentication.txt",sep=",",header=None)
df.columns = ['variance','skewness','curtosis','entropy','class']

In [3]:
df.head()

Unnamed: 0,variance,skewness,curtosis,entropy,class
0,3.6216,8.6661,-2.8073,-0.44699,0
1,4.5459,8.1674,-2.4586,-1.4621,0
2,3.866,-2.6383,1.9242,0.10645,0
3,3.4566,9.5228,-4.0112,-3.5944,0
4,0.32924,-4.4552,4.5718,-0.9888,0


replacing 0 values in 'class' column by -1 because our perceptron algo works when class labes are +1 and -1

In [4]:
df['class'].replace(0,-1, inplace = True)

In [5]:
df.head()

Unnamed: 0,variance,skewness,curtosis,entropy,class
0,3.6216,8.6661,-2.8073,-0.44699,-1
1,4.5459,8.1674,-2.4586,-1.4621,-1
2,3.866,-2.6383,1.9242,0.10645,-1
3,3.4566,9.5228,-4.0112,-3.5944,-1
4,0.32924,-4.4552,4.5718,-0.9888,-1


Checking for null values in dataset

In [6]:
df.isnull().sum()

variance    0
skewness    0
curtosis    0
entropy     0
class       0
dtype: int64

In [7]:
df['class'].value_counts()

-1    762
 1    610
Name: class, dtype: int64

In [8]:
# function to find the W vector
def perceptron_algo(df, w0, learn_rate = .1, epochs = 1000):
    w = w0
    flag = True
    for _ in range(epochs):
        # getting random row from dataset
        rand_row = df.sample()
        x_n = rand_row.iloc[:,:-1].values
        y_n = rand_row.iloc[:,-1].values
        prediction = (y_n * numpy.dot(w,x_n.T)).item(0)
        # if prediction<0 means current w made a mistake on (x_n,y_n) therefore we will change w
        if prediction<0:
            w = w + numpy.dot(y_n,x_n)
    return w

In [9]:
# initialing w array as array contains all 1's 
w0 = numpy.ones([1,df.shape[1]-1], dtype=int)
W = perceptron_algo(df, w0)
print("W returned by perceptron algo is")
print(W[0])


W returned by perceptron algo is
[-26.519596 -29.627655  -7.480655 -19.040518]


In [10]:
# this function will predict the 'class' using weight vector W
# function return predicted values along with percentage error
def predict(df,W):
    X = df.iloc[:,:-1].values
    Y = df.iloc[:,-1].values
    Y_pred = []
    for i in range(X.shape[0]):
        temp = numpy.dot(W,X[i].T).item(0)
        # if sign is negative means class is -1
        if temp<0:
            Y_pred.append(-1)
        # if sign is positive means class is +1
        else:
            Y_pred.append(1)
    # calcuating percetage error
    error = 100 - numpy.sum(numpy.equal(Y,Y_pred))/len(Y)*100
    return [Y_pred, error]

In [11]:
res = predict(df,W)
output =pd.DataFrame(columns=['Actual_Class','Predicted_Class'])
output['Actual_Class'] = df['class']
output['Predicted_Class'] = res[0]
output

Unnamed: 0,Actual_Class,Predicted_Class
0,-1,-1
1,-1,-1
2,-1,-1
3,-1,-1
4,-1,1
...,...,...
1367,1,-1
1368,1,1
1369,1,1
1370,1,1


In [12]:
print("percentage of error is",res[1])

percentage of error is 15.889212827988345


In [13]:
# splitting data in to train and test set
train=df.sample(frac=0.8,random_state=100)
test=df.drop(train.index).sample(frac=1.0)

In [14]:
def K_Fold(data,K):
    data = data.sample(frac=1.0,random_state=100)
    # splitting array into K folds  
    folds=numpy.array_split(data, K)
    min_error = numpy.inf
    for j in range(K):
        total = folds[:j]+folds[j+1:]
        train = pd.concat(total)
        test = folds[j]
        W = perceptron_algo(train, w0)
        error = predict(test,W)[1]
        if error<min_error:
            min_error = error
            best_W = W
    return [min_error, best_W]

In [15]:
res = K_Fold(df,10)
min_error = res[0]
opt_W = res[1]
print("optimal W is",min_error)
print("minimum error is",opt_W)

optimal W is 2.173913043478265
minimum error is [[-33.246199 -25.088463 -19.341566 -16.07175 ]]


In [16]:
# this function find the f1 score
def find_F1_Score(data, W):
    df = pd.DataFrame()
    df['Y'] = data.iloc[:,-1].values
    df['Y_pred'] = predict(data,W)[0]
    df_pos = df[df['Y']==1]
    df_neg = df[df['Y']==-1]
    TP = df_pos[df_pos['Y']==df_pos['Y_pred']].shape[0]
    TN = df_neg[df_neg['Y']==df_neg['Y_pred']].shape[0]
    FP = df_pos[df_pos['Y']!=df_pos['Y_pred']].shape[0]
    FN = df_neg[df_neg['Y']!=df_neg['Y_pred']].shape[0]
    # print(TP)
    # print(FP)
    # print(TN)
    # print(FN)
    precision = TP/(FP+TP)
    recall = TP/(FN+TP)
    F1_score= (2*recall*precision)/(precision+recall)
    return F1_score

In [17]:
f1_score = find_F1_Score(test,opt_W)
print("F1 Score is",f1_score)

F1 Score is 0.9170305676855894
