## Imports

In [26]:
import pandas as pd
import numpy as np
from SLP import SLP
from Preprocessing import preprocessing

## Data Reading

In [27]:
# Load the full bird dataset from a CSV file located in the working directory.
all_data = pd.read_csv('birds.csv')

In [28]:
# Preview the first rows to check which columns were loaded and in what order.
all_data.head()

Unnamed: 0,gender,body_mass,beak_length,beak_depth,fin_length,bird category
0,male,3750,39.1,18.7,181,A
1,female,3800,39.5,17.4,186,A
2,female,3250,40.3,18.0,195,A
3,female,3500,39.6,17.7,186,A
4,female,3450,36.7,19.3,193,A


## Data Slicing

### Feature Selection

In [29]:
# Map each feature name to its column position in `all_data`
# (positions follow the column order of the loaded table).
feature_index = {
    name: position
    for position, name in enumerate(
        ['gender', 'body_mass', 'beak_length', 'beak_depth', 'fin_length']
    )
}

In [30]:
# The two features the model is trained on; these literals stand in for the
# values that will eventually come from the GUI.
feature1 = 'body_mass'  # will be taken from the gui
feature2 = 'beak_length'  # will be taken from the gui

# Translate each chosen feature name into its column position.
f1, f2 = (feature_index[name] for name in (feature1, feature2))

### Class Selection

In [31]:
# Map each class label to its ordinal position (A -> 0, B -> 1, C -> 2).
class_index = {label: position for position, label in enumerate('ABC')}

In [32]:
class1, class2 = 'A', 'C' # will be taken from the gui

# Number of consecutive rows occupied by each class.
# NOTE(review): this assumes birds.csv stores the classes as contiguous,
# equally sized runs ordered A, B, C -- confirm against the data file.
ROWS_PER_CLASS = 50

# Half-open row ranges [b, e) for the two selected classes.
b1, b2 = class_index[class1] * ROWS_PER_CLASS, class_index[class2] * ROWS_PER_CLASS
e1, e2 = b1 + ROWS_PER_CLASS, b2 + ROWS_PER_CLASS

print(f"Class {class1}: {b1} -> {e1}")
print(f"Class {class2}: {b2} -> {e2}")

Class A: 0 -> 50
Class C: 100 -> 150


### Slicing

In [33]:
# For each selected class keep its row range and only three columns:
# the two chosen features plus the label column at position 5 ('bird category').
C1 = all_data.iloc[slice(b1, e1), [f1, f2, 5]]
C2 = all_data.iloc[slice(b2, e2), [f1, f2, 5]]

# One shape per line, first C1 then C2.
print(C1.shape, C2.shape, sep="\n")

(50, 3)
(50, 3)


In [34]:
# Shuffle the rows of each class so the train/test split is not tied to the
# order of the rows in the file. A fixed random_state makes the shuffle (and
# therefore the split and every metric derived from it) reproducible across
# kernel restarts; change the seed to draw a different split.
C1 = C1.sample(frac=1, random_state=42).reset_index(drop=True)
C2 = C2.sample(frac=1, random_state=42).reset_index(drop=True)

## Data Splitting

In [35]:
# The first 30 (already shuffled) rows of each class go to the training set;
# whatever remains of each class goes to the test set.
train = pd.concat([C1.head(30), C2.head(30)])
test = pd.concat([C1.iloc[30:], C2.iloc[30:]])

# One shape per line, first train then test.
print(train.shape, test.shape, sep="\n")

(60, 3)
(40, 3)


In [36]:
# Shuffle again so the two classes are interleaved instead of stacked one
# after the other by the concatenation. A fixed random_state keeps the row
# order reproducible across kernel restarts.
train = train.sample(frac=1, random_state=42).reset_index(drop=True)
test = test.sample(frac=1, random_state=42).reset_index(drop=True)

In [37]:
# Spot-check the shuffled training rows: two feature columns, then the label.
train.head()

Unnamed: 0,body_mass,beak_length,bird category
0,3600,37.7,A
1,3600,42.4,C
2,2975,37.5,A
3,4500,53.5,C
4,3900,40.9,A


In [38]:
# Feature matrices: the first two columns of each split.
# Explicit copies are taken because the preprocessing step modifies the frame
# it receives in place; working on an independent copy avoids
# SettingWithCopyWarning and keeps `train` / `test` untouched.
X_train = train.iloc[:, 0:2].copy()
X_test = test.iloc[:, 0:2].copy()

print(X_train.shape)
print(X_test.shape)

(60, 2)
(40, 2)


In [39]:
# Confirm that only the two selected feature columns remain.
X_train.head()

Unnamed: 0,body_mass,beak_length
0,3600,37.7
1,3600,42.4
2,2975,37.5
3,4500,53.5
4,3900,40.9


In [40]:
# Label frames: the third column of each split. Selecting with a list ([2])
# yields a one-column DataFrame directly, and .copy() guarantees the labels do
# not share data with `train` / `test`, since the preprocessing step modifies
# the frame it receives in place.
Y_train = train.iloc[:, [2]].copy()
Y_test = test.iloc[:, [2]].copy()

print(Y_train.shape)
print(Y_test.shape)

(60, 1)
(40, 1)


In [41]:
# Spot-check the raw (still string-valued) training labels.
Y_train.head()

Unnamed: 0,bird category
0,A
1,C
2,A
3,C
4,A


# Training

In [42]:
# Preprocess the training features and labels with the project-local helper.
# No return value is captured: the helper changes the frames it is given
# (X_train shows different values when displayed afterwards).
# NOTE(review): the feature values appear to be rescaled into [0, 1] -- confirm
# the exact scaling in Preprocessing.py.
preprocessing(data=X_train)
# NOTE(review): `classes` presumably tells the helper how to encode the two
# labels numerically (predictions later are 1 / -1) -- confirm in Preprocessing.py.
preprocessing(data=Y_train, classes=[class1, class2])

Column: body_mass
Column: beak_length
Column: bird category


In [43]:
# Inspect the training features again now that preprocessing has been applied.
X_train.head()

Unnamed: 0,body_mass,beak_length
0,0.461538,0.150628
1,0.461538,0.34728
2,0.141026,0.142259
3,0.923077,0.811715
4,0.615385,0.284519


In [52]:
# Create the model from the project-local SLP module and train it on the
# preprocessed training split.
# NOTE(review): SLP is presumably a single-layer perceptron; how learning_rate
# and n_epochs are used is not visible in this notebook -- see SLP.py.
slp_model = SLP(learning_rate=0.01, n_epochs=50)
slp_model.fit(X=X_train, Y=Y_train)

In [53]:
# Predict labels for the training rows. The displayed result is a frame whose
# single column carries the name passed as column_name.
Y_train_pred = slp_model.predict(X=X_train, column_name='bird category')
Y_train_pred.head()

Unnamed: 0,bird category
0,1
1,1
2,1
3,-1
4,1


# Testing

In [54]:
# Preprocess the test features and labels with the same helper used for the
# training split.
# NOTE(review): the test frame is preprocessed on its own here. If the helper
# derives scaling statistics (e.g. min/max) from the frame it receives, the
# test features are scaled with test-set statistics rather than the ones used
# for training -- confirm in Preprocessing.py whether that is intended.
preprocessing(data=X_test)
preprocessing(data=Y_test, classes=[class1, class2])

Column: body_mass
Column: beak_length
Column: bird category


In [55]:
# Predict labels for the held-out test rows with the already trained model.
Y_test_pred = slp_model.predict(X=X_test, column_name='bird category')
Y_test_pred.head()

Unnamed: 0,bird category
0,1
1,-1
2,1
3,-1
4,1


# Evaluation

## Train

In [56]:
# Score the model on the data it was trained on by comparing the true
# training labels with the predictions made for the same rows.
accuracy = slp_model.accuracy_score(Y=Y_train, Y_predict=Y_train_pred)
print(f"train accuracy = {accuracy}")

train accuracy = 0.95


In [57]:
# Confusion matrix for the training split (actual vs. predicted labels).
# Note: the name `confusion_matrix` is reassigned later for the test split.
confusion_matrix = slp_model.confusion_matrix(Y=Y_train, Y_pred=Y_train_pred)
confusion_matrix

Unnamed: 0,Predicted Positive,Predicted Negative
Actual Positive,30,0
Actual Negative,3,27


## Test

In [58]:
# Score the model on the held-out test split. The printed label says "test"
# so this figure cannot be mistaken for the training accuracy.
# Note: this reassigns `accuracy`, replacing the training score stored earlier.
accuracy = slp_model.accuracy_score(Y=Y_test, Y_predict=Y_test_pred)
print(f"test accuracy = {accuracy}")

train accuracy = 0.9


In [59]:
# Confusion matrix for the test split (actual vs. predicted labels).
# Note: this overwrites the training confusion matrix stored under the same name.
confusion_matrix = slp_model.confusion_matrix(Y=Y_test, Y_pred=Y_test_pred)
confusion_matrix

Unnamed: 0,Predicted Positive,Predicted Negative
Actual Positive,16,4
Actual Negative,0,20
