## Imports

In [28]:
import pandas as pd
import numpy as np
from SLP import SLP
from Preprocessing import preprocessing

## Data Reading

In [29]:
# Load the full birds dataset; the path is relative to the notebook's working directory.
all_data = pd.read_csv('birds.csv')

In [30]:
# Preview the first rows to check the column layout used by the positional indexing below.
all_data.head()

Unnamed: 0,gender,body_mass,beak_length,beak_depth,fin_length,bird category
0,male,3750,39.1,18.7,181,A
1,female,3800,39.5,17.4,186,A
2,female,3250,40.3,18.0,195,A
3,female,3500,39.6,17.7,186,A
4,female,3450,36.7,19.3,193,A


In [31]:
# Plain-text view of the first two rows, skipping column 0 (gender).
print(all_data.iloc[0:2, 1:])

   body_mass  beak_length  beak_depth  fin_length bird category
0       3750         39.1        18.7         181             A
1       3800         39.5        17.4         186             A


## Data Slicing

### Feature Selecting

In [32]:
# Positional (iloc) column index of every selectable feature in `all_data`,
# listed in the order the columns appear in the table.
feature_columns = ['gender', 'body_mass', 'beak_length', 'beak_depth', 'fin_length']
feature_index = {name: position for position, name in enumerate(feature_columns)}

In [33]:
# The two feature names to train on (will be taken from the gui).
feature1 = 'body_mass'
feature2 = 'beak_length'

# Translate the names into positional column indices for iloc slicing.
f1 = feature_index[feature1]
f2 = feature_index[feature2]

### Class Selecting 

In [34]:
# Ordinal of each bird category; used below to locate that class's block of rows.
class_index = {label: ordinal for ordinal, label in enumerate(('A', 'B', 'C'))}

In [35]:
# The two categories to separate (will be taken from the gui).
class1 = 'A'
class2 = 'C'

# Row bounds are computed positionally: this assumes the dataset stores each
# class as one contiguous block of ROWS_PER_CLASS rows, in class_index order.
ROWS_PER_CLASS = 50
b1 = class_index[class1] * ROWS_PER_CLASS
b2 = class_index[class2] * ROWS_PER_CLASS
e1 = b1 + ROWS_PER_CLASS
e2 = b2 + ROWS_PER_CLASS

print(f"Class {class1}: {b1} -> {e1}")
print(f"Class {class2}: {b2} -> {e2}")

Class A: 0 -> 50
Class C: 100 -> 150


### Slicing

In [36]:
# Position of the label column ('bird category') in `all_data`.
label_col = 5
selected_cols = [f1, f2, label_col]

# Take each class's rows by position, keeping the two chosen features and the label.
C1 = all_data.iloc[b1:e1, selected_cols]
C2 = all_data.iloc[b2:e2, selected_cols]

# The positional bounds only hold if the CSV is grouped by class in fixed-size
# blocks. Fail loudly instead of silently training on mixed or wrong classes.
assert (C1.iloc[:, 2] == class1).all(), f"rows {b1}:{e1} are not all class {class1}"
assert (C2.iloc[:, 2] == class2).all(), f"rows {b2}:{e2} are not all class {class2}"

print(C1.shape)
print(C2.shape)

(50, 3)
(50, 3)


## Data Shuffling

In [37]:
# Per class, the first TRAIN_ROWS_PER_CLASS rows go to training and the rest to testing.
TRAIN_ROWS_PER_CLASS = 30
train = pd.concat([cls_rows.iloc[:TRAIN_ROWS_PER_CLASS] for cls_rows in (C1, C2)])
test = pd.concat([cls_rows.iloc[TRAIN_ROWS_PER_CLASS:] for cls_rows in (C1, C2)])

print(train.shape)
print(test.shape)

(60, 3)
(40, 3)


In [38]:
# Before shuffling: the first rows all come from class1, because train was built by concatenation.
train.head()

Unnamed: 0,body_mass,beak_length,bird category
0,3750,39.1,A
1,3800,39.5,A
2,3250,40.3,A
3,3500,39.6,A
4,3450,36.7,A


In [39]:
# Shuffle the rows so the two classes are interleaved, then renumber the index from 0.
# A fixed seed makes the row order reproducible across Restart & Run All;
# without random_state every run produced a different order.
SEED = 42
train = train.sample(frac=1, random_state=SEED).reset_index(drop=True)
test = test.sample(frac=1, random_state=SEED).reset_index(drop=True)

In [40]:
# After shuffling: the classes should now be interleaved and the index renumbered from 0.
train.head()

Unnamed: 0,body_mass,beak_length,bird category
0,3800,46.6,C
1,3800,50.6,C
2,3250,46.1,C
3,3800,39.5,A
4,3525,45.4,C


## Data Splitting

In [41]:
# Feature matrices: the first two columns of each split (the two selected features).
# .copy() makes them independent frames, so the in-place preprocessing applied
# later neither writes through to `train` / `test` nor triggers pandas'
# chained-assignment warning for modifying a slice.
X_train = train.iloc[:, 0:2].copy()
X_test = test.iloc[:, 0:2].copy()

print(X_train.shape)
print(X_test.shape)

(60, 2)
(40, 2)


In [42]:
# Raw (not yet preprocessed) feature values.
X_train.head()

Unnamed: 0,body_mass,beak_length
0,3800,46.6
1,3800,50.6
2,3250,46.1
3,3800,39.5
4,3525,45.4


In [43]:
# Label frames: the third column of each split, kept as a single-column
# DataFrame (not a Series) that retains the 'bird category' column name.
Y_train = train.iloc[:, 2].to_frame()
Y_test = test.iloc[:, 2].to_frame()

print(Y_train.shape)
print(Y_test.shape)

(60, 1)
(40, 1)


In [44]:
# Labels are still the original category letters at this point.
Y_train.head()

Unnamed: 0,bird category
0,C
1,C
2,C
3,A
4,C


# Training

In [45]:
# Preprocess the training features. No return value is captured, and the next
# cell shows X_train with rescaled values, so the frame is modified in place.
# NOTE(review): the output looks like min-max scaling into [0, 1] — confirm in Preprocessing.py.
preprocessing(data=X_train)
# Preprocess the labels, passing the two selected classes.
# NOTE(review): predictions below are +1 / -1, so presumably class1 -> 1 and
# class2 -> -1 — TODO confirm the mapping in Preprocessing.py.
preprocessing(data=Y_train, classes=[class1, class2])

Column: body_mass
Column: beak_length
Column: bird category


In [46]:
# Same frame as before, now showing the preprocessed feature values.
X_train.head()

Unnamed: 0,body_mass,beak_length
0,0.507042,0.523013
1,0.507042,0.690377
2,0.197183,0.502092
3,0.507042,0.225941
4,0.352113,0.472803


In [47]:
# Hyperparameters for the single-layer perceptron, named so they are easy to tune.
LEARNING_RATE = 0.01
N_EPOCHS = 1000

# Build the model and train it on the preprocessed training split.
slp_model = SLP(learning_rate=LEARNING_RATE, n_epochs=N_EPOCHS)
slp_model.fit(X=X_train, Y=Y_train)

In [48]:
# Predict on the training rows. The result is displayed below as a single
# 'bird category' column of +1 / -1 values (column_name presumably names that
# output column — it matches the displayed header).
Y_train_pred = slp_model.predict(X=X_train, column_name='bird category')
Y_train_pred.head()

Unnamed: 0,bird category
0,-1
1,-1
2,-1
3,1
4,-1


# Testing

In [49]:
# Apply the same preprocessing calls to the held-out split.
# NOTE(review): only X_test is passed here, so any scaling statistics are
# presumably recomputed from the test rows rather than reused from X_train —
# verify in Preprocessing.py; the test split should normally be transformed
# with the statistics learned on the training split.
preprocessing(data=X_test)
preprocessing(data=Y_test, classes=[class1, class2])

Column: body_mass
Column: beak_length
Column: bird category


In [50]:
# Predict on the held-out rows with the model fitted on the training split.
Y_test_pred = slp_model.predict(X=X_test, column_name='bird category')
Y_test_pred.head()

Unnamed: 0,bird category
0,-1
1,-1
2,-1
3,-1
4,-1


# Evaluation

## Train

In [51]:
# Score the training predictions against the preprocessed training labels.
# Note the keyword here is Y_predict, while confusion_matrix below takes Y_pred.
accuracy = slp_model.accuracy_score(Y=Y_train, Y_predict=Y_train_pred)
print(f"train accuracy = {accuracy}")

train accuracy = 0.9666666666666667


In [52]:
# Actual-vs-predicted table for the training split, shown as the cell's output.
confusion_matrix = slp_model.confusion_matrix(Y=Y_train, Y_pred=Y_train_pred)
confusion_matrix

Unnamed: 0,Predicted Positive,Predicted Negative
Actual Positive,29,1
Actual Negative,1,29


## Test

In [53]:
# Score the held-out predictions against the preprocessed test labels.
# `accuracy` is reused, so the training score from the previous section is overwritten.
accuracy = slp_model.accuracy_score(Y=Y_test, Y_predict=Y_test_pred)
# This cell reports the test split, so the label must say "test", not "train".
print(f"test accuracy = {accuracy}")

train accuracy = 0.95


In [54]:
# Actual-vs-predicted table for the held-out split; overwrites the training table stored under the same name.
confusion_matrix = slp_model.confusion_matrix(Y=Y_test, Y_pred=Y_test_pred)
confusion_matrix

Unnamed: 0,Predicted Positive,Predicted Negative
Actual Positive,19,1
Actual Negative,1,19
