## Imports

In [1]:
import pandas as pd
import numpy as np
from SLP import SLP

## Data Reading

In [2]:
# Load the full dataset from the CSV file in the working directory.
all_data = pd.read_csv('birds.csv')

In [3]:
# Preview the first rows to check the column names and value formats.
print(all_data.head())

   gender  body_mass  beak_length  beak_depth  fin_length bird category
0    male       3750         39.1        18.7         181             A
1  female       3800         39.5        17.4         186             A
2  female       3250         40.3        18.0         195             A
3  female       3500         39.6        17.7         186             A
4  female       3450         36.7        19.3         193             A


In [4]:
# Sanity-check positional slicing: first two rows, every column except the first.
print(all_data.iloc[0:2, 1:])

   body_mass  beak_length  beak_depth  fin_length bird category
0       3750         39.1        18.7         181             A
1       3800         39.5        17.4         186             A


## Data Slicing

### Feature Selecting

In [5]:
# Positional index of each selectable feature column; the positions are used
# with `iloc`, so they have to follow the column order of the loaded table.
feature_index = {
    name: position
    for position, name in enumerate(
        ['gender', 'body_mass', 'beak_length', 'beak_depth', 'fin_length']
    )
}

In [6]:
# The two features to train on (placeholder values; will be taken from the gui).
feature1 = 'body_mass'
feature2 = 'gender'

# Translate the chosen names into column positions.
f1 = feature_index[feature1]
f2 = feature_index[feature2]

### Class Selecting 

In [7]:
# Ordinal position of each bird category; used to locate a category's rows.
class_index = dict(zip(('A', 'B', 'C'), range(3)))

In [8]:
# The two classes to separate (placeholder values; will be taken from the gui).
class1 = 'A'
class2 = 'C'

# Half-open row range [b, e) of each class.
# NOTE(review): assumes the file stores 50 consecutive rows per category in
# A, B, C order — verify against birds.csv.
b1 = class_index[class1] * 50
b2 = class_index[class2] * 50
e1 = b1 + 50
e2 = b2 + 50

print(f"Class {class1}: {b1} -> {e1}", f"Class {class2}: {b2} -> {e2}", sep="\n")

Class A: 0 -> 50
Class C: 100 -> 150


### Slicing

In [9]:
# For each selected class keep its row range, restricted to the two chosen
# feature columns plus the label column (position 5).
C1, C2 = (
    all_data.iloc[start:stop, [f1, f2, 5]]
    for start, stop in ((b1, e1), (b2, e2))
)

print(C1.shape, C2.shape, sep="\n")

(50, 3)
(50, 3)


## Train/Test Split and Shuffling

In [10]:
# Per class: the first 30 rows go to training, the remaining rows to testing.
train = pd.concat([part.iloc[:30] for part in (C1, C2)])
test = pd.concat([part.iloc[30:] for part in (C1, C2)])

print(train.shape, test.shape, sep="\n")

(60, 3)
(40, 3)


In [None]:
# Inspect the training rows before shuffling (class 1 rows first, then class 2).
train.head()

Unnamed: 0,body_mass,gender,bird category
0,3750,male,A
1,3800,female,A
2,3250,female,A
3,3500,female,A
4,3450,female,A


In [12]:
# Shuffle the rows so the two classes are interleaved, then renumber the index.
# A fixed random_state makes the resulting row order identical on every run;
# without it each execution produced a different order.
train = train.sample(frac=1, random_state=42).reset_index(drop=True)
test = test.sample(frac=1, random_state=42).reset_index(drop=True)

In [13]:
# Inspect the training rows after shuffling.
train.head()

Unnamed: 0,body_mass,gender,bird category
0,3300,female,C
1,3450,female,C
2,3625,female,A
3,3250,female,A
4,3800,female,A


## Data Splitting

In [14]:
# Feature matrices: the first two columns of each split.
# .copy() detaches them from `train` / `test`: preprocessing() later assigns
# columns on these frames in place, which on a plain iloc slice triggers
# pandas' SettingWithCopyWarning and leaves it unclear whether the parent
# frame is affected.
X_train = train.iloc[:, 0:2].copy()
X_test = test.iloc[:, 0:2].copy()

print(X_train.shape)
print(X_test.shape)

(60, 2)
(40, 2)


In [15]:
# Preview the raw (not yet preprocessed) training features.
print(X_train.head())

   body_mass  gender
0       3300  female
1       3450  female
2       3625  female
3       3250  female
4       3800  female


In [16]:
# Labels: the third column of each split, wrapped as a one-column DataFrame.
Y_train, Y_test = (
    pd.DataFrame(split.iloc[:, 2])
    for split in (train, test)
)

print(Y_train.shape, Y_test.shape, sep="\n")

(60, 1)
(40, 1)


In [17]:
# Preview the raw (still string-valued) training labels.
print(Y_train.head())

  bird category
0             C
1             C
2             A
3             A
4             A


# Preprocessing

In [18]:
def pre_gender(col):
    """Encode a gender column numerically and impute its missing entries.

    'male' becomes 1 and 'female' becomes 0; any other value is passed
    through unchanged. Missing entries are then replaced with the most
    frequent value of the encoded column.

    Parameters
    ----------
    col : pandas.Series
        Raw gender values.

    Returns
    -------
    pandas.Series
        A new Series; the input is not modified. If the column contains no
        non-missing value at all there is nothing to impute from, and the
        encoded column is returned with its missing entries left in place.
    """
    codes = {'male': 1, 'female': 0}
    # Element-wise lookup instead of Series.replace: same values, but it does
    # not rely on replace's deprecated silent downcast of object columns.
    encoded = col.map(lambda value: codes.get(value, value))

    modes = encoded.mode()
    if modes.empty:
        # Empty or all-missing column: no mode exists, so indexing it would raise.
        return encoded
    return encoded.fillna(modes[0])

In [19]:
def normalize(col):
    """Min-max scale a numeric column to the range [0, 1].

    Parameters
    ----------
    col : pandas.Series
        Numeric values to rescale.

    Returns
    -------
    pandas.Series
        A new Series where the column minimum maps to 0 and the maximum to 1.
        A constant column (maximum equal to minimum) maps to all zeros.
    """
    min_value = col.min()
    span = col.max() - min_value
    if span == 0:
        # Constant column: dividing by the zero range would turn every value
        # into NaN, so return the (all-zero) offsets as floats instead.
        return (col - min_value).astype(float)
    return (col - min_value) / span

In [20]:
def outliers(col, factor=1.5):
    """Clip values that lie outside the interquartile-range fences.

    The fences are Q1 - factor * IQR and Q3 + factor * IQR, where Q1 and Q3
    are the 25th and 75th percentiles of the column and IQR = Q3 - Q1.
    Values beyond a fence are replaced by that fence.

    Parameters
    ----------
    col : pandas.Series
        Numeric values to clip.
    factor : float, default 1.5
        Multiple of the IQR that separates inliers from outliers.

    Returns
    -------
    pandas.Series
        A new, clipped Series. The input is not modified, so the caller has
        to use the returned value.
    """
    q1 = col.quantile(0.25)
    q3 = col.quantile(0.75)
    iqr = q3 - q1
    return col.clip(lower=q1 - factor * iqr, upper=q3 + factor * iqr)

In [21]:
def pre_body_mass(col):
    """Clip outliers in the body-mass column, then min-max normalize it.

    Parameters
    ----------
    col : pandas.Series
        Raw body-mass values.

    Returns
    -------
    pandas.Series
        The clipped and normalized column.
    """
    # outliers() returns a new Series instead of modifying its argument, so
    # its result must be captured for the clipping to take effect.
    clipped = outliers(col)
    return normalize(clipped)

In [22]:
def pre_beak_length(col):
    """Clip outliers in the beak-length column, then min-max normalize it.

    Parameters
    ----------
    col : pandas.Series
        Raw beak-length values.

    Returns
    -------
    pandas.Series
        The clipped and normalized column.
    """
    # outliers() returns a new Series instead of modifying its argument, so
    # its result must be captured for the clipping to take effect.
    clipped = outliers(col)
    return normalize(clipped)

In [23]:
def pre_beak_depth(col):
    """Clip outliers in the beak-depth column, then min-max normalize it.

    Parameters
    ----------
    col : pandas.Series
        Raw beak-depth values.

    Returns
    -------
    pandas.Series
        The clipped and normalized column.
    """
    # outliers() returns a new Series instead of modifying its argument, so
    # its result must be captured for the clipping to take effect.
    clipped = outliers(col)
    return normalize(clipped)

In [24]:
def pre_fin_length(col):
    """Clip outliers in the fin-length column, then min-max normalize it.

    Parameters
    ----------
    col : pandas.Series
        Raw fin-length values.

    Returns
    -------
    pandas.Series
        The clipped and normalized column.
    """
    # outliers() returns a new Series instead of modifying its argument, so
    # its result must be captured for the clipping to take effect.
    clipped = outliers(col)
    return normalize(clipped)

In [25]:
def pre_bird_category(col, positive=None, negative=None):
    """Encode the two selected bird categories as +1 / -1 targets.

    Parameters
    ----------
    col : pandas.Series
        Category labels.
    positive, negative : optional
        Labels mapped to 1 and -1 respectively. When omitted they default to
        the notebook-level selections `class1` and `class2`, so existing
        one-argument calls behave as before.

    Returns
    -------
    pandas.Series
        A new Series with the two labels replaced; any other value is passed
        through unchanged.
    """
    if positive is None:
        positive = class1
    if negative is None:
        negative = class2

    targets = {positive: 1, negative: -1}
    # Element-wise lookup instead of Series.replace: same values, but the
    # result gets a numeric dtype without relying on replace's deprecated
    # silent downcast of object columns.
    return col.map(lambda label: targets.get(label, label))

In [26]:
def preprocessing(data):
    """Run the column-specific preprocessing routine on every known column.

    Each column name is printed as it is visited. A column that has a
    registered routine is overwritten in `data` with that routine's result;
    any other column is left untouched. The frame is modified in place and
    nothing is returned.
    """
    routines = {
        'gender': pre_gender,
        'body_mass': pre_body_mass,
        'beak_length': pre_beak_length,
        'beak_depth': pre_beak_depth,
        'fin_length': pre_fin_length,
        'bird category': pre_bird_category,
    }

    for col in data.columns:
        print(f"Column: {col}")
        routine = routines.get(col)
        if routine is not None:
            data[col] = routine(data[col])


# Training

### Preprocessing

In [27]:
# Encode / scale the training features and labels.
# preprocessing() overwrites the columns of the frame it is given and returns
# nothing, so X_train and Y_train themselves change here.
preprocessing(X_train)
preprocessing(Y_train)

Column: body_mass
Column: gender
Column: bird category


In [28]:
# Inspect the training features after preprocessing.
X_train.head()

Unnamed: 0,body_mass,gender
0,0.225352,0.0
1,0.309859,0.0
2,0.408451,0.0
3,0.197183,0.0
4,0.507042,0.0


In [29]:
# Build the model from the project-local SLP module and train it on the
# preprocessed training data.
# NOTE(review): SLP is presumably a single-layer perceptron; the exact meaning
# of learning_rate / n_epochs and of fit()'s arguments is defined in SLP.py,
# not in this notebook — confirm there.
slp_model = SLP(learning_rate=0.01, n_epochs=1000)
slp_model.fit(X=X_train, Y=Y_train)

# Testing

In [30]:
# Apply the same preprocessing routines to the held-out data (in place).
# NOTE(review): the min/max, quartiles and mode are recomputed from the test
# rows themselves rather than reused from the training set, so train and test
# features may end up on different scales — consider deriving the statistics
# from the training data only.
preprocessing(X_test)
preprocessing(Y_test)

Column: body_mass
Column: gender
Column: bird category


# Evaluation

In [31]:
# Score the model on the data it was trained on.
# NOTE(review): predict() is used here as if it returns an accuracy when it is
# given the true labels — confirm against SLP.py.
accuracy = slp_model.predict(X=X_train, Y=Y_train)
print(f"accuracy = {accuracy}")

accuracy = 0.5


In [32]:
# Score the model on the held-out test data.
# NOTE(review): this reuses the name `accuracy`, overwriting the training
# score computed in the previous cell.
accuracy = slp_model.predict(X=X_test, Y=Y_test)
print(f"accuracy = {accuracy}")

accuracy = 0.525
