## Imports

In [294]:
import pandas as pd
import numpy as np

## Data Reading

In [295]:
# Load the full bird dataset from the CSV file next to the notebook.
all_data = pd.read_csv(filepath_or_buffer='birds.csv')

In [296]:
# Preview the first rows to confirm the columns were parsed as expected.
print(all_data.head())

   gender  body_mass  beak_length  beak_depth  fin_length bird category
0    male       3750         39.1        18.7         181             A
1  female       3800         39.5        17.4         186             A
2  female       3250         40.3        18.0         195             A
3  female       3500         39.6        17.7         186             A
4  female       3450         36.7        19.3         193             A


In [297]:
# Positional slicing check: first two rows, every column except the first.
print(all_data.iloc[0:2, 1:])

   body_mass  beak_length  beak_depth  fin_length bird category
0       3750         39.1        18.7         181             A
1       3800         39.5        17.4         186             A


## Data Slicing

### Feature Selecting

In [298]:
# Map each feature name to its positional column index (used with .iloc).
feature_index = {
    name: position
    for position, name in enumerate(
        ['gender', 'body_mass', 'beak_length', 'beak_depth', 'fin_length']
    )
}

In [299]:
feature1, feature2 = 'body_mass', 'gender'  # will be taken from the gui
# Translate the chosen feature names into positional column indices.
f1, f2 = (feature_index[name] for name in (feature1, feature2))

### Class Selecting 

In [300]:
# Map each class label to its ordinal position: A -> 0, B -> 1, C -> 2.
class_index = {label: position for position, label in enumerate('ABC')}

In [301]:
class1, class2 = 'A', 'C' # will be taken from the gui
# The arithmetic below treats every class as a block of 50 consecutive rows,
# ordered as in class_index: b* is the first row, e* the exclusive end row.
b1, b2 = (class_index[label] * 50 for label in (class1, class2))
e1, e2 = (start + 50 for start in (b1, b2))

print(f"Class {class1}: {b1} -> {e1}")
print(f"Class {class2}: {b2} -> {e2}")

Class A: 0 -> 50
Class C: 100 -> 150


### Slicing

In [302]:
# For each selected class keep its row block and three columns: the two chosen
# features plus the column at position 5 (shown as 'bird category' by head()).
C1, C2 = (
    all_data.iloc[start:stop, [f1, f2, 5]]
    for start, stop in ((b1, e1), (b2, e2))
)

print(C1.shape, C2.shape, sep="\n")

(50, 3)
(50, 3)


## Data Splitting

In [303]:
# Per class: the first 30 rows go to training and the remaining rows to testing.
# Only the two feature columns (positions 0 and 1) are kept here.
X_train = pd.concat([part.iloc[0:30, 0:2] for part in (C1, C2)])
X_test = pd.concat([part.iloc[30:, 0:2] for part in (C1, C2)])

print(X_train.shape, X_test.shape, sep="\n")

(60, 2)
(40, 2)


In [304]:
# Inspect the raw (not yet preprocessed) training features.
print(X_train.head())

   body_mass  gender
0       3750    male
1       3800  female
2       3250  female
3       3500  female
4       3450  female


In [305]:
# Labels use the same 30 / remainder split as the features; the label is the
# third column (position 2) of each class slice, wrapped as a one-column frame.
Y_train = pd.DataFrame(pd.concat([part.iloc[0:30, 2] for part in (C1, C2)], axis=0))
Y_test = pd.DataFrame(pd.concat([part.iloc[30:, 2] for part in (C1, C2)], axis=0))

print(Y_train.shape, Y_test.shape, sep="\n")

(60, 1)
(40, 1)


In [306]:
# Inspect the raw (still string-valued) training labels.
print(Y_train.head())

  bird category
0             A
1             A
2             A
3             A
4             A


# Preprocessing

In [307]:
def pre_gender(col):
    """Encode the gender column numerically and fill missing entries.

    Parameters
    ----------
    col : pandas.Series
        Raw gender values. 'male' becomes 1 and 'female' becomes 0; any other
        value is left as it is by the replacement.

    Returns
    -------
    pandas.Series
        A new Series with the encoding applied and nulls replaced by the most
        frequent non-null value. When every entry is null there is no mode, so
        the nulls are left in place instead of raising.

    Notes
    -----
    Prints the null count before and after filling.
    """
    # Replace 'male' with 1 and 'female' with 0 in the 'gender' column
    col = col.replace({'male': 1, 'female': 0})

    # replace null values with the mode
    print(f"Null: #{col.isnull().sum()}")
    modes = col.mode()
    if not modes.empty:
        # Reassign instead of inplace=True so the result never depends on
        # whether `col` is a view or a copy.
        col = col.fillna(modes.iloc[0])
    print(f"Null: #{col.isnull().sum()}")

    return col

In [308]:
def normalize(col, min_value=None, max_value=None):
    """Min-max scale a numeric column.

    Parameters
    ----------
    col : pandas.Series
        Numeric values to scale.
    min_value, max_value : scalar, optional
        Bounds to scale against. They default to the column's own minimum and
        maximum; pass the training-set bounds to scale another split on the
        same basis.

    Returns
    -------
    pandas.Series
        ``(col - min_value) / (max_value - min_value)``. When the two bounds
        are equal the division would be 0/0, so every non-null entry is mapped
        to 0.0 instead of NaN.
    """
    if min_value is None:
        min_value = col.min()
    if max_value is None:
        max_value = col.max()

    span = max_value - min_value
    shifted = col - min_value
    if span == 0:
        # Constant column: multiplying by 0.0 yields zeros and keeps NaNs as NaN.
        return shifted * 0.0
    return shifted / span

In [309]:
def outliers(col):
    """Clip values lying outside the 1.5 * IQR fences.

    Parameters
    ----------
    col : pandas.Series
        Numeric column to process.

    Returns
    -------
    pandas.Series
        A new Series in which values below ``Q1 - 1.5 * IQR`` or above
        ``Q3 + 1.5 * IQR`` are replaced by the respective bound. The input is
        not modified, so callers must use the return value.
    """
    # Calculate Q1 (25th percentile) and Q3 (75th percentile)
    Q1 = col.quantile(0.25)
    Q3 = col.quantile(0.75)
    IQR = Q3 - Q1  # Interquartile Range

    # Define the bounds for outliers
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    # Replace outliers with the lower and upper bounds
    return col.clip(lower=lower_bound, upper=upper_bound)

In [310]:
def pre_body_mass(col):
    """Clip outliers in the body-mass column, then min-max normalize it.

    Returns a new Series; the input is not modified.
    """
    # outliers() returns a clipped copy, so its result must be kept;
    # calling it without assigning leaves the column unclipped.
    col = outliers(col)
    col = normalize(col)
    return col

In [311]:
def pre_beak_length(col):
    """Clip outliers in the beak-length column, then min-max normalize it.

    Returns a new Series; the input is not modified.
    """
    # outliers() returns a clipped copy, so its result must be kept;
    # calling it without assigning leaves the column unclipped.
    col = outliers(col)
    col = normalize(col)
    return col

In [312]:
def pre_beak_depth(col):
    """Clip outliers in the beak-depth column, then min-max normalize it.

    Returns a new Series; the input is not modified.
    """
    # outliers() returns a clipped copy, so its result must be kept;
    # calling it without assigning leaves the column unclipped.
    col = outliers(col)
    col = normalize(col)
    return col

In [313]:
def pre_fin_length(col):
    """Clip outliers in the fin-length column, then min-max normalize it.

    Returns a new Series; the input is not modified.
    """
    # outliers() returns a clipped copy, so its result must be kept;
    # calling it without assigning leaves the column unclipped.
    col = outliers(col)
    col = normalize(col)
    return col

In [402]:
def pre_bird_category(col, positive=None, negative=None):
    """Encode the two selected class labels as +1 / -1.

    Parameters
    ----------
    col : pandas.Series
        Class labels.
    positive, negative : optional
        Labels mapped to 1 and -1 respectively. When omitted they fall back to
        the notebook-level ``class1`` and ``class2`` selections, which keeps
        existing one-argument calls working.

    Returns
    -------
    pandas.Series
        A new Series with the two labels replaced; other values are untouched.
    """
    if positive is None:
        positive = class1
    if negative is None:
        negative = class2
    return col.replace({positive: 1, negative: -1})

In [323]:
def preprocessing(data):
    """Apply the column-specific preprocessing step to each column of ``data``.

    The frame is modified in place (each handled column is reassigned) and
    nothing is returned. Every column name is printed as it is visited;
    columns without a registered handler are left unchanged.
    """
    handlers = {
        'gender': pre_gender,
        'body_mass': pre_body_mass,
        'beak_length': pre_beak_length,
        'beak_depth': pre_beak_depth,
        'fin_length': pre_fin_length,
        'bird category': pre_bird_category,
    }

    for name in data.columns:
        print(f"Column: {name}")
        handler = handlers.get(name)
        if handler is not None:
            data[name] = handler(data[name])


# Models

## Single-Layer Perceptron

In [394]:
def SLP(X, Y, n_epochs, learning_rate, weights, bias):
    """Train a single-layer perceptron with a signum activation.

    Parameters
    ----------
    X : pandas.DataFrame
        Numeric feature matrix, one sample per row.
    Y : pandas.DataFrame
        Targets in the first column, compared against predictions of +1 / -1.
    n_epochs : int
        Maximum number of passes over the data.
    learning_rate : float
        Step size applied to each correction.
    weights : array-like
        Initial weights, one per feature. The caller's array is not modified.
    bias : float
        Initial bias.

    Returns
    -------
    (numpy.ndarray, float)
        The trained weights and bias.
    """
    # Convert once up front; indexing X.iloc[i] inside the loops rebuilds a
    # Series for every sample in every epoch.
    features = X.to_numpy(dtype=float)
    targets = Y.iloc[:, 0].to_numpy()

    # Work on a float copy so the in-place updates below never leak into the
    # array the caller passed in.
    weights = np.array(weights, dtype=float)

    for _ in range(n_epochs):
        updated = False
        for x_i, y in zip(features, targets):
            # Calculate the weighted sum
            linear_output = np.dot(x_i, weights) + bias

            # Apply signum function
            y_pred = 1 if linear_output >= 0 else -1

            # Update weights and bias if there's an error
            if y_pred != y:
                error = y - y_pred
                weights += learning_rate * error * x_i
                bias += learning_rate * error
                updated = True

        # An epoch without corrections means later epochs would change nothing.
        if not updated:
            break

    return weights, bias


## Adaline

# Training

### Preprocessing

In [395]:
# preprocessing() rewrites the columns of the frame it is given and returns
# nothing, so X_train / Y_train are modified in place by these calls.
# NOTE(review): re-running this cell preprocesses already-processed data.
preprocessing(X_train)
preprocessing(Y_train)
print(X_train.head())

Column: body_mass
Column: gender
Null: #0
Null: #0
Column: bird category
hereeee
   body_mass  gender
0   0.478873     1.0
1   0.507042     0.0
2   0.197183     0.0
3   0.338028     0.0
4   0.309859     0.0


### Fitting

In [396]:
# Perceptron hyperparameters: step size and number of passes over the data
learning_rate, n_epochs = 0.01, 100

# Seed NumPy's global generator so the initial weights are reproducible,
# then draw one weight per selected feature and start with a zero bias
np.random.seed(30)
weights, bias = np.random.rand(2), 0

print(weights)

[0.64414354 0.38074849]


In [398]:
# Train the perceptron; the returned weights and bias replace the initial ones
weights, bias = SLP(
    X=X_train,
    Y=Y_train,
    n_epochs=n_epochs,
    learning_rate=learning_rate,
    weights=weights,
    bias=bias,
)

In [399]:
# Show the weights after training.
print(weights)

[ 0.02667875 -0.01925151]


# Testing

In [403]:
# Preprocess the held-out split in place, through the same function as training.
# NOTE(review): normalize() and pre_gender() derive min/max and the mode from
# the column they receive, so the test split is scaled and imputed with its own
# statistics rather than the training ones. Confirm this is intended.
preprocessing(X_test)
preprocessing(Y_test)

Column: body_mass
Column: gender
Null: #1
Null: #0
Column: bird category


# Evaluation

In [407]:
def predict(X, Y, weights, bias):
    """Classify every row of ``X`` with the perceptron and return the accuracy.

    Parameters
    ----------
    X : pandas.DataFrame
        Numeric feature matrix, one sample per row.
    Y : pandas.DataFrame
        True labels in the first column. A label equal to 1 is the positive
        class; any other label counts as negative.
    weights : array-like
        Trained weights, one per feature.
    bias : float
        Trained bias.

    Returns
    -------
    float
        Fraction of samples whose predicted class matches the true class, or
        NaN when ``X`` has no rows (previously a ZeroDivisionError).

    Notes
    -----
    Prints the number of samples, as the notebook output already relies on it.
    """
    n_samples = X.shape[0]
    print(n_samples)
    if n_samples == 0:
        # Accuracy is undefined without samples; avoid dividing by zero.
        return float("nan")

    # Convert once instead of building a Series per row via X.iloc[i].
    features = X.to_numpy(dtype=float)
    targets = Y.iloc[:, 0].to_numpy()

    correct = 0
    for x_i, y in zip(features, targets):
        # Signum on the weighted sum: a non-negative output predicts class +1.
        predicted_positive = bool(np.dot(x_i, weights) + bias >= 0)
        # Correct means a true positive or a true negative.
        if predicted_positive == bool(y == 1):
            correct += 1

    return correct / n_samples

In [408]:
# Accuracy on the data the model was trained on
accuracy = predict(X=X_train, Y=Y_train, weights=weights, bias=bias)
print(f"accuracy = {accuracy}")

60
accuracy = 0.5


In [409]:
# Accuracy on the held-out split
accuracy = predict(X=X_test, Y=Y_test, weights=weights, bias=bias)
print(f"accuracy = {accuracy}")

40
accuracy = 0.5
