In [52]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import confusion_matrix

In [53]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [None]:
df = pd.read_csv("../data/penguins.csv")

print(df.iloc[100:151]['species'])

In [55]:
df = df.fillna("Unknown")

gender_encoder = LabelEncoder()
df["gender"] = gender_encoder.fit_transform(df["gender"])

species_encoder = OneHotEncoder()
species = species_encoder.fit_transform(df["species"].values.reshape(-1, 1)).toarray()
n_labels = species.shape[1]


In [56]:
scaler = MinMaxScaler()
df = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)

ValueError: could not convert string to float: 'Adelie'

In [None]:
features = list(set(df.drop(["species"], axis=1).columns))

print(features)

['bill_depth_mm', 'body_mass_g', 'bill_length_mm', 'gender', 'flipper_length_mm']


In [None]:
Y_full = species
X_full = df.drop(["species"], axis=1)

X_train, X_test, Y_train, Y_test = train_test_split(
    X_full, Y_full, random_state=0, train_size=.7)

In [None]:
X_train = np.array(X_train).reshape(X_train.shape[1], X_train.shape[0])
X_test = np.array(X_test).reshape(X_test.shape[1], X_test.shape[0])

Y_train = np.array(Y_train).reshape(n_labels, Y_train.shape[1])
Y_test = np.array(Y_test).reshape(n_labels, Y_test.shape[1])

print("X_train.shape", X_train.shape)
print("X_test.shape", X_test.shape)
print("Y_train.shape", Y_train.shape)
print("Y_test.shape", Y_test.shape)

X_train.shape (5, 105)
X_test.shape (5, 45)
Y_train.shape (3, 105)
Y_test.shape (3, 45)


### first layer

In [None]:
# 8 neurons in the hidden layer
W1 = np.random.randn(8, X_train.shape[0])

B1 = np.zeros((8, 1))

Z1 = np.dot(W1, X_train) + B1
A1 = np.tanh(Z1)

### second layer

In [None]:
W2 = np.random.randn(3, 8)
B2 = np.random.randn(3, 1)

Z2 = np.dot(W2, A1) + B2

A2 = sigmoid(Z2)

A2.shape

(3, 105)

In [None]:
eta = 0.001

### Back prop

In [None]:
for i in range(10000):
    dZ2 = A2 - Y_train
    dW2 = (1 / X_train.shape[1]) * np.dot(dZ2, A1.T)
    dB2 = (1 / X_train.shape[1]) * np.sum(dZ2, axis=1, keepdims=True)

    dA1 = np.dot(W2.T, dZ2)
    dZ1 = dA1 * (1 - np.power(A1, 2))
    dW1 = (1 / X_train.shape[1]) * np.dot(dZ1, X_train.T)
    dB1 = (1 / X_train.shape[1]) * np.sum(dZ1, axis=1, keepdims=True)

    W2 = W2 - eta * dW2
    B2 = B2 - eta * dB2
    W1 = W1 - eta * dW1
    B1 = B1 - eta * dB1


In [None]:
prediction = sigmoid(np.dot(W2, np.tanh(np.dot(W1, X_test) + B1)) + B2)

prediction.shape

for i in range(prediction.shape[1]):
    max = 0
    max_index = 0
    for j in range(prediction.shape[0]):
        if prediction[j][i] > max:
            max = prediction[j][i]
            max_index = j
        prediction[max_index][i] = 1
    for j in range(prediction.shape[0]):
        if j != max_index:
            prediction[j][i] = 0

# accuracy
correct = 0
for i in range(prediction.shape[1]):
    if np.array_equal(prediction[:, i], Y_test[:, i]):
        correct += 1

print("Accuracy:", correct / prediction.shape[1])



Accuracy: 0.044444444444444446


In [None]:
Y_test.shape

(3, 45)