# Task 2-2

In [98]:
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, Dropout, Flatten
from tensorflow.python.keras.optimizers import adam_v2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from numpy import argmax
import matplotlib.pyplot as plt
import tensorflow as tf
import seaborn as sns
import pandas as pd
import numpy as np
import itertools

In [99]:
pd.set_option('display.max_columns', 500) 
pd.set_option('display.max_rows', 500) 

In [100]:
df = pd.read_csv('./penguins.csv')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 344 entries, 0 to 343
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   species            344 non-null    object 
 1   island             344 non-null    object 
 2   bill_length_mm     342 non-null    float64
 3   bill_depth_mm      342 non-null    float64
 4   flipper_length_mm  342 non-null    float64
 5   body_mass_g        342 non-null    float64
 6   sex                333 non-null    object 
dtypes: float64(4), object(3)
memory usage: 18.9+ KB


In [101]:
df = df.dropna()

In [102]:
df = df.sample(frac=1)

In [103]:
df.head(10)

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
70,Adelie,Torgersen,33.5,19.0,190.0,3600.0,female
33,Adelie,Dream,40.9,18.9,184.0,3900.0,male
281,Chinstrap,Dream,45.2,17.8,198.0,3950.0,female
37,Adelie,Dream,42.2,18.5,180.0,3550.0,female
94,Adelie,Dream,36.2,17.3,187.0,3300.0,female
198,Gentoo,Biscoe,45.5,13.9,210.0,4200.0,female
259,Gentoo,Biscoe,53.4,15.8,219.0,5500.0,male
248,Gentoo,Biscoe,49.4,15.8,216.0,4925.0,male
61,Adelie,Biscoe,41.3,21.1,195.0,4400.0,male
71,Adelie,Torgersen,39.7,18.4,190.0,3900.0,male


In [104]:
df['species'].unique()

array(['Adelie', 'Chinstrap', 'Gentoo'], dtype=object)

In [105]:
X = df.drop('species', axis=1)
y = df['species']

In [106]:
num_arguments = X.select_dtypes(exclude=['object']).columns
num_arguments

Index(['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g'], dtype='object')

In [107]:
cat_arguments = X.select_dtypes(include=['object']).columns
cat_arguments

Index(['island', 'sex'], dtype='object')

In [108]:
scaler_X = MinMaxScaler()
scaler_X.fit(X[num_arguments])
X[num_arguments] = scaler_X.transform(X[num_arguments])

In [109]:
for arg in cat_arguments:
    one_hot = pd.get_dummies(X[arg], prefix=arg)
    X = X.drop(arg, axis = 1)
    X = X.join(one_hot)

In [110]:
y

70        Adelie
33        Adelie
281    Chinstrap
37        Adelie
94        Adelie
198       Gentoo
259       Gentoo
248       Gentoo
61        Adelie
71        Adelie
87        Adelie
78        Adelie
86        Adelie
241       Gentoo
93        Adelie
73        Adelie
305    Chinstrap
336    Chinstrap
229       Gentoo
74        Adelie
239       Gentoo
4         Adelie
221       Gentoo
145       Adelie
52        Adelie
314    Chinstrap
194       Gentoo
228       Gentoo
247       Gentoo
343    Chinstrap
27        Adelie
187       Gentoo
151       Adelie
323    Chinstrap
262       Gentoo
35        Adelie
20        Adelie
138       Adelie
320    Chinstrap
53        Adelie
311    Chinstrap
148       Adelie
84        Adelie
269       Gentoo
249       Gentoo
192       Gentoo
324    Chinstrap
317    Chinstrap
217       Gentoo
339    Chinstrap
88        Adelie
288    Chinstrap
235       Gentoo
155       Gentoo
100       Adelie
297    Chinstrap
263       Gentoo
50        Adelie
225       Gent

In [113]:
X.loc[[169, 277, 44]]

Unnamed: 0,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,island_Biscoe,island_Dream,island_Torgersen,sex_female,sex_male
169,0.621818,0.25,0.830508,1.0,1,0,0,0,1
277,0.650909,0.761905,0.40678,0.333333,0,1,0,0,1
44,0.178182,0.452381,0.220339,0.083333,0,1,0,1,0


In [111]:
encoder_y = LabelEncoder()
y = encoder_y.fit_transform(y)

In [115]:
label_dict = {
    2: 'Gentoo',
    1: 'Chinstrap',
    0: 'Adelie',
}

In [116]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

In [117]:
n_features = X_train.shape[1]

In [118]:
X_train.shape

(233, 9)

In [119]:
n_features

9

In [120]:
def single_layer(neurons, learning_rate):
    model = Sequential([
        Dense(neurons, activation='relu', kernel_initializer='he_normal', input_shape=(n_features,)),
        Dense(3, activation='softmax')
    ])
    
    model.compile(optimizer=adam_v2.Adam(learning_rate), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    
    return model

In [121]:
def multi_layer(neurons, hidden_layers, learning_rate, dropout=.5):
    model = Sequential()
    model.add(Dense(10, activation='relu', kernel_initializer='he_normal', input_shape=(n_features,)))

    for i in range(hidden_layers):
        model.add(Dense(neurons, activation='relu'))
        model.add(Dropout(dropout))

    model.add(Dense(3, activation='softmax'))
    
    model.compile(optimizer=adam_v2.Adam(learning_rate), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    
    return model


In [122]:
neurons_list = [2, 36]
hidden_layers_list = [0, 3]
dropouts = [0.2, 0.4]
epochs_list = [3, 100]
learning_rates = [0.1, 0.01]
batch_sizes = [8, 32]

In [123]:
single_layer_results_table = pd.DataFrame(columns=['accuracy', 'loss', 'learning_rate', 'neurons', 'epochs', 'batch_size'])
multi_layer_results_table = pd.DataFrame(columns=['accuracy', 'loss', 'learning_rate', 'neurons', 'hidden_layers', 'dropout', 'epochs', 'batch_size'])

In [124]:
sl_parameters = [
    neurons_list,
    epochs_list,
    learning_rates,
    batch_sizes
]

ml_parameters = [
    neurons_list,
    hidden_layers_list,
    dropouts,
    epochs_list,
    learning_rates,
    batch_sizes
]


In [125]:
for neurons, epochs, learning_rate, batch_size in list(itertools.product(*sl_parameters)):
    sl_model = single_layer(neurons, learning_rate)
    
    history = sl_model.fit(X_train, y_train, epochs=epochs, validation_split=0.2, batch_size=batch_size, verbose=0)
    loss, accuracy = sl_model.evaluate(X_test, y_test, batch_size=batch_size, verbose=0)

    d_row = {
        'accuracy': accuracy, 
        'loss': loss, 
        'learning_rate': learning_rate,
        'neurons': neurons,
        'epochs': epochs,
        'batch_size': batch_size
    }
    
    single_layer_results_table = single_layer_results_table.append(d_row,ignore_index=True)

  single_layer_results_table = single_layer_results_table.append(d_row,ignore_index=True)
  single_layer_results_table = single_layer_results_table.append(d_row,ignore_index=True)
  single_layer_results_table = single_layer_results_table.append(d_row,ignore_index=True)
  single_layer_results_table = single_layer_results_table.append(d_row,ignore_index=True)
  single_layer_results_table = single_layer_results_table.append(d_row,ignore_index=True)
  single_layer_results_table = single_layer_results_table.append(d_row,ignore_index=True)
  single_layer_results_table = single_layer_results_table.append(d_row,ignore_index=True)
  single_layer_results_table = single_layer_results_table.append(d_row,ignore_index=True)
  single_layer_results_table = single_layer_results_table.append(d_row,ignore_index=True)
  single_layer_results_table = single_layer_results_table.append(d_row,ignore_index=True)
  single_layer_results_table = single_layer_results_table.append(d_row,ignore_index=True)
  single_l

In [126]:
for neurons, hidden_layers, dropout, epochs, learning_rate, batch_size in list(itertools.product(*ml_parameters)):
    ml_model = multi_layer(neurons, hidden_layers, learning_rate, dropout)
    
    history = ml_model.fit(X_train, y_train, epochs=epochs, validation_split=0.2, batch_size=batch_size, verbose=0)
    loss, accuracy = ml_model.evaluate(X_test, y_test, batch_size=batch_size, verbose=0)

    d_row = {
        'accuracy': accuracy, 
        'loss': loss, 
        'learning_rate': learning_rate,
        'neurons': neurons,
        'hidden_layers': hidden_layers, 
        'dropout': dropout,
        'epochs': epochs,
        'batch_size': batch_size
    }
    
    multi_layer_results_table = multi_layer_results_table.append(d_row,ignore_index=True)

  multi_layer_results_table = multi_layer_results_table.append(d_row,ignore_index=True)
  multi_layer_results_table = multi_layer_results_table.append(d_row,ignore_index=True)
  multi_layer_results_table = multi_layer_results_table.append(d_row,ignore_index=True)
  multi_layer_results_table = multi_layer_results_table.append(d_row,ignore_index=True)
  multi_layer_results_table = multi_layer_results_table.append(d_row,ignore_index=True)
  multi_layer_results_table = multi_layer_results_table.append(d_row,ignore_index=True)
  multi_layer_results_table = multi_layer_results_table.append(d_row,ignore_index=True)
  multi_layer_results_table = multi_layer_results_table.append(d_row,ignore_index=True)
  multi_layer_results_table = multi_layer_results_table.append(d_row,ignore_index=True)
  multi_layer_results_table = multi_layer_results_table.append(d_row,ignore_index=True)
  multi_layer_results_table = multi_layer_results_table.append(d_row,ignore_index=True)
  multi_layer_results_table = mu

In [127]:
single_layer_results_table.sort_values(by='accuracy')

Unnamed: 0,accuracy,loss,learning_rate,neurons,epochs,batch_size
3,0.44,1.074527,0.01,2.0,3.0,32.0
2,0.73,0.74693,0.01,2.0,3.0,8.0
1,0.79,0.526681,0.1,2.0,3.0,32.0
0,0.8,0.283831,0.1,2.0,3.0,8.0
11,0.94,0.330707,0.01,36.0,3.0,32.0
4,0.96,0.097646,0.1,2.0,100.0,8.0
10,0.98,0.182312,0.01,36.0,3.0,8.0
8,0.99,0.038478,0.1,36.0,3.0,8.0
9,0.99,0.045127,0.1,36.0,3.0,32.0
5,1.0,0.002894,0.1,2.0,100.0,32.0


In [128]:
multi_layer_results_table.sort_values(by='accuracy')

Unnamed: 0,accuracy,loss,learning_rate,neurons,hidden_layers,dropout,epochs,batch_size
17,0.36,1.056831,0.1,2.0,3.0,0.2,3.0,32.0
31,0.44,0.834254,0.01,2.0,3.0,0.4,100.0,32.0
29,0.44,1.052026,0.1,2.0,3.0,0.4,100.0,32.0
28,0.44,1.052296,0.1,2.0,3.0,0.4,100.0,8.0
27,0.44,1.069912,0.01,2.0,3.0,0.4,3.0,32.0
26,0.44,0.908661,0.01,2.0,3.0,0.4,3.0,8.0
25,0.44,1.052186,0.1,2.0,3.0,0.4,3.0,32.0
24,0.44,1.051574,0.1,2.0,3.0,0.4,3.0,8.0
21,0.44,1.053391,0.1,2.0,3.0,0.2,100.0,32.0
20,0.44,1.05096,0.1,2.0,3.0,0.2,100.0,8.0


In [None]:
scaled_samples = np.array([
    [49.2, 15.2, 221.0, 6300.0],
    [50.0, 19.5, 196.0, 3900.0],
    [37.0, 16.9, 185.0, 3000.0],
], dtype=np.float32)
scaled_samples = scaler_X.transform(scaled_samples)


In [130]:
categorical_samples = np.array([
    [1, 0, 0, 0, 1],
    [0, 1, 0, 0, 1],
    [0, 1, 0, 1, 0],
])

In [131]:
samples = np.concatenate((scaled_samples, categorical_samples), axis=1)
samples

array([[0.62181824, 0.25      , 0.83050859, 1.        , 1.        ,
        0.        , 0.        , 0.        , 1.        ],
       [0.65090913, 0.76190472, 0.40677965, 0.33333337, 0.        ,
        1.        , 0.        , 0.        , 1.        ],
       [0.17818184, 0.4523809 , 0.22033894, 0.08333331, 0.        ,
        1.        , 0.        , 1.        , 0.        ]])

In [132]:
ml_yhat = ml_model.predict(samples)
sl_yhat = sl_model.predict(samples)

for i in range(len(ml_yhat)):
    print('SL Class prediction {} : {}'.format(i+1, label_dict[int(argmax(sl_yhat[i]))]))
    print('ML Class prediction {} : {}'.format(i+1, label_dict[int(argmax(ml_yhat[i]))]))

SL Class prediction 1 : Gentoo
ML Class prediction 1 : Gentoo
SL Class prediction 2 : Chinstrap
ML Class prediction 2 : Chinstrap
SL Class prediction 3 : Adelie
ML Class prediction 3 : Adelie
