# ___

# [ Machine Learning in Geosciences ]

**Department of Applied Geoinformatics and Cartography, Charles University** 

*Lukas Brodsky lukas.brodsky@natur.cuni.cz*


___

# Scikit-learn Neural Network 

A Multi-Layer Perceptron (MLP) architecture applied to the Iris data set. 

In [None]:
# Sklearn API 

import os 
import numpy as np
import pandas as pd

# data set splitting
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# classifier
from sklearn.neural_network import MLPClassifier

# validation metrics
from sklearn import metrics
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.metrics import classification_report, confusion_matrix

# sample data
from sklearn import datasets

# plotting
import matplotlib.pyplot as plt 
# notebook solution
%matplotlib inline 
# seaborn works on top of matplotlib
import seaborn as sns

In [None]:
# Load the Iris data set into `df` using seaborn's example-data loader.
# Alternative source bundled with scikit-learn:
# iris = datasets.load_iris()
# NOTE(review): seaborn presumably fetches this data set from its online
# example-data repository on first use - confirm network access is available.
df = sns.load_dataset('iris') 
# preview the first rows
df.head()

In [None]:
# View an illustration of the data set stored next to the notebook.
import matplotlib.image as mpimg

# Keep the file name in its own variable so that `img` only ever holds the
# decoded pixel array (never the path string) and the literal is not repeated.
img_path = 'iris-dataset.png'
img = mpimg.imread(img_path)
plt.figure(figsize=(15, 10), dpi=80)
plt.imshow(img)

In [None]:
# Explore the data set: print a concise summary of the DataFrame
df.info()

In [None]:
# Check dimensionality: (number of rows, number of columns)
df.shape

In [None]:
# Attribute visualization - exploratory analysis:
# pairwise plots of the attributes, colored by the 'species' column
sns.pairplot(data=df, hue = 'species')

### Prepare data

In [None]:
# Data preparation: list the column names of the data set
df.columns

In [None]:
# Separate the attributes from the class labels.
# DataFrame.drop returns a new frame, so `df` itself is left untouched.
df1 = df.drop(columns=['species'])
reference = df['species']

In [None]:
# Define the attribute matrix X (the data set without the 'species' column)
X = df1

In [None]:
# preview the first rows of the attributes
X.head()

In [None]:
# display the class labels (the 'species' column)
reference

In [None]:
# kodovani trid 
le = LabelEncoder()
num_codes = le.fit_transform(reference) 
num_codes

In [None]:
# Put the species names and their numeric codes side by side,
# then print the distinct values found in each column.
species_column = df['species']
code_column = pd.DataFrame(num_codes)
spec_code = pd.concat([species_column, code_column], axis=1)

for column_name in spec_code.columns:
    print(spec_code[column_name].unique())

In [None]:
# target vector: the integer-encoded class labels
y = num_codes

In [None]:
# Split the data randomly into a training set and a test set.
# 30 % of the samples are held out for testing, so the network is trained on
# the larger share of this small data set. `stratify=y` keeps the class
# proportions equal in both subsets, and the fixed `random_state` makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42)

# "Trenovaci mnozina" = training set, "Testovaci mnozina" = test set
print("Trenovaci mnozina ", X_train.shape)
print("Testovaci mnozina ", X_test.shape)

In [None]:
# Model instance and parameters

# Candidate values to experiment with:
# "hidden_layer_sizes": [(5,), (10,), (5, 5), (10, 10), (5, 5, 5), (10, 10, 10)]
# "activation": ["identity", "logistic", "tanh", "relu"]
# "learning_rate": ["constant", "invscaling", "adaptive"]
# "max_iter": [100, 200, 300, 500, 1000, 2000]

# NOTE: per the scikit-learn documentation, `learning_rate` is only used when
# solver='sgd'; with the default solver it has no effect.
# `random_state` fixes the random weight initialization so that repeated runs
# of the notebook give the same model, consistent with the seeded data split.
nn = MLPClassifier(
    hidden_layer_sizes=(5, 5, 5),
    activation='identity',
    learning_rate='adaptive',
    max_iter=2000,
    random_state=42,
)

In [None]:
# Fit the model on the training data
nn.fit(X_train, y_train)

In [None]:
# 5-fold cross-validation on the training data, scored with macro-averaged F1
cv_nn = cross_validate(
    nn,
    X_train,
    y_train,
    cv=5,
    scoring='f1_macro',
    return_estimator=True,
)
mean_cv_f1 = cv_nn['test_score'].mean()
print('Average F1-skore: {:.3f} '.format(mean_cv_f1))

In [None]:
# Macro-averaged F1-score of the fitted model on the test data set
test_predictions = nn.predict(X_test)
test_f1 = f1_score(y_test, test_predictions, average='macro')
print('Accuray F1-score on test set: {:.3f}'.format(round(test_f1, 3)))

In [None]:
# predicted class codes for the test set
y_pred = nn.predict(X_test)

In [None]:
# Confusion matrix of the test-set predictions
# (rows: reference classes, columns: predicted classes).
class_names = le.classes_
# Pin the label order to the encoder's codes so that rows/columns always line
# up with `class_names`, even if a class is absent from the test predictions.
cm = confusion_matrix(y_test, y_pred, labels=list(range(len(class_names))))
plt.figure(figsize=(7, 7))
# fmt='d' prints the counts as plain integers; the default format would switch
# to scientific notation for counts of 100 or more.
sns.heatmap(data=cm, linewidths=.5, annot=True, fmt='d', square=True,
            cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.ylabel('Reference')
plt.xlabel('Predicted class')