# Neural Networks

### Load Modules

In [10]:
import numpy as np
import pandas as pd
from PIL import Image
#from mnist import MNIST
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import RandomizedSearchCV,train_test_split, cross_val_score

### Funciones relevantes

In [2]:
def print_data(data):
    """Print a 2-D grid of values, one text line per row.

    Every value is rendered with a minimum field width of 3 and the cells
    of a row are concatenated without a separator.

    Args:
        data: Iterable of rows, each row an iterable of formattable values.
    """
    render_cell = '{:3}'.format
    for cells in data:
        print(''.join(map(render_cell, cells)))

In [3]:
def get_data(number):
    """Load ``sample<number>_black_r.png`` as rows of grayscale pixel values.

    Args:
        number: Integer substituted into the file name pattern
            ``sample%d_black_r.png`` (path is relative to the current
            working directory).

    Returns:
        A list with one entry per image row; each entry is the list of
        that row's pixel values after conversion to mode ``'L'``.
    """
    # The context manager releases the file handle even if convert() raises.
    with Image.open('sample%d_black_r.png' % (number)) as source:
        img = source.convert('L')
    width, height = img.size
    pixels = list(img.getdata())
    # getdata() is a flat sequence; cut it into consecutive runs of `width`.
    return [pixels[offset:offset + width] for offset in range(0, width * height, width)]

In [4]:
def flatten_data(data):
    """Flatten a 28x28 grid into a one-sample batch.

    Args:
        data: Array-like holding exactly 28*28 values.

    Returns:
        A one-element list whose only item is a 1-D array of 784 values.
    """
    pixel_count = 28 * 28
    flattened = np.reshape(data, (pixel_count,))
    return [flattened]

In [5]:
def classification_metrics(X, y, estimator):
    """Print the mean and standard deviation of cross-validated ROC AUC.

    Runs ``cross_val_score`` with 4 folds, ``scoring="roc_auc"`` and
    ``n_jobs=-1``, then prints a one-line summary. Nothing is returned.

    Args:
        X: Feature matrix passed to ``cross_val_score``.
        y: Target labels passed to ``cross_val_score``.
        estimator: Estimator to evaluate.
    """
    fold_scores = cross_val_score(
        estimator=estimator,
        X=X,
        y=y,
        cv=4,
        scoring="roc_auc",
        n_jobs=-1,
    )
    mean_auc = np.mean(fold_scores)
    std_auc = np.std(fold_scores)
    print(f"ROC media: {mean_auc:,.2f}, desviación estándar: {std_auc}")

## Carga de datos

### Set de entrenamiento

In [8]:
# The MNIST import in the module-loading cell is commented out, which made
# this cell fail with "NameError: name 'MNIST' is not defined". Import it
# here so the cell runs on its own.
from mnist import MNIST

# Loader rooted at the local 'letters/' directory, switched to the EMNIST
# "letters" dataset.
mndata = MNIST('letters/')
mndata.select_emnist("letters")

In [None]:
# Load the training split (the testing split is loaded in a later cell).
# `images` and `labels` are used below as parallel sequences: one row of
# pixel values and one numeric label per sample.
images, labels = mndata.load_training()

#### Convirtiendo a DataFrame

In [None]:
# One column per pixel, named "<row>x<col>" with 1-based indices
# (28 * 28 = 784 columns).
train = pd.DataFrame(
    data=images,
    columns=[f"{row+1}x{col+1}" for row in range(28) for col in range(28)],
)

In [None]:
# Preview the first rows of the pixel-only training frame.
train.head()

In [None]:
# Turn numeric labels into letters: '@' is the character right before 'A',
# so label 1 becomes 'A', label 2 becomes 'B', and so on.
train["letter"] = [chr(label + ord('@')) for label in labels]

In [None]:
# Display the raw numeric labels of the currently loaded split.
labels

In [None]:
# Preview the training frame again, now including the "letter" column.
train.head()

In [None]:
# Rebind `images` / `labels` to the testing split. From here on these names
# no longer refer to the training data; `train` was already built and keeps
# the training rows.
images, labels = mndata.load_testing()

In [None]:
# Preview `train` once more after the testing split was loaded.
train.head()

In [None]:
# Render the first training sample (index label 0) as a 28x28 grid of pixel
# values, leaving out the "letter" column.
print_data(
    np.reshape(
        train.loc[0, [col for col in train.columns if col != "letter"]].values,
        (28, 28),
    )
)

In [None]:
# Validation frame built from whatever `images` / `labels` currently hold
# (the testing split, if the cells ran in order). Same layout as `train`:
# 784 pixel columns named "<row>x<col>" plus a "letter" column.
val = pd.DataFrame(
    data=images,
    columns=[f"{row+1}x{col+1}" for row in range(28) for col in range(28)],
)
# '@' precedes 'A', so label 1 becomes 'A', label 2 becomes 'B', ...
val["letter"] = [chr(label + ord('@')) for label in labels]

In [None]:
# Preview the first rows of the validation frame.
val.head()

### EDA

In [None]:
# Print every 800th image as a 28x28 grid, each followed by blank lines.
# The stop value is capped at len(images) so the loop cannot index past the
# end of whichever split `images` currently holds; the original hard-coded
# stop of 21600 raises IndexError when fewer than 20801 images are loaded.
for i in range(0, min(21600, len(images)), 800):
    print_data(np.reshape(images[i], (28, 28)))
    print("\n\n")

In [None]:
# Relative frequency of each letter in the training frame.
train["letter"].value_counts(normalize=True)

### Separación de set

In [None]:
# Keep only the training rows labelled "O" or "X".
xo_train = train.loc[train["letter"].isin(["O", "X"])]

In [None]:
# Show six randomly chosen O/X training rows.
xo_train.sample(6)

In [None]:
# (rows, columns) of the O/X training subset.
xo_train.shape

In [None]:
# Save the O/X training subset to xo_train.csv in the working directory,
# without writing the index as a column.
xo_train.to_csv("xo_train.csv", index = False)

In [None]:
# Keep only the validation rows labelled "O" or "X" and save them to
# xo_val.csv (index not written).
xo_val = val.loc[val["letter"].isin(["O", "X"])]
xo_val.to_csv("xo_val.csv", index=False)

In [None]:
# Features are every column except "letter"; the target is the letter itself.
X = xo_train.drop(columns="letter")
y = xo_train["letter"]

In [None]:
# Preview the feature matrix.
X.head()

In [None]:
# 70/30 split of the O/X training rows into fit and hold-out parts.
# NOTE(review): no random_state is passed, so the partition is expected to
# differ between runs — confirm whether reproducibility matters here.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 0.7)

In [None]:
# Validation features (all columns but "letter") and validation target.
X_val = xo_val.drop(columns="letter")
y_val = xo_val["letter"]

## Modelado

### Cross validation

In [None]:
# Baseline multilayer perceptron: library defaults except for max_iter=100.
mlp = MLPClassifier(max_iter=100)

In [None]:
# Fit the baseline on the 70% training part.
mlp.fit(X_train, y_train)

In [None]:
# Print 4-fold cross-validated ROC AUC (mean and standard deviation) for the
# baseline configuration on the training part.
classification_metrics(X=X_train, y=y_train, estimator=mlp)

In [None]:
# Baseline score on the 30% hold-out part (the estimator's default metric).
mlp.score(X_test, y_test)

In [None]:
# Baseline score on the separate O/X validation set.
mlp.score(X_val, y_val)

### Hiperparametrización

In [None]:
# Candidate hyperparameter values sampled by the randomized search below.
# Keys are MLPClassifier constructor arguments.
param_grid = {
    # Each tuple lists the number of units per hidden layer.
    'hidden_layer_sizes': [(50,50,50), (50,100,50), (100,)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    # Regularization strength candidates.
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant','adaptive'],
}

In [None]:
# Randomized search over `param_grid`: 10 sampled configurations, each
# evaluated with 4-fold cross-validation on ROC AUC, using all cores and
# verbose progress output.
search = RandomizedSearchCV(
    estimator=mlp,
    param_distributions=param_grid,
    n_iter=10,
    scoring="roc_auc",
    cv=4,
    n_jobs=-1,
    verbose=5,
)

In [None]:
# Tune on the training part only. Fitting on all of xo_train would include
# the X_test rows, so the later search.score(X_test, y_test) would be
# measured on data the model had already been trained on.
search.fit(X=X_train, y=y_train)

In [None]:
# Show the estimator with the winning hyperparameter combination.
search.best_estimator_

In [None]:
# Cross-validated score of the winning combination.
search.best_score_

In [None]:
# Score of the tuned model on the 30% hold-out part.
# NOTE(review): presumably evaluated with the search's "roc_auc" scorer
# rather than accuracy — confirm against the scikit-learn docs.
search.score(X_test, y_test)

In [None]:
# Score of the tuned model on the separate O/X validation set.
search.score(X_val, y_val)

### Preservación de modelo ganador

In [None]:
# Persist the winning estimator to rnn_tictactoe.pickle in the working
# directory.
pd.to_pickle(search.best_estimator_, "rnn_tictactoe.pickle")

### Evaluación

In [None]:
# Show ten random validation rows (useful for picking an index to inspect).
X_val.sample(10)

In [None]:
# Index label of the validation row to inspect. It is hard-coded and must
# exist in X_val's index, otherwise the .loc lookups below raise KeyError.
n= 18734

In [None]:
# Pixel values of the chosen validation row.
X_val.loc[n]

In [None]:
# True letter of the chosen validation row.
y_val.loc[n]

In [None]:
# Predict the letter for the chosen row. The double brackets keep the
# selection as a one-row DataFrame instead of a Series.
search.best_estimator_.predict(X_val.loc[[n]])