**Para Google Colab**

Tareas:
- Crear carpeta llamada `data` dentro de `content`
- Subir los archivos de la carpeta `data` desde el repositorio de GitHub

Instalar la siguiente librería:

In [None]:
%pip install pycm

**Demo de exploración de datos y ANN**

In [292]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import seaborn as sns

from sklearn.manifold import TSNE
from sklearn.metrics.pairwise import manhattan_distances

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization, Conv1D, MaxPooling1D, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras import Sequential
from keras.utils import to_categorical
from pycm import *

In [94]:
# Hex colour palette with 16 entries; reused below as bar colours and as the
# seaborn palette for the per-class plots.
colors = [
    "#F7F467", "#5BF78A", "#E1AF7D", "#C59360",
    "#FE3DFC", "#F77C7F", "#59C23A", "#00F72C",
    "#466D40", "#89E14D", "#02424B", "#0EFCFE",
    "#F7924D", "#BFBFBF", "#FFFD33", "#0028FB",
]

In [3]:
path = './data'

In [6]:
# CSV files in `path` whose name starts with 'B', in sorted order.
files = sorted(
    name for name in os.listdir(path)
    if name.startswith('B') and name.endswith('.csv')
)

In [46]:
dfs = [pd.read_csv(os.path.join(path, f)) for f in files]

In [48]:
# Join the per-file tables into one wide frame keyed on 'ID'.
last_pos = len(dfs) - 1
for pos, frame in enumerate(dfs):
    # The first column of every file is discarded by position.
    frame = frame.drop(frame.columns[0], axis=1)
    # 'n' and 'specie' are kept from the last file only, so the merge does
    # not produce duplicated copies of them.
    if pos != last_pos:
        frame = frame.drop(['n', 'specie'], axis=1)
    df = frame if pos == 0 else pd.merge(df, frame, on='ID')

In [None]:
df.describe().T

In [None]:
# Min / mean / max of every numeric feature (ID and n excluded), before scaling.
desc = df.drop(columns=['ID', 'n']).describe().T.drop(columns=['count'])
plt.figure(figsize=(10, 6))
for stat in ('min', 'mean', 'max'):
    plt.plot(desc[stat], label=stat)
plt.xticks(rotation=90)
plt.legend()
plt.show()

In [None]:
df

In [None]:
df['b02_Apr'].hist()

In [None]:
# NOTE(review): the neighbouring cells plot 'b02_Apr' and 'b08_pen'; this name
# has no underscore — confirm that 'b04Apr' is a real column of `df`.
df['b04Apr'].hist()

In [None]:
df['b08_pen'].hist()

In [127]:
# Stratified 60 / 20 / 20 split (0.25 of the remaining 80 % goes to validation).
# A fixed seed keeps the split, and therefore every metric below, reproducible
# across kernel restarts.
SPLIT_SEED = 42
train, test = train_test_split(df, test_size=0.2, stratify=df['specie'], random_state=SPLIT_SEED)
train, val = train_test_split(train, test_size=0.25, stratify=train['specie'], random_state=SPLIT_SEED)

In [None]:
print(f'Size of train: {len(train)}')
print(f'Size of val: {len(val)}')
print(f'Size of test: {len(test)}')

In [None]:
# Per-class sample counts of the training split. Computed inline because
# `train_temp` is not defined yet at this point on a fresh kernel.
train['specie'].value_counts().sort_index()

In [None]:
# Class balance of the training split before any resampling.
train_temp = train.copy()['specie'].value_counts().sort_index()

plt.figure(figsize=(10, 6))
plt.bar(train_temp.index, train_temp.values, color=colors)
plt.xticks(rotation=90)
plt.title('Train data, no bootstrapping')
plt.show()

In [131]:
def sample_or_boostrap(df, limit):
    """Return exactly ``limit`` rows drawn from ``df``.

    If ``df`` has more than ``limit`` rows, ``limit`` of them are sampled
    without replacement. Otherwise every row is kept and the shortfall is
    filled with rows sampled with replacement, appended after the originals.
    Both draws use ``random_state=1``.
    """
    shortfall = limit - len(df)
    if shortfall < 0:
        # More rows than needed: plain down-sampling.
        return df.sample(n=limit, replace=False, random_state=1)
    # Not enough rows: keep everything and top up with bootstrapped rows.
    extra = df.sample(n=shortfall, replace=True, random_state=1)
    return pd.concat([df, extra])

In [132]:
def _balance_by_class(frame, limit):
    """Resample every 'specie' group of ``frame`` to exactly ``limit`` rows."""
    per_class = frame.groupby('specie').apply(lambda grp: sample_or_boostrap(grp, limit=limit))
    return per_class.reset_index(drop=True)


# 100 rows per class for training, 30 per class for validation and test.
train = _balance_by_class(train, 100)
val = _balance_by_class(val, 30)
test = _balance_by_class(test, 30)

In [None]:
print(f'Size of train: {len(train)}')
print(f'Size of val: {len(val)}')
print(f'Size of test: {len(test)}')

In [None]:
# Class balance of the training split AFTER the per-class resampling above.
train_temp = train.copy()
train_temp = train_temp['specie'].value_counts()
train_temp = train_temp.sort_index()

plt.figure(figsize=(10, 6))
plt.bar(train_temp.index, train_temp.values, color=colors)
# The title was copy-pasted from the pre-resampling plot and said "no bootstrapping".
plt.title('Train data, with bootstrapping')
plt.xticks(rotation=90)
plt.show()

In [84]:
# Min-max scaled copy of the training features, used for the exploratory plots.
feature_frame = train.drop(columns=['ID', 'n', 'specie'])

scaler = MinMaxScaler()
train2 = pd.DataFrame(scaler.fit_transform(feature_frame), columns=feature_frame.columns)

In [None]:
# Min / mean / max of every feature after min-max scaling.
desc = train2.describe().T.drop(columns=['count'])
plt.figure(figsize=(10, 6))
for stat in ('min', 'mean', 'max'):
    plt.plot(desc[stat], label=stat)
plt.xticks(rotation=90)
plt.legend()
plt.show()

In [None]:
# 2-D t-SNE embedding of the scaled training features (Manhattan metric).
# NOTE(review): recent scikit-learn releases rename `n_iter` to `max_iter` —
# confirm against the installed version.
tsne = TSNE(n_components=2, verbose=1, n_iter=1000, metric='manhattan', random_state=42)
embedding = tsne.fit_transform(train2)

train_tsne = pd.DataFrame(embedding, columns=['c1', 'c2'])
# Attach the class of each embedded point, using a fresh 0..n-1 index.
train_tsne['specie'] = train['specie'].reset_index(drop=True)

plt.rcParams.update({
    'figure.dpi': 100,
    'savefig.dpi': 100,
    'figure.figsize': (10, 10),
})
sns.set_context("notebook", font_scale=1.1)
sns.set_style("ticks")
sns.scatterplot(data=train_tsne, x='c1', y='c2', hue='specie', style='specie',
                palette=colors, s=70, alpha=1, legend="full")
# Legend is placed outside the axes, to the right.
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.title('t-SNE Bands')
plt.show()

In [None]:
# For every embedded training point, compute the Manhattan distance to the
# nearest OTHER point of each class, then reshape the result to long format.
species = train_tsne['specie'].unique()

# All pairwise distances are computed once instead of once per (row, class).
coords = train_tsne[['c1', 'c2']].to_numpy()
pairwise = manhattan_distances(coords)
# A point must never count as its own nearest neighbour.
np.fill_diagonal(pairwise, np.inf)

specie_values = train_tsne['specie'].to_numpy()
for specie in species:
    members = specie_values == specie
    # One float column per class, created directly with the final values
    # (the old code created integer columns and then wrote floats into them).
    train_tsne[f'{specie}'] = pairwise[:, members].min(axis=1)

# Drop the coordinates and melt the per-class distance columns into
# (specie, specie2, distance) rows.
train_pivoted = train_tsne.drop(columns=['c1', 'c2']).melt(id_vars='specie', var_name='specie2', value_name='distance')

In [None]:
plt.rcParams['figure.figsize'] = (5, 5)

# Display names used for the facet titles and the y axis.
train_pivoted = train_pivoted.rename(columns={'specie': 'Clase', 'distance': 'Distancia'})

# One box-plot panel per class, four panels per row.
g = sns.catplot(data=train_pivoted, x='specie2', y='Distancia',
                col='Clase', kind='box', col_wrap=4,
                hue='specie2', palette=colors, height=3.5, aspect=1)

for panel_idx, panel_ax in enumerate(g.axes_dict.values()):
    # Light grey band behind x position `panel_idx`
    # (presumably the panel's own class — assumes facet order matches x order).
    highlight = patches.Rectangle((panel_idx - 0.5, 0), 1, 110, linewidth=1,
                                  edgecolor='none', facecolor='#F1F1F1')
    panel_ax.add_patch(highlight)
    # Only panels from index 12 onwards get rotated tick labels.
    if panel_idx >= 12:
        panel_ax.set_xticklabels(panel_ax.get_xticklabels(), rotation=90)
g.set_xlabels('')
plt.show()

In [92]:
# train.drop(['ID','n'], axis=1).to_csv('./data/train.csv', index=False)
# val.drop(['ID','n'], axis=1).to_csv('./data/val.csv', index=False)
# test.drop(['ID','n'], axis=1).to_csv('./data/test.csv', index=False)
# train_backup = train.copy()
# val_backup = val.copy()
# test_backup = test.copy()

In [135]:
# Remove the 'ID' and 'n' columns from all three splits.
train, val, test = (part.drop(columns=['ID', 'n']) for part in (train, val, test))

In [136]:
# class to int
# The class names are sorted explicitly so the encoding does not depend on the
# row order of `train` and stays alphabetical.
labels = np.sort(train['specie'].unique())
labels_dict = {name: idx for idx, name in enumerate(labels)}

In [None]:
labels_dict

In [139]:
# Replace the class names by their integer codes in every split.
for split in (train, val, test):
    split['specie'] = split['specie'].map(labels_dict)

In [None]:
train.head()

In [143]:
# One-hot targets for Keras. `num_classes` is passed explicitly so every split
# gets the same number of columns even if one of them lacks the highest class.
n_classes = len(labels)
train_labels = to_categorical(train['specie'], num_classes=n_classes)
val_labels = to_categorical(val['specie'], num_classes=n_classes)
test_labels_expanded = to_categorical(test['specie'], num_classes=n_classes)
# Integer-coded test labels, kept for the confusion matrix.
test_labels = test['specie']

In [151]:
# Unscaled feature frames (target column removed).
train_ns = train.drop('specie', axis=1)
val_ns = val.drop('specie', axis=1)
test_ns = test.drop('specie', axis=1)

# Min-max scaled versions; the scaler is fitted on the training split only and
# then applied to validation and test.
scaler = MinMaxScaler()
train_s = pd.DataFrame(scaler.fit_transform(train_ns), columns=train_ns.columns)
val_s = pd.DataFrame(scaler.transform(val_ns), columns=val_ns.columns)
test_s = pd.DataFrame(scaler.transform(test_ns), columns=test_ns.columns)

In [None]:
train_s.head()

In [None]:
train_ns.head()

In [221]:
# Stop training after 50 epochs without improvement in validation loss.
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=50,
)
# Multiply the learning rate by 0.75 after 10 stagnant epochs, never going below 1e-6.
reduce_lr_callback = ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.75,
    patience=10,
    min_lr=1e-6,
)

In [222]:
# basic ANN model with 1 hidden layer
def create_model():
    """Build and compile a classifier with one 128-unit ReLU hidden layer.

    The input width is read from the global ``train_s`` and the number of
    output units from the global ``labels``. Compiled with Adam (lr 0.01),
    categorical cross-entropy and an accuracy metric.
    """
    model = Sequential([
        Input(shape=(train_s.shape[1],)),
        Dense(128, activation='relu'),
        Dense(len(labels), activation='softmax'),
    ])
    model.compile(
        optimizer=Adam(learning_rate=0.01),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

In [223]:
model = create_model()

In [None]:
model.summary()

In [None]:
# scaled
history = model.fit(train_s, train_labels, validation_data=(val_s, val_labels), epochs=250, batch_size=32, callbacks=[early_stopping_callback, reduce_lr_callback])

In [226]:
l = []

In [None]:
test_loss, test_acc = model.evaluate(test_s, test_labels_expanded)
l.append({'model': 'scaled, simple', 'test_loss': test_loss, 'test_acc': test_acc})

In [None]:
model = create_model()
# no scaled
history2 = model.fit(train_ns, train_labels, validation_data=(val_ns, val_labels), epochs=250, batch_size=32, callbacks=[early_stopping_callback, reduce_lr_callback])

In [None]:
# This model was fitted on the unscaled features (train_ns), so it has to be
# scored on the unscaled test set; scoring it on test_s misreports its quality.
test_loss, test_acc = model.evaluate(test_ns, test_labels_expanded)
l.append({'model': 'no scaled, simple', 'test_loss': test_loss, 'test_acc': test_acc})

In [None]:
# Accuracy curves: scaled vs. unscaled inputs.
curves = (
    (history, 'train_acc_scaled', 'val_acc_scaled'),
    (history2, 'train_acc_no_scaled', 'val_acc_no_scaled'),
)
for run, train_label, val_label in curves:
    plt.plot(run.history['accuracy'], label=train_label)
    plt.plot(run.history['val_accuracy'], label=val_label)
plt.legend()
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.show()

In [231]:
# Build deliberately badly-scaled copies of the unscaled features: the first
# 13 feature columns are shrunk (x0.0001 + 0.1) and the last 13 are blown up
# (x100 - 0.5). The previous code used `frame[:13]`, which on a DataFrame
# slices ROWS, so it only altered a few samples and left the per-feature
# scales untouched.
def _exaggerate_scales(frame, n_cols=13):
    """Return a copy of ``frame`` with its first/last ``n_cols`` columns rescaled."""
    out = frame.copy()
    low_cols = list(out.columns[:n_cols])
    high_cols = list(out.columns[-n_cols:])
    out[low_cols] = out[low_cols] * 0.0001 + 0.1
    out[high_cols] = out[high_cols] * 100 - 0.5
    return out


train_ns2 = _exaggerate_scales(train_ns)
val_ns2 = _exaggerate_scales(val_ns)
test_ns2 = _exaggerate_scales(test_ns)

In [None]:
model = create_model()
# no scaled with artificially data
history2 = model.fit(train_ns2, train_labels, validation_data=(val_ns2, val_labels), epochs=250, batch_size=32, callbacks=[early_stopping_callback, reduce_lr_callback])

In [None]:
# This model was fitted on train_ns2, so it has to be scored on the test set
# that went through the same transformation (test_ns2), not on test_s.
test_loss, test_acc = model.evaluate(test_ns2, test_labels_expanded)
l.append({'model': 'no scaled, simple, exagerated', 'test_loss': test_loss, 'test_acc': test_acc})

In [None]:
# Accuracy curves: scaled inputs vs. the artificially mis-scaled inputs.
curves = (
    (history, 'train_acc_scaled', 'val_acc_scaled'),
    (history2, 'train_acc_no_scaled', 'val_acc_no_scaled'),
)
for run, train_label, val_label in curves:
    plt.plot(run.history['accuracy'], label=train_label)
    plt.plot(run.history['val_accuracy'], label=val_label)
plt.legend()
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.show()

In [None]:
# basic ANN model with 1 hidden layer + dropout
def create_model():
    """Build and compile a one-hidden-layer ReLU classifier with 30 % dropout.

    Input width comes from the global ``train_s``; output size from the
    global ``labels``. Compiled with Adam (lr 0.01), categorical
    cross-entropy and an accuracy metric.
    """
    model = Sequential([
        Input(shape=(train_s.shape[1],)),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(len(labels), activation='softmax'),
    ])
    model.compile(
        optimizer=Adam(learning_rate=0.01),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

model = create_model()
model.summary()

In [None]:
history2 = model.fit(train_s, train_labels, validation_data=(val_s, val_labels), epochs=300, batch_size=32, callbacks=[early_stopping_callback, reduce_lr_callback])

In [None]:
test_loss, test_acc = model.evaluate(test_s, test_labels_expanded)
l.append({'model': 'scaled, simple with dropout', 'test_loss': test_loss, 'test_acc': test_acc})

In [None]:
# Accuracy curves: plain one-layer model vs. the same model with dropout.
curves = (
    (history, 'train_1lyr', 'val_acc_1lyr'),
    (history2, 'train_acc_1lyr+dpt', 'val_acc_1lyr+dpt'),
)
for run, train_label, val_label in curves:
    plt.plot(run.history['accuracy'], label=train_label)
    plt.plot(run.history['val_accuracy'], label=val_label)
plt.legend()
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.show()

In [None]:
# basic ANN model with 1 hidden layer + sigmoid
def create_model():
    """Build and compile a one-hidden-layer sigmoid classifier with 30 % dropout.

    Input width comes from the global ``train_s``; output size from the
    global ``labels``. Compiled with Adam (lr 0.01), categorical
    cross-entropy and an accuracy metric.
    """
    model = Sequential([
        Input(shape=(train_s.shape[1],)),
        Dense(128, activation='sigmoid'),
        Dropout(0.3),
        Dense(len(labels), activation='softmax'),
    ])
    model.compile(
        optimizer=Adam(learning_rate=0.01),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

model = create_model()
model.summary()

In [None]:
history3 = model.fit(train_s, train_labels, validation_data=(val_s, val_labels), epochs=300, batch_size=32, callbacks=[early_stopping_callback, reduce_lr_callback])

In [None]:
test_loss, test_acc = model.evaluate(test_s, test_labels_expanded)
l.append({'model': 'scaled, simple with dropout + sigmoid', 'test_loss': test_loss, 'test_acc': test_acc})

In [None]:
# Accuracy curves: one-layer baseline, + dropout, + sigmoid activation.
curves = (
    (history, 'train_1lyr', 'val_acc_1lyr'),
    (history2, 'train_acc_1lyr+dpt', 'val_acc_1lyr+dpt'),
    (history3, 'train_acc_1lyr+dpt+sigmoid', 'val_acc_1lyr+dpt+sigmoid'),
)
for run, train_label, val_label in curves:
    plt.plot(run.history['accuracy'], label=train_label)
    plt.plot(run.history['val_accuracy'], label=val_label)
plt.legend()
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.show()

In [None]:
# basic ANN model with 1 hidden layer + tanh
def create_model():
    """Build and compile a one-hidden-layer tanh classifier with 30 % dropout.

    Input width comes from the global ``train_s``; output size from the
    global ``labels``. Compiled with Adam (lr 0.01), categorical
    cross-entropy and an accuracy metric.
    """
    model = Sequential([
        Input(shape=(train_s.shape[1],)),
        Dense(128, activation='tanh'),
        Dropout(0.3),
        Dense(len(labels), activation='softmax'),
    ])
    model.compile(
        optimizer=Adam(learning_rate=0.01),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

model = create_model()
model.summary()

In [244]:
model = create_model()

In [None]:
history4 = model.fit(train_s, train_labels, validation_data=(val_s, val_labels), epochs=300, batch_size=32, callbacks=[early_stopping_callback, reduce_lr_callback])

In [None]:
test_loss, test_acc = model.evaluate(test_s, test_labels_expanded)
l.append({'model': 'scaled, simple with dropout + tanh', 'test_loss': test_loss, 'test_acc': test_acc})

In [None]:
# Accuracy curves: one-layer baseline, + dropout, + sigmoid, + tanh.
curves = (
    (history, 'train_1lyr', 'val_acc_1lyr'),
    (history2, 'train_acc_1lyr+dpt', 'val_acc_1lyr+dpt'),
    (history3, 'train_acc_1lyr+dpt+sigmoid', 'val_acc_1lyr+dpt+sigmoid'),
    (history4, 'train_acc_1lyr+dpt+tanh', 'val_acc_1lyr+dpt+tanh'),
)
for run, train_label, val_label in curves:
    plt.plot(run.history['accuracy'], label=train_label)
    plt.plot(run.history['val_accuracy'], label=val_label)
plt.legend()
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.show()

In [248]:
# deep ANN model with 2 hidden layers + relu
def create_model():
    """Build and compile a classifier with 128- and 64-unit ReLU layers.

    Each hidden layer is followed by 30 % dropout. Input width comes from the
    global ``train_s``; output size from the global ``labels``. Compiled with
    Adam (lr 0.01), categorical cross-entropy and an accuracy metric.
    """
    model = Sequential([
        Input(shape=(train_s.shape[1],)),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(len(labels), activation='softmax'),
    ])
    model.compile(
        optimizer=Adam(learning_rate=0.01),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

model = create_model()

In [249]:
history = history2

In [None]:
history2 = model.fit(train_s, train_labels, validation_data=(val_s, val_labels), epochs=300, batch_size=32, callbacks=[early_stopping_callback, reduce_lr_callback])

In [None]:
test_loss, test_acc = model.evaluate(test_s, test_labels_expanded)
l.append({'model': 'scaled, 2hl', 'test_loss': test_loss, 'test_acc': test_acc})

In [None]:
# Accuracy curves: reference run held in `history` vs. the two-hidden-layer network.
curves = (
    (history, 'train_ann', 'val_acc_ann'),
    (history2, 'train_acc_dnn', 'val_acc_dnn'),
)
for run, train_label, val_label in curves:
    plt.plot(run.history['accuracy'], label=train_label)
    plt.plot(run.history['val_accuracy'], label=val_label)
plt.legend()
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.show()

In [None]:
# let's get bigger
def create_model():
    """Build and compile a classifier with 256-, 128- and 64-unit ReLU layers.

    Each hidden layer is followed by 30 % dropout. Input width comes from the
    global ``train_s``; output size from the global ``labels``. Compiled with
    Adam (lr 0.01), categorical cross-entropy and an accuracy metric.
    """
    model = Sequential([
        Input(shape=(train_s.shape[1],)),
        Dense(256, activation='relu'),
        Dropout(0.3),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(len(labels), activation='softmax'),
    ])
    model.compile(
        optimizer=Adam(learning_rate=0.01),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Build the new architecture first; otherwise summary() describes the
# previous model that is still bound to `model`.
model = create_model()
model.summary()

In [254]:
model = create_model()

In [None]:
history3 = model.fit(train_s, train_labels, validation_data=(val_s, val_labels), epochs=300, batch_size=32, callbacks=[early_stopping_callback, reduce_lr_callback])

In [None]:
test_loss, test_acc = model.evaluate(test_s, test_labels_expanded)
l.append({'model': 'scaled, 3hl', 'test_loss': test_loss, 'test_acc': test_acc})

In [None]:
# Accuracy curves: reference run, two-layer network, three-layer network.
curves = (
    (history, 'train_ann', 'val_acc_ann'),
    (history2, 'train_acc_dnn', 'val_acc_dnn'),
    (history3, 'train_acc_dnn_bigger', 'val_acc_dnn_bigger'),
)
for run, train_label, val_label in curves:
    plt.plot(run.history['accuracy'], label=train_label)
    plt.plot(run.history['val_accuracy'], label=val_label)
plt.legend()
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.show()

In [259]:
# 3 layers, but more neurons
def create_model():
    """Build and compile a classifier with 512-, 256- and 128-unit ReLU layers.

    Each hidden layer is followed by 30 % dropout. Input width comes from the
    global ``train_s``; output size from the global ``labels``. Compiled with
    Adam (lr 0.01), categorical cross-entropy and an accuracy metric.
    """
    model = Sequential([
        Input(shape=(train_s.shape[1],)),
        Dense(512, activation='relu'),
        Dropout(0.3),
        Dense(256, activation='relu'),
        Dropout(0.3),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(len(labels), activation='softmax'),
    ])
    model.compile(
        optimizer=Adam(learning_rate=0.01),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

model = create_model()

In [None]:
model = create_model()
history4 = model.fit(train_s, train_labels, validation_data=(val_s, val_labels), epochs=300, batch_size=32, callbacks=[early_stopping_callback, reduce_lr_callback])

In [None]:
test_loss, test_acc = model.evaluate(test_s, test_labels_expanded)
l.append({'model': 'scaled, 3hl +neurons', 'test_loss': test_loss, 'test_acc': test_acc})

In [None]:
# Accuracy curves: reference run and the three progressively larger dense networks.
curves = (
    (history, 'train_ann', 'val_acc_ann'),
    (history2, 'train_acc_dnn', 'val_acc_dnn'),
    (history3, 'train_acc_dnn_bigger', 'val_acc_dnn_bigger'),
    (history4, 'train_acc_dnn_bigger_more', 'val_acc_dnn_bigger_more'),
)
for run, train_label, val_label in curves:
    plt.plot(run.history['accuracy'], label=train_label)
    plt.plot(run.history['val_accuracy'], label=val_label)
plt.legend()
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.show()

In [None]:
# time to deep learning!
def create_model():
    """Build and compile a 1-D CNN: two Conv1D/MaxPooling1D stages + dense head.

    The input shape is ``(n_features, 1)`` with ``n_features`` read from the
    global ``train_s``; the output size comes from the global ``labels``.
    Compiled with Adam (lr 0.01), categorical cross-entropy and an accuracy
    metric.
    """
    model = Sequential([
        Input(shape=(train_s.shape[1], 1)),
        Conv1D(64, 3, activation='relu'),
        MaxPooling1D(2),
        Conv1D(32, 3, activation='relu'),
        MaxPooling1D(2),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(len(labels), activation='softmax'),
    ])
    model.compile(
        optimizer=Adam(learning_rate=0.01),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Build the new architecture first; otherwise summary() describes the
# previous model that is still bound to `model`.
model = create_model()
model.summary()

In [264]:
train_s2 = train_s.to_numpy().reshape(train_s.shape[0], train_s.shape[1],1)
val_s2 = val_s.to_numpy().reshape(val_s.shape[0], val_s.shape[1],1)
test_s2 = test_s.to_numpy().reshape(test_s.shape[0], test_s.shape[1],1)

In [None]:
model = create_model()
history2 = model.fit(train_s2, train_labels, validation_data=(val_s2, val_labels), epochs=300, batch_size=32, callbacks=[early_stopping_callback, reduce_lr_callback])

In [None]:
test_loss, test_acc = model.evaluate(test_s2, test_labels_expanded)
l.append({'model': 'scaled, dl', 'test_loss': test_loss, 'test_acc': test_acc})

In [None]:
# Accuracy curves: reference run vs. the first convolutional model.
curves = (
    (history, 'train_ann', 'val_acc_ann'),
    (history2, 'train_acc_dl', 'val_acc_dl'),
)
for run, train_label, val_label in curves:
    plt.plot(run.history['accuracy'], label=train_label)
    plt.plot(run.history['val_accuracy'], label=val_label)
plt.legend()
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.show()

In [None]:
# time to deep learning! ver 2, bigger
def create_model():
    """Build and compile a wider 1-D CNN (128/64 filters, 256-unit dense layer).

    The input shape is ``(n_features, 1)`` with ``n_features`` read from the
    global ``train_s``; the output size comes from the global ``labels``.
    Compiled with Adam (lr 0.01), categorical cross-entropy and an accuracy
    metric.
    """
    model = Sequential([
        Input(shape=(train_s.shape[1], 1)),
        Conv1D(128, 3, activation='relu'),
        MaxPooling1D(2),
        Conv1D(64, 3, activation='relu'),
        MaxPooling1D(2),
        Flatten(),
        Dense(256, activation='relu'),
        Dropout(0.3),
        Dense(len(labels), activation='softmax'),
    ])
    model.compile(
        optimizer=Adam(learning_rate=0.01),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Build the new architecture first; otherwise summary() describes the
# previous model that is still bound to `model`.
model = create_model()
model.summary()

In [None]:
model = create_model()
history3 = model.fit(train_s2, train_labels, validation_data=(val_s2, val_labels), epochs=300, batch_size=32, callbacks=[early_stopping_callback, reduce_lr_callback])

In [None]:
test_loss, test_acc = model.evaluate(test_s2, test_labels_expanded)
l.append({'model': 'scaled, dl v2', 'test_loss': test_loss, 'test_acc': test_acc})

In [None]:
# Accuracy curves: reference run, first CNN and the wider CNN.
curves = (
    (history, 'train_ann', 'val_acc_ann'),
    (history2, 'train_acc_dl', 'val_acc_dl'),
    (history3, 'train_acc_dl_v2', 'val_acc_dl_v2'),
)
for run, train_label, val_label in curves:
    plt.plot(run.history['accuracy'], label=train_label)
    plt.plot(run.history['val_accuracy'], label=val_label)
plt.legend()
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.show()

In [None]:
# time to deep learning! ver 3, longer
def create_model():
    """Build and compile a 1-D CNN with three Conv1D/MaxPooling1D stages.

    The input shape is ``(n_features, 1)`` with ``n_features`` read from the
    global ``train_s``; the output size comes from the global ``labels``.
    Compiled with Adam (lr 0.01), categorical cross-entropy and an accuracy
    metric.
    """
    model = Sequential([
        Input(shape=(train_s.shape[1], 1)),
        Conv1D(64, 3, activation='relu'),
        MaxPooling1D(2),
        Conv1D(32, 3, activation='relu'),
        MaxPooling1D(2),
        Conv1D(16, 3, activation='relu'),
        MaxPooling1D(2),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(len(labels), activation='softmax'),
    ])
    model.compile(
        optimizer=Adam(learning_rate=0.01),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Build the new architecture first; otherwise summary() describes the
# previous model that is still bound to `model`.
model = create_model()
model.summary()

In [None]:
model = create_model()
history4 = model.fit(train_s2, train_labels, validation_data=(val_s2, val_labels), epochs=300, batch_size=32, callbacks=[early_stopping_callback, reduce_lr_callback])

In [None]:
test_loss, test_acc = model.evaluate(test_s2, test_labels_expanded)
l.append({'model': 'scaled, dl v3', 'test_loss': test_loss, 'test_acc': test_acc})

In [None]:
# Accuracy curves: reference run and the three convolutional variants.
curves = (
    (history, 'train_ann', 'val_acc_ann'),
    (history2, 'train_acc_dl', 'val_acc_dl'),
    (history3, 'train_acc_dl_v2', 'val_acc_dl_v2'),
    (history4, 'train_acc_dl_v3', 'val_acc_dl_v3'),
)
for run, train_label, val_label in curves:
    plt.plot(run.history['accuracy'], label=train_label)
    plt.plot(run.history['val_accuracy'], label=val_label)
plt.legend()
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.show()

In [None]:
# time to deep learning! with fancy stuff to control overfitting
def create_model():
    """Build and compile the three-stage 1-D CNN with batch normalisation.

    BatchNormalization follows the first two Conv1D layers. The input shape
    is ``(n_features, 1)`` with ``n_features`` read from the global
    ``train_s``; the output size comes from the global ``labels``. Compiled
    with Adam (lr 0.01), categorical cross-entropy and an accuracy metric.
    """
    model = Sequential([
        Input(shape=(train_s.shape[1], 1)),
        Conv1D(64, 3, activation='relu'),
        BatchNormalization(),
        MaxPooling1D(2),
        Conv1D(32, 3, activation='relu'),
        BatchNormalization(),
        MaxPooling1D(2),
        Conv1D(16, 3, activation='relu'),
        MaxPooling1D(2),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(len(labels), activation='softmax'),
    ])
    model.compile(
        optimizer=Adam(learning_rate=0.01),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Build the new architecture first; otherwise summary() describes the
# previous model that is still bound to `model`.
model = create_model()
model.summary()

In [None]:
model = create_model()
history5 = model.fit(train_s2, train_labels, validation_data=(val_s2, val_labels), epochs=300, batch_size=32, callbacks=[early_stopping_callback, reduce_lr_callback])

In [None]:
test_loss, test_acc = model.evaluate(test_s2, test_labels_expanded)
l.append({'model': 'scaled, dl v3 + BN', 'test_loss': test_loss, 'test_acc': test_acc})

In [None]:
# Accuracy curves: three-stage CNN without and with batch normalisation.
curves = (
    (history4, 'train_acc_dl_v3', 'val_acc_dl_v3'),
    (history5, 'train_acc_dl_v3_bn', 'val_acc_dl_v3_bn'),
)
for run, train_label, val_label in curves:
    plt.plot(run.history['accuracy'], label=train_label)
    plt.plot(run.history['val_accuracy'], label=val_label)
plt.legend()
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.show()

In [None]:
# time to deep learning! batch-normalised CNN with a deeper dense head (v4)
def create_model():
    """Build and compile the batch-normalised CNN with a 128-64-32 dense head.

    Dropout (30 %) follows the 128- and 64-unit dense layers. The input shape
    is ``(n_features, 1)`` with ``n_features`` read from the global
    ``train_s``; the output size comes from the global ``labels``. Compiled
    with Adam (lr 0.01), categorical cross-entropy and an accuracy metric.
    """
    model = Sequential([
        Input(shape=(train_s.shape[1], 1)),
        Conv1D(64, 3, activation='relu'),
        BatchNormalization(),
        MaxPooling1D(2),
        Conv1D(32, 3, activation='relu'),
        BatchNormalization(),
        MaxPooling1D(2),
        Conv1D(16, 3, activation='relu'),
        MaxPooling1D(2),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(32, activation='relu'),
        Dense(len(labels), activation='softmax'),
    ])
    model.compile(
        optimizer=Adam(learning_rate=0.01),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Build the new architecture first; otherwise summary() describes the
# previous model that is still bound to `model`.
model = create_model()
model.summary()

In [None]:
model = create_model()
history = model.fit(train_ns, train_labels, validation_data=(val_ns, val_labels), epochs=300, batch_size=32, callbacks=[reduce_lr_callback])

In [None]:
test_loss, test_acc = model.evaluate(test_ns, test_labels_expanded)
l.append({'model': 'no scaled, dl v4', 'test_loss': test_loss, 'test_acc': test_acc})

In [None]:
# Train / validation accuracy of the run stored in `history`.
for metric, curve_label in (('accuracy', 'train_acc'), ('val_accuracy', 'val_acc')):
    plt.plot(history.history[metric], label=curve_label)
plt.legend()
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.show()

In [None]:
model = create_model()
history = model.fit(train_s2, train_labels, validation_data=(val_s2, val_labels), epochs=300, batch_size=32, callbacks=[reduce_lr_callback])

In [None]:
test_loss, test_acc = model.evaluate(test_s2, test_labels_expanded)
l.append({'model': 'scaled, dl v4', 'test_loss': test_loss, 'test_acc': test_acc})

In [None]:
# Train / validation accuracy of the run stored in `history`.
for metric, curve_label in (('accuracy', 'train_acc'), ('val_accuracy', 'val_acc')):
    plt.plot(history.history[metric], label=curve_label)
plt.legend()
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.show()

In [None]:
pd.DataFrame(l)

In [None]:
tf.keras.utils.plot_model(model, show_shapes=True)

In [None]:
test_pred = model.predict(test_s2)

In [297]:
test_pred = np.argmax(test_pred, axis=1)

In [301]:
cm = ConfusionMatrix(np.array(test_labels), test_pred, digit=5)

In [304]:
# Turn the integer class codes back into names. The mapping is derived from
# `labels_dict` (the encoding actually applied to the data) instead of a
# hand-written copy that could silently drift out of sync with it.
cm.relabel(mapping={code: name for name, code in labels_dict.items()})

In [None]:
plt.rcParams['figure.figsize'] = (10, 10)

# Confusion matrix drawn by pycm through matplotlib, with number labels enabled.
cm.plot(plot_lib="matplotlib", number_label=True, cmap=plt.cm.CMRmap_r)
plt.title('Matriz de confusión para el mejor modelo')
plt.xlabel('Clase predicha')
plt.ylabel('Clase observada')
plt.xticks(rotation=90)
plt.show()

In [None]:
dir(cm)

In [None]:
cm.ACC

In [None]:
cm.Overall_ACC

In [None]:
cm.KappaUnbiased