<div class="alert alert-block alert-info">

# Imports

</div>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import talos as ta
from talos.utils import hidden_layers, early_stopper

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam, RMSprop
from keras.activations import relu, elu
from keras.losses import binary_crossentropy

<div class="alert alert-block alert-info">

# Dataframes

</div>

In [None]:
# Load the pipe-delimited feature table produced for the TED talks dataset.
df_features = pd.read_csv(
    './dataset/ted_main_features.csv',
    sep="|",
    quotechar='"',
)

In [None]:
df_features.columns

In [None]:
df_features.shape

In [None]:
df_features.head()

<div class="alert alert-block alert-info">

# Dummies

</div>

## Create Dummies

In [None]:
# One-hot encode the four calendar columns. Each dummy frame uses the source
# column name as its prefix, and drop_first=True omits the first category.
df_film_month, df_published_month, df_film_day, df_published_day = (
    pd.get_dummies(df_features[column], prefix=column, drop_first=True)
    for column in (
        'film_month',
        'published_month',
        'film_dayofweek',
        'published_dayofweek',
    )
)

In [None]:
df_film_month.head(2)

In [None]:
df_film_day.head(2)

In [None]:
# Shapes of the filming-date dummy frames, one per line.
print(df_film_month.shape, df_film_day.shape, sep='\n')

In [None]:
df_published_month.head(2)

In [None]:
df_published_day.head(2)

In [None]:
# Shapes of the publication-date dummy frames, one per line.
print(df_published_month.shape, df_published_day.shape, sep='\n')

## Drop Columns

In [None]:
# Remove the raw calendar columns that were one-hot encoded above.
df_features = df_features.drop(
    ['film_month', 'film_dayofweek', 'published_month', 'published_dayofweek'],
    axis='columns',
)
df_features.head()

## Join Dummies

In [None]:
# Attach every dummy frame to the feature table, aligning rows on the index.
df_features = (
    df_features
    .merge(df_film_month, left_index=True, right_index=True)
    .merge(df_published_month, left_index=True, right_index=True)
    .merge(df_film_day, left_index=True, right_index=True)
    .merge(df_published_day, left_index=True, right_index=True)
)
df_features.head()

<div class="alert alert-block alert-info">

# Train, Test, Split

</div>

In [None]:
# Build the target vector y ('views') and the feature matrix X (everything else)
y = df_features['views']
X = df_features.drop('views', axis='columns')

In [None]:
# Split into train and test sets: 30% held out, fixed random_state for repeatability
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=101,
)

In [None]:
# Standardize the features: the scaler is fitted on the training split only,
# then the same fitted transformation is applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Sanity-check the resulting shapes
print('X train shape: ', X_train.shape)
print('X test shape: ', X_test.shape)
print('y train shape: ', y_train.shape)
print('y test shape: ', y_test.shape)

In [None]:
# First, define the model-building function; every hyperparameter value is read from the params dictionary

def views_ted_talks_model(x_train, y_train, x_val, y_val, params):
    """Build, compile and fit a dense regression network for one Talos trial.

    Parameters
    ----------
    x_train, y_train
        Training features and targets. ``x_train.shape[1]`` sets the input
        width of the first layer, so ``x_train`` must be 2-D.
    x_val, y_val
        Validation features and targets, evaluated by Keras after each epoch.
    params : dict
        Hyperparameters for this trial. Read directly here: ``'first_neuron'``,
        ``'activation'``, ``'batch_size'`` and ``'epochs'``. The whole dict is
        also handed to Talos' ``hidden_layers`` helper, which consumes further
        keys (presumably ``'hidden_layers'``, ``'dropout'`` and ``'shapes'`` --
        confirm against the Talos documentation).

    Returns
    -------
    tuple
        ``(history, model)``: the object returned by ``model.fit`` and the
        fitted model, in that order.
    """
    model = Sequential()

    # Input layer: width and activation come from the search space.
    model.add(Dense(params['first_neuron'], input_dim=x_train.shape[1],
                    activation=params['activation']))

    # Needed to explore different hidden-layer configurations; the last
    # argument matches the single-unit output layer added below.
    hidden_layers(model, params, 1)

    # Single unit with no activation: a linear output for regression.
    model.add(Dense(1))

    model.compile(optimizer='rmsprop',
                  loss='mse',
                  metrics=['mae'])

    # validation_data is passed as a tuple, the form documented by Keras.
    # A list may be interpreted as a multi-input ``x`` by some releases.
    history = model.fit(x_train, y_train,
                        validation_data=(x_val, y_val),
                        batch_size=params['batch_size'],
                        epochs=params['epochs'],
                        verbose=1,
                        callbacks=[early_stopper(params['epochs'])])

    return history, model

In [None]:
# Second, store the hyperparameter space to explore in a dictionary.
# The grid below holds 4 * 3 * 3 * 1 * 4 * 3 * 2 = 864 combinations.

p = {
    # The next four keys are required in order to try different numbers of hidden layers
    'first_neuron': [512, 256, 128, 64],
    'hidden_layers': [8, 12, 16],
    'dropout': [0, 0.3, 0.6],
    'shapes': ['triangle'],
    # Training settings read by the model function itself
    'batch_size': [512, 128, 32, 16],
    'epochs': [500, 700, 2000],
    'activation': [relu, elu],
}

In [None]:
# Third, run the search with the Scan class provided by Talos.
# X_train is already an ndarray (output of the scaler), but y_train is still a
# pandas Series whose index has gaps after the train/test split. It is passed
# as a plain array so that any positional indexing Talos performs on y cannot
# be misread as label lookups.

t = ta.Scan(
    x=X_train,
    y=np.asarray(y_train),
    model=views_ted_talks_model,
    params=p,
    experiment_name='views_ted_talks_model',
    seed=42,
)

In [None]:
# pandas DataFrame summarizing the information recorded for each trial
t.data.head()