<div class="alert alert-block alert-info">

# Imports

</div>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import Adam, RMSprop
from keras.activations import relu, elu
from keras.losses import binary_crossentropy
from keras.callbacks import EarlyStopping

Using TensorFlow backend.


<div class="alert alert-block alert-info">

# Dataframes

</div>

In [2]:
# Load the TED feature table from a pipe-separated CSV that uses double quotes for quoting.
df_features = pd.read_csv(
    './dataset/ted_main_features.csv',
    sep='|',
    quotechar='"',
)

In [3]:
# List the column labels of the loaded feature table.
df_features.columns

Index(['comments', 'duration', 'languages', 'num_speaker', 'views',
       'film_month', 'film_dayofweek', 'published_month',
       'published_dayofweek', 'event_TED',
       ...
       'world', 'worldwide', 'worth', 'write', 'writer', 'wrong', 'ye', 'year',
       'york', 'young'],
      dtype='object', length=580)

In [4]:
# (rows, columns) of the feature table.
df_features.shape

(2550, 580)

In [5]:
# Preview the first five rows of the feature table.
df_features.head()

Unnamed: 0,comments,duration,languages,num_speaker,views,film_month,film_dayofweek,published_month,published_dayofweek,event_TED,...,world,worldwide,worth,write,writer,wrong,ye,year,york,young
0,10,810,1,1,418368,8,5,9,1,1,...,0,0,0,0,0,0,0,0,0,0
1,3,795,1,1,542088,8,5,9,4,1,...,0,0,0,0,0,0,0,0,0,0
2,8,723,4,1,707788,8,5,9,4,1,...,0,0,0,0,0,0,0,0,1,0
3,10,934,2,1,527314,8,5,9,2,1,...,0,0,0,0,0,0,0,0,0,0
4,33,722,4,1,613915,7,2,9,3,1,...,1,0,0,0,0,0,0,0,0,0


<div class="alert alert-block alert-info">

# Dummies

</div>

## Create Dummies

In [6]:
# One-hot encode each calendar column. The dummy columns are prefixed with the
# source column name and the first level is dropped (drop_first=True).
df_film_month, df_published_month, df_film_day, df_published_day = (
    pd.get_dummies(df_features[column], prefix=column, drop_first=True)
    for column in (
        'film_month',
        'published_month',
        'film_dayofweek',
        'published_dayofweek',
    )
)

In [7]:
# Preview the first two rows of the filming-month dummies.
df_film_month.head(2)

Unnamed: 0,film_month_2,film_month_3,film_month_4,film_month_5,film_month_6,film_month_7,film_month_8,film_month_9,film_month_10,film_month_11,film_month_12
0,0,0,0,0,0,0,1,0,0,0,0
1,0,0,0,0,0,0,1,0,0,0,0


In [8]:
# Preview the first two rows of the filming day-of-week dummies.
df_film_day.head(2)

Unnamed: 0,film_dayofweek_1,film_dayofweek_2,film_dayofweek_3,film_dayofweek_4,film_dayofweek_5,film_dayofweek_6
0,0,0,0,0,1,0
1,0,0,0,0,1,0


In [9]:
# Print the shape of each filming-date dummy frame.
for dummy_frame in (df_film_month, df_film_day):
    print(dummy_frame.shape)

(2550, 11)
(2550, 6)


In [10]:
# Preview the first two rows of the publication-month dummies.
df_published_month.head(2)

Unnamed: 0,published_month_2,published_month_3,published_month_4,published_month_5,published_month_6,published_month_7,published_month_8,published_month_9,published_month_10,published_month_11,published_month_12
0,0,0,0,0,0,0,0,1,0,0,0
1,0,0,0,0,0,0,0,1,0,0,0


In [11]:
# Preview the first two rows of the publication day-of-week dummies.
df_published_day.head(2)

Unnamed: 0,published_dayofweek_1,published_dayofweek_2,published_dayofweek_3,published_dayofweek_4,published_dayofweek_5,published_dayofweek_6
0,1,0,0,0,0,0
1,0,0,0,1,0,0


In [12]:
# Print the shape of each publication-date dummy frame.
for dummy_frame in (df_published_month, df_published_day):
    print(dummy_frame.shape)

(2550, 11)
(2550, 6)


## Drop Columns

In [13]:
# Remove the raw calendar columns now that their dummy encodings exist.
calendar_columns = ['film_month', 'film_dayofweek', 'published_month', 'published_dayofweek']
df_features = df_features.drop(calendar_columns, axis='columns')
df_features.head()

Unnamed: 0,comments,duration,languages,num_speaker,views,event_TED,event_TEDx,event_noTED,previous_talks,previous_talk_views,...,world,worldwide,worth,write,writer,wrong,ye,year,york,young
0,10,810,1,1,418368,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,3,795,1,1,542088,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,8,723,4,1,707788,1,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,10,934,2,1,527314,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,33,722,4,1,613915,1,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0


## Join Dummies

In [14]:
# Attach every dummy frame to the feature table, aligning rows on the index.
for dummy_frame in (df_film_month, df_published_month, df_film_day, df_published_day):
    df_features = df_features.merge(dummy_frame, left_index=True, right_index=True)
df_features.head()

Unnamed: 0,comments,duration,languages,num_speaker,views,event_TED,event_TEDx,event_noTED,previous_talks,previous_talk_views,...,film_dayofweek_3,film_dayofweek_4,film_dayofweek_5,film_dayofweek_6,published_dayofweek_1,published_dayofweek_2,published_dayofweek_3,published_dayofweek_4,published_dayofweek_5,published_dayofweek_6
0,10,810,1,1,418368,1,0,0,0,0,...,0,0,1,0,1,0,0,0,0,0
1,3,795,1,1,542088,1,0,0,0,0,...,0,0,1,0,0,0,0,1,0,0
2,8,723,4,1,707788,1,0,0,0,0,...,0,0,1,0,0,0,0,1,0,0
3,10,934,2,1,527314,1,0,0,0,0,...,0,0,1,0,0,1,0,0,0,0
4,33,722,4,1,613915,1,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0


<div class="alert alert-block alert-info">

# Train, Test, Split

</div>

In [15]:
# Target vector: the `views` column.
y = df_features['views']
# Feature matrix: every column except the target.
X = df_features.drop('views', axis=1)

In [16]:
# Hold out 30% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=101,
)

In [17]:
# Standardize the features: the scaler is fitted on the training split only and
# the same fitted transform is then applied to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Report the shape of every split.
for label, split in (
    ('X train shape: ', X_train),
    ('X test shape: ', X_test),
    ('y train shape: ', y_train),
    ('y test shape: ', y_test),
):
    print(label, split.shape)

X train shape:  (1785, 609)
X test shape:  (765, 609)
y train shape:  (1785,)
y test shape:  (765,)


In [18]:
# Model factory: builds and compiles the fully connected regression network.
# Every hyperparameter is a keyword argument whose default reproduces the
# architecture that was originally hard-coded here.

def build_model(x_train,
                hidden_layers=((512, 0.1), (256, 0.1), (256, 0.1),
                               (128, 0.3), (128, 0.3),
                               (64, 0.3), (64, 0.3),
                               (32, 0.1), (32, 0.1)),
                learning_rate=3e-6,
                output_activation='relu'):
    """Build and compile a dense feed-forward network with a single output unit.

    Each hidden stage is ``Dense(units) -> ReLU -> Dropout(rate)``. The stack
    ends with ``Dense(1)`` followed by ``output_activation``. The model is
    compiled with the Adam optimizer, mean squared error as the loss and mean
    absolute error as an additional metric.

    Parameters
    ----------
    x_train : array-like with a ``shape`` attribute
        Only ``x_train.shape[1]`` is read; it sets the input dimension of the
        first Dense layer.
    hidden_layers : iterable of (units, dropout_rate) pairs, optional
        One pair per hidden stage, in order. May be empty, in which case the
        single output layer receives the input dimension directly.
    learning_rate : float, optional
        Learning rate handed to Adam.
    output_activation : str, optional
        Activation applied after the final ``Dense(1)``.
        NOTE(review): the default ``'relu'`` keeps predictions non-negative
        but has zero gradient while the output pre-activation is negative;
        consider ``'linear'`` if the loss does not move.

    Returns
    -------
    keras.models.Sequential
        The compiled, untrained model.
    """
    model = Sequential()

    # Only the first layer added to the model declares the input dimension;
    # after it has been used once the kwargs are emptied.
    first_layer_kwargs = {'input_dim': x_train.shape[1]}

    for units, dropout_rate in hidden_layers:
        model.add(Dense(units, **first_layer_kwargs))
        first_layer_kwargs = {}
        model.add(Activation('relu'))
        model.add(Dropout(dropout_rate))

    # Single-unit output head.
    model.add(Dense(1, **first_layer_kwargs))
    model.add(Activation(output_activation))

    model.compile(optimizer=Adam(lr=learning_rate), loss='mse', metrics=['mae'])

    return model

In [19]:
# Build the network; its input dimension is taken from the column count of X_train.
model = build_model(X_train)

In [20]:
# Create the early-stopping callback and collect it in the callbacks list.
# NOTE(review): min_delta is expressed in units of the monitored loss. With an
# MSE loss on raw view counts, 0.03 is negligible - confirm this is intended.
early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0.03,
    patience=4,
    verbose=1,
    restore_best_weights=True,
)
callbacks_list = [early_stopping]

In [21]:
# Train the model. `callbacks_list` must be passed explicitly, otherwise the
# early-stopping callback never runs and training continues for all 8000
# epochs. With it, `epochs` is only an upper bound.
history = model.fit(
    X_train,
    y_train,
    batch_size=16,
    epochs=8000,
    validation_split=0.2,
    callbacks=callbacks_list,
    verbose=1,
)

Train on 1428 samples, validate on 357 samples
Epoch 1/8000
Epoch 2/8000
Epoch 3/8000
Epoch 4/8000
Epoch 5/8000
Epoch 6/8000
Epoch 7/8000
Epoch 8/8000
Epoch 9/8000
Epoch 10/8000
Epoch 11/8000
Epoch 12/8000
Epoch 13/8000
Epoch 14/8000
Epoch 15/8000
Epoch 16/8000
Epoch 17/8000
Epoch 18/8000
Epoch 19/8000
Epoch 20/8000
Epoch 21/8000
Epoch 22/8000
Epoch 23/8000
Epoch 24/8000
Epoch 25/8000
Epoch 26/8000
Epoch 27/8000
Epoch 28/8000
Epoch 29/8000
Epoch 30/8000
Epoch 31/8000
Epoch 32/8000
Epoch 33/8000
Epoch 34/8000
Epoch 35/8000
Epoch 36/8000
Epoch 37/8000
Epoch 38/8000
Epoch 39/8000
Epoch 40/8000


Epoch 41/8000
Epoch 42/8000
Epoch 43/8000
Epoch 44/8000
Epoch 45/8000
Epoch 46/8000
Epoch 47/8000
Epoch 48/8000
Epoch 49/8000
Epoch 50/8000
Epoch 51/8000
Epoch 52/8000
Epoch 53/8000
Epoch 54/8000
Epoch 55/8000
Epoch 56/8000
Epoch 57/8000
Epoch 58/8000
Epoch 59/8000
Epoch 60/8000
Epoch 61/8000
Epoch 62/8000
Epoch 63/8000
Epoch 64/8000
Epoch 65/8000
Epoch 66/8000
Epoch 67/8000
Epoch 68/8000
Epoch 69/8000
Epoch 70/8000
Epoch 71/8000
Epoch 72/8000
Epoch 73/8000
Epoch 74/8000
Epoch 75/8000
Epoch 76/8000
Epoch 77/8000
Epoch 78/8000
Epoch 79/8000


Epoch 80/8000
Epoch 81/8000
Epoch 82/8000
Epoch 83/8000
Epoch 84/8000
Epoch 85/8000
Epoch 86/8000
Epoch 87/8000
Epoch 88/8000
Epoch 89/8000
Epoch 90/8000
Epoch 91/8000
Epoch 92/8000
Epoch 93/8000
Epoch 94/8000
Epoch 95/8000
Epoch 96/8000
Epoch 97/8000
Epoch 98/8000
Epoch 99/8000
Epoch 100/8000
Epoch 101/8000
Epoch 102/8000
Epoch 103/8000
Epoch 104/8000
Epoch 105/8000
Epoch 106/8000
Epoch 107/8000
Epoch 108/8000
Epoch 109/8000
Epoch 110/8000
Epoch 111/8000
Epoch 112/8000
Epoch 113/8000
Epoch 114/8000
Epoch 115/8000
Epoch 116/8000
Epoch 117/8000
Epoch 118/8000
Epoch 119/8000


Epoch 120/8000
Epoch 121/8000
Epoch 122/8000
Epoch 123/8000
Epoch 124/8000
Epoch 125/8000
Epoch 126/8000
Epoch 127/8000
Epoch 128/8000
Epoch 129/8000
Epoch 130/8000
Epoch 131/8000
Epoch 132/8000
Epoch 133/8000
Epoch 134/8000
Epoch 135/8000
Epoch 136/8000
Epoch 137/8000
Epoch 138/8000
Epoch 139/8000
Epoch 140/8000
Epoch 141/8000
Epoch 142/8000
Epoch 143/8000
Epoch 144/8000
Epoch 145/8000
Epoch 146/8000
Epoch 147/8000
Epoch 148/8000
Epoch 149/8000
Epoch 150/8000
Epoch 151/8000
Epoch 152/8000
Epoch 153/8000
Epoch 154/8000
Epoch 155/8000
Epoch 156/8000
Epoch 157/8000
Epoch 158/8000




KeyboardInterrupt: 