In [1]:
import pandas as pd
import numpy as np
import scipy as sci
import bokeh.plotting as bpl
from bokeh.io import export_png
from bokeh.models import PrintfTickFormatter
import math
import bokeh.layouts as bly
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [42]:
bpl.output_notebook()

In [20]:
data = pd.read_csv("../data/originales/posts.csv")

In [21]:
metricas = ['likes', 'love', 'angry', 'wow', 'haha', 'sad', 'shares']

In [22]:
data["reacciones"] = data[metricas].sum(1)

In [23]:
fdata=data[(data["scope"]!=0)&(data["reacciones"]>10)&(data["reacciones"]<=data["scope"])]

In [24]:
mdata = fdata[metricas + ["scope"]]

In [25]:
mdata[:3]

Unnamed: 0,likes,love,angry,wow,haha,sad,shares,scope
0,18,7,0,0,0,0,4,3660
1,526,117,189,15,51,8,107,77468
2,28,1,0,0,0,0,13,4399


In [26]:
train_dataset = mdata.sample(frac=0.8, random_state=0)
test_dataset = mdata.drop(train_dataset.index)

In [27]:
len(train_dataset)

5469

In [29]:
len(test_dataset)

1367

In [30]:
train_labels = train_dataset.pop('scope')
test_labels = test_dataset.pop('scope')

In [31]:
train_stats = train_dataset.describe().T
train_stats

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
likes,5469.0,143.385628,896.060518,0.0,18.0,42.0,113.0,45952.0
love,5469.0,25.119217,133.628907,0.0,1.0,4.0,17.0,4694.0
angry,5469.0,4.589687,57.418947,0.0,0.0,0.0,1.0,2247.0
wow,5469.0,4.491863,37.880097,0.0,0.0,0.0,2.0,1398.0
haha,5469.0,9.505028,207.284951,0.0,0.0,0.0,2.0,14418.0
sad,5469.0,1.879503,26.109105,0.0,0.0,0.0,0.0,1244.0
shares,5469.0,180.894313,5059.991194,0.0,5.0,14.0,44.0,342306.0


In [32]:
def norm(x):
  return (x - train_stats['mean']) / train_stats['std']

In [33]:
normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)

In [189]:
def build_model():
  model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=[len(train_dataset.keys())]),
    layers.Dense(64, activation='relu'),
    layers.Dense(1)
  ])

  optimizer = tf.keras.optimizers.RMSprop(0.001)

  model.compile(loss='mse',
                optimizer=optimizer,
                metrics=['mae', 'mse'])
  return model

In [190]:
model = build_model()

In [191]:
model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_17 (Dense)             (None, 64)                512       
_________________________________________________________________
dense_18 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_19 (Dense)             (None, 1)                 65        
Total params: 4,737
Trainable params: 4,737
Non-trainable params: 0
_________________________________________________________________


In [192]:
datainp = train_dataset
datalab = train_labels
testinp = test_dataset
testlab = test_labels

In [193]:
# Display training progress by printing a single dot for each completed epoch
class PrintDot(keras.callbacks.Callback):
  def on_epoch_end(self, epoch, logs):
    if epoch % 100 == 0: print('')
    print('.', end='')

EPOCHS = 1000

history = model.fit(
  datainp, datalab,
  epochs=EPOCHS, validation_split = 0.2, verbose=0,
  callbacks=[PrintDot()])


....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
..........................................................................................

In [194]:
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
hist[-3:]

Unnamed: 0,loss,mae,mse,val_loss,val_mae,val_mse,epoch
997,11626860000.0,11761.013672,11626860000.0,11008660000.0,12990.950195,11008660000.0,997
998,10031070000.0,11775.509766,10031070000.0,10600300000.0,12947.262695,10600300000.0,998
999,12686860000.0,11990.219727,12686860000.0,10001980000.0,12889.575195,10001980000.0,999


In [195]:
p0 = bpl.figure(plot_width=300,plot_height=300,toolbar_location=None, title="Datos normalizados")

In [196]:
p0.line(x=hist["epoch"],y=hist["val_mae"],legend_label="Error validación", color="red")
p0.line(x=hist["epoch"],y=hist["mae"],legend_label="Error entrenamiento", color="black")

In [197]:
p0.xaxis.axis_label = 'Epoch'
p0.yaxis.axis_label = 'Mean Abs Error [Scope]'

In [198]:
p1 = bpl.figure(plot_width=300,plot_height=300,toolbar_location=None, title="Datos normalizados")

In [199]:
p1.line(x=hist["epoch"],y=hist["val_mse"],legend_label="Error validación", color="red")
p1.line(x=hist["epoch"],y=hist["mse"],legend_label="Error entrenamiento", color="black")

In [200]:
p1.xaxis.axis_label = 'Epoch'
p1.yaxis.axis_label = 'Mean Square Error [Scope^2]'

In [201]:
bpl.show(bly.row([p0,p1]))

In [202]:
loss, mae, mse = model.evaluate(testinp, testlab, verbose=2)
print("Testing set Mean Abs Error: {:5.2f} Scope".format(mae))

43/43 - 0s - loss: 15517217792.0000 - mae: 15134.2930 - mse: 15517217792.0000
Testing set Mean Abs Error: 15134.29 Scope


In [203]:
model = build_model()

# The patience parameter is the amount of epochs to check for improvement
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=50)

history = model.fit(datainp, datalab, epochs=EPOCHS,
                    validation_split = 0.2, verbose=0, callbacks=[early_stop, PrintDot()])


....................................................................................................
....................................................................................................
.........................................................................

In [204]:
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
hist[-3:]

Unnamed: 0,loss,mae,mse,val_loss,val_mae,val_mse,epoch
270,9768780000.0,13340.78125,9768780000.0,6547294000.0,12654.310547,6547294000.0,270
271,9434931000.0,13246.570312,9434931000.0,6582029000.0,12674.728516,6582029000.0,271
272,9183062000.0,13158.078125,9183062000.0,6735285000.0,12694.072266,6735285000.0,272


In [205]:
p0 = bpl.figure(plot_width=300,plot_height=300,toolbar_location=None, title="Datos normalizados")

In [206]:
p0.line(x=hist["epoch"],y=hist["val_mae"],legend_label="Error validación", color="red")
p0.line(x=hist["epoch"],y=hist["mae"],legend_label="Error entrenamiento", color="black")

In [207]:
p0.xaxis.axis_label = 'Epoch'
p0.yaxis.axis_label = 'Mean Abs Error [Scope]'

In [208]:
p1 = bpl.figure(plot_width=300,plot_height=300,toolbar_location=None, title="Datos normalizados")

In [209]:
p1.line(x=hist["epoch"],y=hist["val_mse"],legend_label="Error validación", color="red")
p1.line(x=hist["epoch"],y=hist["mse"],legend_label="Error entrenamiento", color="black")

In [210]:
p1.xaxis.axis_label = 'Epoch'
p1.yaxis.axis_label = 'Mean Square Error [Scope^2]'

In [211]:
bpl.show(bly.row([p0,p1]))

In [212]:
loss, mae, mse = model.evaluate(normed_test_data, test_labels, verbose=2)
print("Testing set Mean Abs Error: {:5.2f} Scope".format(mae))

43/43 - 0s - loss: 65166135296.0000 - mae: 31755.8594 - mse: 65166135296.0000
Testing set Mean Abs Error: 31755.86 Scope


In [213]:
test_predictions = model.predict(testinp).flatten()

In [214]:
pc = bpl.figure(plot_width=400, plot_height=400, title="Datos normalizados", toolbar_location=None,x_axis_type="log",y_axis_type="log")

In [215]:
pc.line(x=[1,10000000],y=[1,10000000],line_width=0.5,line_dash="dashed", color="black")
pc.circle(x=testlab,y=test_predictions,color="red")

In [216]:
pc.xaxis.axis_label = 'Valores reales [MPG]'
pc.yaxis.axis_label = 'Predicción [MPG]'

In [217]:
bpl.show(pc)

In [227]:
error = test_predictions - test_labels

In [238]:
histogram = np.histogram(error, bins=300)

In [239]:
ph = bpl.figure(plot_width=300, plot_height=300, title="Datos normalizados", toolbar_location=None)

In [240]:
ph.vbar(x=histogram[1][:-1],top=histogram[0],width=(histogram[1][1]-histogram[1][0])*0.9,line_color="black",fill_color="red",fill_alpha=0.7)

In [241]:
ph.xaxis.axis_label = 'Error en la predicción [MPG]'
ph.yaxis.axis_label = 'Frecuencia'

In [242]:
bpl.show(ph)

In [226]:
np.abs(error).describe()

count    1.367000e+03
mean     1.432495e+04
std      1.134389e+05
min      5.915527e-01
25%      1.550213e+03
50%      3.244619e+03
75%      7.176052e+03
max      2.996994e+06
Name: scope, dtype: float64