# **Predecir eficiencia de la gasolina (TensorFlow)**
Andrey Duvan Rincon Torres

---

In [26]:
# Librerias Nesesarias
import pathlib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import plotly.express as plx

## **El modelo**

In [None]:
# Modelo de regresion
class Model():
  def __init__(self):
    classifier = keras.Sequential()
    ## Adiciona capas una por una
    classifier.add(layers.Dense(units=16, activation='relu', input_shape=(30,)))
    # Adding dropout to prevent overfitting (regularización)
    classifier.add(layers.Dropout(0.1)) # 10% out in each epoc
    classifier.add(layers.Dense(units=16, activation='relu'))
    # Adding dropout to prevent overfitting (regularización)
    classifier.add(layers.Dropout(0.1))
    classifier.add(layers.Dense(units=1, activation='sigmoid'))
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    model.compile(optimizer=optimizer,loss='binary_crossentropy', metrics=['accuracy'])
    self.model = model

## **Los datos**

In [27]:
# Lectura de los datos
data = pd.read_csv('data.csv')
del data['Unnamed: 32']
x = data.iloc[:,2:]
y = data.iloc[:,1]
# Dividir en entrenamiento y test
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.1, random_state = 0)

In [None]:
# Clase de los datos
class DataModule():
    # Definimos un tamaño de lote en la calse
    def __init__(self, batch_size = 1):
        super(DataModule,self).__init__()
        self.batch_size = batch_size
    # Definimos la transformacion de la variable objetivo
    def transform_y(self, y):
      labelencoder_y = LabelEncoder()
      return labelencoder_y.fit_transform(y)
    # Definimos la funcion que transforma los datos
    def transform(self, x):
      scaler = StandardScaler()
      scaler.fit(x_train)
      return scaler.transform(data)
    # Definimos el tratamiento de los datos
    def setup(self, stage):
        # Aplicamos las tranformaciones
        if stage == "fit":
          train_data = self.transform(x_train)
          train_labels = self.transform_y(y_train)
          # Transformamos a tensores
          train_data = tf.convert_to_tensor(train_data, dtype=tf.float32)
          train_labels = tf.convert_to_tensor(train_labels, dtype=tf.float32)
          self.train_dataset = train_data,train_labels
        if stage == "test":
          test_data = self.transform(x_test)
          test_labels = self.transform_y(y_test)
          # Transformamos a tensores
          test_data = tf.convert_to_tensor(test_data, dtype=tf.float32)
          test_labels = tf.convert_to_tensor(test_labels, dtype=tf.float32)  
          self.test_dataset = test_data,test_labels

## **El entrenamiento**

In [None]:
# Muestre el progreso del entrenamiento imprimiendo un solo punto para cada época completada
class PrintDot(keras.callbacks.Callback):
  def on_epoch_end(self, epoch, logs):
    if epoch % 100 == 0: print('')
    print('.', end='')

In [None]:
# Definimos modelo y proceso de entrenamiento
class Regression():
    # creamos la estructura de la red
    def __init__(self,data,model,epoch,batch_size=32):
        super().__init__()
        self.data = data
        self.epoch= epoch
        self.model = model.model
        self.batch_size = batch_size
    # Entrenamiento del modelo
    def fit(self):
        model = self.model
        x, y  = self.data.train_dataset
        early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
        history = model.fit(x, y,
            epochs = self.epoch, validation_split = 0.2,
            verbose = 0, callbacks=[early_stop, PrintDot()], batch_size=self.batch_size)
        self.history = history
        return history
    # Paso de prueba
    def test_step(self):
        x,y = self.data.test_dataset
        # Predicting the Test set results
        y_pred = self.model.predict(x_test)
        y_pred[y_pred > 0.5] = 1
        y_pred[y_pred <=0.5] = 0
        cm = confusion_matrix(y, y_pred)
        print("Our accuracy is {}%".format(((cm[0][0] + cm[1][1])/y_test.shape[0])*100))
        return cm
    # Predicciones
    def predict(self,data):
        data = self.data.transform(data)
        data = tf.convert_to_tensor(data, dtype=tf.float32)
        test_predictions = self.model.predict(data)
        test_predictions[test_predictions > 0.5] = 1
        test_predictions[test_predictions <= 0.5] = 0
        return test_predictions

##  Ajustar el modelo

In [None]:
data_module = DataModule() # Ejecutamos modulo de datos
data_module.setup('fit')
model = Model() # Ejecutamos modelo
task = Regression(data_module,model,epoch = 100) # Ejecutamos proceso de entrenamiento
task.fit() # Entrenamos el modelo


.....................................

<keras.callbacks.History at 0x7f91c5e14e10>

In [None]:
data_module.setup('test')
task.test_step() # Test del modelo

3/3 - 0s - loss: 0.1059 - mae: 0.2664 - mse: 0.1059 - 26ms/epoch - 9ms/step
Testing set Mean Abs Error:  0.27 MPG


(0.10594634711742401, 0.2664196491241455, 0.10594634711742401)

In [None]:
hist = pd.DataFrame(task.history.history)
hist['epoch'] = task.history.epoch
hist.tail()

Unnamed: 0,loss,mae,mse,val_loss,val_mae,val_mse,epoch
32,0.089303,0.208358,0.089303,0.118146,0.261039,0.118146,32
33,0.08936,0.208333,0.08936,0.117696,0.260803,0.117696,33
34,0.087941,0.206623,0.087941,0.120247,0.264157,0.120247,34
35,0.087929,0.205028,0.087929,0.118642,0.264008,0.118642,35
36,0.087206,0.205791,0.087206,0.121167,0.263106,0.121167,36


In [None]:
def plot_history(history,val):
  hist = pd.DataFrame(history.history)
  hist['epoch'] = history.epoch

  fig = go.Figure([
        go.Scatter(x=hist['epoch'], y=hist[val],name="Train",hovertemplate="%{y}%{_xother}")
        ,go.Scatter(x=hist['epoch'], y=hist['val_' + val],name="Val",hovertemplate="%{y}%{_xother}")
        ])
  fig.update_layout(
      xaxis_title="Epoch",
      yaxis_title=val,
      hovermode="x unified"
  )
  return fig.show()
plot_history(task.history, val='mae')

In [None]:
plot_history(task.history, val='mse')

In [None]:
dat = test_dataset.drop(['MPG'],axis = 1)
test_predictions = task.predict(dat)
test_labels = data_module.transform(test_dataset)['MPG'].values
fig = plx.scatter(x=test_labels, y=test_predictions, height=500, width=500)
fig.update_layout(shapes=[dict(type= 'line',yref= 'y', y0=-2, y1= 3,xref= 'x', x0=-2, x1= 3)])
fig.update_layout(
    title="Ajuste de las predicciones",
    xaxis_title="test_labels",
    yaxis_title="test_predictions",
)
fig.show()

In [None]:
import plotly.graph_objects as go
error = test_predictions - test_labels
fig = plx.histogram(x=error,height=500, width=600)
fig.update_layout(
    title="Distribucion de los errores",
    xaxis_title="Prediction error",
    yaxis_title="count",
)
fig.show()