# **Predecir eficiencia de la gasolina (TensorFlow)**
Andrey Duvan Rincon Torres

---

In [3]:
# Librerias Nesesarias
import pathlib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import plotly.express as plx
import plotly.graph_objects as go

## **El modelo**

In [4]:
# Modelo de regresion
class Model():
  def __init__(self):
    model = keras.Sequential()
    model.add(layers.Dense(units=16, activation='relu', input_shape=(30,)))
    model.add(layers.Dropout(0.1))
    model.add(layers.Dense(units=16, activation='relu'))
    model.add(layers.Dropout(0.1))
    model.add(layers.Dense(units=1, activation='sigmoid'))
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)
    model.compile(optimizer=optimizer,loss='binary_crossentropy', metrics=['accuracy'])
    self.model = model

## **Los datos**

In [6]:
# Lectura de los datos
data = pd.read_csv('data.csv')
del data['Unnamed: 32']
x = data.iloc[:,2:]
y = data.iloc[:,1]
# Dividir en entrenamiento y test
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.1, random_state = 0)

In [7]:
# Clase de los datos
class DataModule():
    # Definimos un tamaño de lote en la calse
    def __init__(self, batch_size = 32):
        super(DataModule,self).__init__()
        self.batch_size = batch_size
        self.labelencoder_y = LabelEncoder()
        self.labelencoder_y.fit(y_train)
        self.scaler_x = StandardScaler()
        self.scaler_x.fit(x_train)
    # Definimos el tratamiento de los datos
    def setup(self, stage):
        if stage == "fit":
          # Aplicamos las tranformaciones
          train_data = self.scaler_x.transform(x_train)
          train_labels = self.labelencoder_y.transform(y_train)
          # Transformamos a tensores
          train_data = tf.convert_to_tensor(train_data, dtype=tf.float32)
          train_labels = tf.convert_to_tensor(train_labels, dtype=tf.float32)
          self.train_dataset = train_data,train_labels
        if stage == "test":
          test_data = self.scaler_x.transform(x_test)
          test_labels = self.labelencoder_y.transform(y_test)
          # Transformamos a tensores
          test_data = tf.convert_to_tensor(test_data, dtype=tf.float32)
          test_labels = tf.convert_to_tensor(test_labels, dtype=tf.float32)  
          self.test_dataset = test_data,test_labels

## **El entrenamiento**

In [8]:
# Muestre el progreso del entrenamiento imprimiendo un solo punto para cada época completada
class PrintDot(keras.callbacks.Callback):
  def on_epoch_end(self, epoch, logs):
    if epoch % 100 == 0: print('')
    print('.', end='')

In [19]:
# Definimos modelo y proceso de entrenamiento
class Train():
    # creamos la estructura de la red
    def __init__(self,data,model,epoch,batch_size=32):
        super().__init__()
        self.data = data
        self.epoch= epoch
        self.model = model.model
        self.batch_size = batch_size
    # Entrenamiento del modelo
    def fit(self):
        model = self.model
        x, y  = self.data.train_dataset
        my_callbacks = [
                        tf.keras.callbacks.EarlyStopping(patience=2),
                        tf.keras.callbacks.ModelCheckpoint(filepath='model.{epoch:02d}-{val_loss:.2f}.h5'),
                        tf.keras.callbacks.TensorBoard(log_dir='./logs'),
                        ]
        history = model.fit(x, y,
            epochs = self.epoch, validation_split = 0.2,
            verbose = 0, callbacks=my_callbacks, batch_size=self.batch_size)
        self.history = history
        return history
    # Paso de prueba
    def test_step(self):
        self.data.setup('test')
        x,y = self.data.test_dataset
        # Predicting the Test set results
        y_pred = self.model.predict(x_test)
        y_pred[y_pred > 0.5] = 1
        y_pred[y_pred <=0.5] = 0
        cm = confusion_matrix(y, y_pred)
        print("Our accuracy is {}%".format(((cm[0][0] + cm[1][1])/y_test.shape[0])*100))
        return cm
    # Predicciones
    def predict(self,data):
        data = self.data.transform(data)
        data = tf.convert_to_tensor(data, dtype=tf.float32)
        test_predictions = self.model.predict(data)
        test_predictions[test_predictions > 0.5] = 1
        test_predictions[test_predictions <= 0.5] = 0
        return test_predictions

##  Ajustar el modelo

In [20]:
data_module = DataModule() # Ejecutamos modulo de datos
data_module.setup('fit')
model = Model() # Ejecutamos modelo
task = Train(data_module,model,epoch = 150) # Ejecutamos proceso de entrenamiento
task.fit() # Entrenamos el modelo

<keras.callbacks.History at 0x7f05d44e7150>

In [21]:
cm = task.test_step()

Our accuracy is 38.59649122807017%


In [22]:
fig = plx.imshow(cm, text_auto=True)
fig.show()

In [23]:
hist = pd.DataFrame(task.history.history)
hist['epoch'] = task.history.epoch
hist.tail()

Unnamed: 0,loss,accuracy,val_loss,val_accuracy,epoch
7,0.055264,0.98533,0.029437,0.980583,7
8,0.052937,0.982885,0.027406,0.980583,8
9,0.038765,0.99022,0.026181,0.990291,9
10,0.035995,0.98533,0.030233,0.990291,10
11,0.037186,0.987775,0.030999,0.980583,11


In [24]:
def plot_history(history,val):
  hist = pd.DataFrame(history.history)
  hist['epoch'] = history.epoch

  fig = go.Figure([
        go.Scatter(x=hist['epoch'], y=hist[val],name="Train",hovertemplate="%{y}%{_xother}")
        ,go.Scatter(x=hist['epoch'], y=hist['val_' + val],name="Val",hovertemplate="%{y}%{_xother}")
        ])
  fig.update_layout(
      xaxis_title="Epoch",
      yaxis_title=val,
      hovermode="x unified"
  )
  return fig.show()
plot_history(task.history, val='loss')

In [25]:
plot_history(task.history, val='accuracy')