# **Predecir eficiencia de la gasolina (Pytorch-Ligthning)**
Andrey Duvan Rincon Torres

---

In [1]:
# Librerias Nesesarias
import torch
import pandas as pd
import numpy as np
from torch import nn
import pytorch_lightning as pl
from torch.utils.data import Dataset, TensorDataset, DataLoader
from torchtext.utils import download_from_url
from torchmetrics.functional import accuracy
mse_loss = nn.MSELoss(reduction = 'mean')

  from .autonotebook import tqdm as notebook_tqdm


## **El modelo**

In [2]:

# Definimos modelo y proceso de entrenamiento
class Regression(pl.LightningModule):
    # creamos la estructura de la red
    def __init__(self):
        super(Regression,self).__init__()
        self.layer_1 = torch.nn.Linear(9,64)
        self.layer_2 = torch.nn.Linear(64,64)
        self.layer_3 = torch.nn.Linear(64,1)
    # definimos el comportamiento de las capas
    def forward(self, x):
        # layer 1 (activacion relu)
        x = self.layer_1(x)
        x = torch.relu(x)
        # layer 2 (activacion relu)
        x = self.layer_2(x)
        x = torch.relu(x)
        # layer 3 (salida final sin activacion)
        x = self.layer_3(x)
        return x
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.forward(x)
        loss = mse_loss(y_hat, y)
        logs = {'loss': loss}
        return {'loss': loss, 'log': logs}
    
    def validation_step(self, batch, batch_idx):
        loss = self._shared_eval_step(batch, batch_idx)
        return {"val_loss": loss}
    def validation_epoch_end(self, outputs):
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        tensorboard_logs = {'val_loss': avg_loss}
        return {'avg_val_loss': avg_loss, 'log': tensorboard_logs}
    
    def test_step(self, batch, batch_idx):
        loss, acc = self._shared_eval_step(batch, batch_idx)
        metrics = {"test_acc": acc, "test_loss": loss}
        self.log_dict(metrics)
        return metrics
    
    def _shared_eval_step(self,batch,batch_idx):
        x, y  = batch
        y_hat = self.forward(x)
        loss = mse_loss(y_hat, y)
        return loss
    
    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.forward(x)
        loss = mse_loss(y_hat, y)
        correct = torch.sum(y_hat == y.data)
        predictions_pred.append(y_hat)
        predictions_actual.append(y.data)
        return {'test_loss': loss, 'test_correct': correct, 'logits': y_hat}
    def test_epoch_end(self, outputs):
        avg_loss = torch.stack([x['test_loss'] for x in outputs]).mean()
        logs = {'test_loss': avg_loss}      
        return {'avg_test_loss': avg_loss, 'log': logs, 'progress_bar': logs }
    
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.02)

## **Los datos**

In [3]:
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
path = 'auto-mpg.data'
download_from_url(url,path)
column_names = ['MPG','Cylinders','Displacement','Horsepower','Weight',
                'Acceleration', 'Model Year', 'Origin']
raw_dataset = pd.read_csv('auto-mpg.data', names=column_names, na_values = "?", comment='\t', sep=" ", skipinitialspace=True)
dataset = raw_dataset.dropna()
train_dataset = dataset.sample(frac=0.8,random_state=0)
test_dataset = dataset.drop(train_dataset.index)
        
        

In [4]:
class DataModule(pl.LightningDataModule):
    def __init__(self, batch_size = 1):
        super(DataModule,self).__init__()
        self.batch_size = batch_size
    
    def transform(self, x):
        self.train_stats = train_dataset.describe().transpose()
        tab = x.copy()
        origin = tab.pop('Origin')
        tab = (tab - self.train_stats['mean'][:-1])/self.train_stats['std'][:-1]
        tab['USA'] = (origin == 1)*1.0
        tab['Europe'] = (origin == 2)*1.0
        tab['Japan'] = (origin == 3)*1.0
        return tab
    
    def setup(self, stage):
        train_data = self.transform(train_dataset)
        test_data = self.transform(test_dataset)
        val_data = train_data.sample(frac=0.2,random_state=0)
        train_data = train_data.drop(val_data.index)
        
        self.train_dataset = TensorDataset(torch.tensor(train_data.drop(['MPG'], axis = 1).values.astype(np.float32))
                              ,torch.tensor(train_data['MPG'].values.astype(np.float32)))
        self.test_dataset = TensorDataset(torch.tensor(test_data.drop(['MPG'], axis = 1).values.astype(np.float32))
                              ,torch.tensor(test_data['MPG'].values.astype(np.float32)))
        self.val_dataset = TensorDataset(torch.tensor(val_data.drop(['MPG'], axis = 1).values.astype(np.float32))
                              ,torch.tensor(val_data['MPG'].values.astype(np.float32)))
        
    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size)
    
    def val_dataloader(self):
        return DataLoader(self.val_dataset, batch_size=self.batch_size)
    
    def test_dataloader(self):
        return DataLoader(self.test_dataset, batch_size=self.batch_size)


In [13]:
data_module = DataModule()
task = Regression()
trainer = pl.Trainer()
trainer.fit(task,data_module)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name    | Type   | Params
-----------------------------------
0 | layer_1 | Linear | 640   
1 | layer_2 | Linear | 4.2 K 
2 | layer_3 | Linear | 65    
-----------------------------------
4.9 K     Trainable params
0         Non-trainable params
4.9 K     Total params
0.019     Total estimated model params size (MB)


                                                              

  rank_zero_warn(
  return F.mse_loss(input, target, reduction=self.reduction)
  rank_zero_warn(


Epoch 0:   5%|▍         | 15/314 [00:00<00:02, 136.73it/s, loss=0.913, v_num=21]

  rank_zero_deprecation(


Epoch 216:  74%|███████▎  | 231/314 [00:02<00:00, 89.24it/s, loss=0.0971, v_num=21] 

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")
