# redes neuronales con pytorch

## librerías

In [1]:
import pandas as pd
import numpy as np

## dataset

Medical Costs Dataset https://www.kaggle.com/mirichoi0218/insurance?ref=hackernoon.com

In [2]:
df = pd.read_csv('insurance.csv')

In [6]:
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [3]:
df.shape

(1338, 7)

In [4]:
data = pd.get_dummies(df)
data.head()

Unnamed: 0,age,bmi,children,charges,sex_female,sex_male,smoker_no,smoker_yes,region_northeast,region_northwest,region_southeast,region_southwest
0,19,27.9,0,16884.924,1,0,0,1,0,0,0,1
1,18,33.77,1,1725.5523,0,1,1,0,0,0,1,0
2,28,33.0,3,4449.462,0,1,1,0,0,0,1,0
3,33,22.705,0,21984.47061,0,1,1,0,0,1,0,0
4,32,28.88,0,3866.8552,0,1,1,0,0,1,0,0


In [5]:
data.describe(percentiles = [0.01, 0.05, 0.10, 0.90, 0.95, 0.99])

Unnamed: 0,age,bmi,children,charges,sex_female,sex_male,smoker_no,smoker_yes,region_northeast,region_northwest,region_southeast,region_southwest
count,1338.0,1338.0,1338.0,1338.0,1338.0,1338.0,1338.0,1338.0,1338.0,1338.0,1338.0,1338.0
mean,39.207025,30.663397,1.094918,13270.422265,0.494768,0.505232,0.795217,0.204783,0.242152,0.2429,0.272048,0.2429
std,14.04996,6.098187,1.205493,12110.011237,0.50016,0.50016,0.403694,0.403694,0.428546,0.428995,0.445181,0.428995
min,18.0,15.96,0.0,1121.8739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1%,18.0,17.89515,0.0,1252.97273,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5%,18.0,21.256,0.0,1757.7534,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10%,19.0,22.99,0.0,2346.533615,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,39.0,30.4,1.0,9382.033,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
90%,59.0,38.6195,3.0,34831.7197,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
95%,62.0,41.106,3.0,41181.827787,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


## conjuntos train, validation, test

In [7]:
X = data.drop('charges', axis = 1)
y = data['charges']

In [8]:
X.head()

Unnamed: 0,age,bmi,children,sex_female,sex_male,smoker_no,smoker_yes,region_northeast,region_northwest,region_southeast,region_southwest
0,19,27.9,0,1,0,0,1,0,0,0,1
1,18,33.77,1,0,1,1,0,0,0,1,0
2,28,33.0,3,0,1,1,0,0,0,1,0
3,33,22.705,0,0,1,1,0,0,1,0,0
4,32,28.88,0,0,1,1,0,0,1,0,0


In [9]:
y.head()

0    16884.92400
1     1725.55230
2     4449.46200
3    21984.47061
4     3866.85520
Name: charges, dtype: float64

In [10]:
from sklearn.model_selection import train_test_split

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 12345)

In [12]:
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size = 0.25, random_state = 12345)

In [13]:
X_train.shape, X_val.shape, X_test.shape

((802, 11), (268, 11), (268, 11))

In [14]:
y_train.shape, y_val.shape, y_test.shape

((802,), (268,), (268,))

## crear dataloaders

In [15]:
a = np.array([1,2,3])

In [16]:
a

array([1, 2, 3])

In [17]:
import torch
from torch.utils.data import TensorDataset, DataLoader

In [19]:
t = torch.from_numpy(a)

In [20]:
t

tensor([1, 2, 3])

In [39]:
train_data = TensorDataset(torch.from_numpy(X_train.values), torch.from_numpy(y_train.values))
valid_data = TensorDataset(torch.from_numpy(X_val.values), torch.from_numpy(y_val.values))
test_data = TensorDataset(torch.from_numpy(X_test.values), torch.from_numpy(y_test.values))

In [40]:
train_loader = DataLoader(train_data, shuffle=False, batch_size=64)
valid_loader = DataLoader(valid_data, shuffle=False, batch_size=64)
test_loader = DataLoader(test_data, shuffle=False, batch_size=64)

In [28]:
train_loader

<torch.utils.data.dataloader.DataLoader at 0x1a58070d90>

In [31]:
batch = next(iter(train_loader))

In [34]:
type(batch)

list

In [35]:
len(batch)

2

In [37]:
batch[0].shape

torch.Size([64, 11])

In [38]:
batch[1].shape

torch.Size([64])

## crear arquitectura de red

nuestra red recibe 11 variables de entrada y devuelve el valor de la predicción

In [41]:
from torch import nn
import torch.nn.functional as F

In [45]:
class Model(nn.Module):
    
    
    def __init__(self):
        '''en este paso se definen las capas de la arquitectura'''
        
        super().__init__()
        
        #la capa inicial recibe 11 features y devuelve 5
        self.fc1 = nn.Linear(11, 5)
        
        #capa intermedia
        self.fc2 = nn.Linear(5, 3)

        self.fc3 = nn.Linear(3, 1)
        
        
    def forward(self, x):
        '''en este paso se define la lógica para el feedforward'''
        
        #se ejecutan las capas y se aplica la función de activación relu luego de cada capa
        x = x.float()
        
        x = self.fc1(x)
        x = F.relu(x)
        
        x = self.fc2(x)
        x = F.relu(x)
        #x = F.dropout(x, 0.2)
        
        x = self.fc3(x)
                
        #se devuelven las predicciones
        return x.double()

In [46]:
modelo = Model()

In [47]:
modelo

Model(
  (fc1): Linear(in_features=11, out_features=5, bias=True)
  (fc2): Linear(in_features=5, out_features=3, bias=True)
  (fc3): Linear(in_features=3, out_features=1, bias=True)
)

In [52]:
list(modelo.parameters())

[Parameter containing:
 tensor([[ 0.0258,  0.1620, -0.2525, -0.2668, -0.1876,  0.1397, -0.0940,  0.2768,
           0.1810, -0.1518, -0.2394],
         [-0.0314,  0.2729, -0.0066, -0.2816, -0.1904,  0.0109, -0.1237,  0.1253,
           0.1307,  0.1223,  0.2789],
         [-0.1638, -0.2755,  0.2777, -0.1193,  0.1661,  0.1859, -0.0539, -0.0664,
          -0.2643,  0.1195,  0.1793],
         [-0.0737,  0.1694,  0.1378,  0.0439,  0.0101, -0.2495,  0.0100,  0.2662,
          -0.0760,  0.2463,  0.0061],
         [ 0.0220,  0.1645,  0.1306,  0.2329, -0.0235,  0.0350, -0.1007, -0.0715,
          -0.2413,  0.0541, -0.0429]], requires_grad=True),
 Parameter containing:
 tensor([-0.1261,  0.2904,  0.3006,  0.2927,  0.2237], requires_grad=True),
 Parameter containing:
 tensor([[ 0.1646,  0.4091,  0.2196,  0.2561, -0.1803],
         [-0.3149,  0.3185, -0.1840, -0.3326, -0.0923],
         [-0.0179, -0.4308, -0.3359,  0.3069,  0.3799]], requires_grad=True),
 Parameter containing:
 tensor([0.2705, 0.2

## crear optimizador y función de costo

In [48]:
from torch import nn, optim

In [54]:
criterion = nn.MSELoss()

In [55]:
optimizer = optim.Adam(modelo.parameters(), lr=0.1)

## entrenar la red

In [56]:
epochs = 50

for e in range(epochs):

    #valor de la función de costo inicial
    running_loss = 0

    #se va a iterar sobre los batches del dataloader definido
    
    for features, labels in train_loader:
        
        #forward
        output = modelo(features)
        
        # (interno pytorch) asegurarme de que output y labels tengan la misma forma
        output = output.view(labels.shape)
        
        # calculo de función de costo
        loss = criterion(output, labels)
        
        # resetear optimizador
        optimizer.zero_grad()
        
        # backpropagation
        loss.backward()
        
        # actualizar pesos
        optimizer.step()
        
        running_loss += loss.item()
        
    print(f"Training loss: {(running_loss/len(train_loader)) ** 0.5}")

Training loss: 17889.800392208323
Training loss: 16977.02994416123
Training loss: 13601.557733828247
Training loss: 11699.831349400823
Training loss: 11339.999182960853
Training loss: 11332.132137312698
Training loss: 11220.260913573322
Training loss: 11182.330960328947
Training loss: 11131.21427973587
Training loss: 11077.69297574503
Training loss: 11026.733824813187
Training loss: 10966.782945182955
Training loss: 10903.737686925708
Training loss: 10831.309410313022
Training loss: 10749.28622957669
Training loss: 10653.221807691085
Training loss: 10539.269922482636
Training loss: 10401.39429678699
Training loss: 10232.430056319596
Training loss: 10023.66991785189
Training loss: 9766.309859455416
Training loss: 9453.837577322758
Training loss: 9086.400577677921
Training loss: 8675.47896894988
Training loss: 8245.461017969634
Training loss: 7828.048910935539
Training loss: 7454.401832740804
Training loss: 7144.204708103251
Training loss: 6906.217045937626
Training loss: 6729.2771904976

## entrenar la red con el conjunto de validación

In [60]:
train_loader = DataLoader(train_data, shuffle=False, batch_size=64)
valid_loader = DataLoader(valid_data, shuffle=False, batch_size=64)
test_loader = DataLoader(test_data, shuffle=False, batch_size=64)

In [61]:
modelo = Model()
criterion = nn.MSELoss()
optimizer = optim.Adam(modelo.parameters(), lr=0.05)

In [62]:
epochs = 50

#funcion de costo minima en validacion
valid_loss_min = np.Inf

for e in range(epochs):

    #valor de la función de costo inicial
    train_loss = 0.0
    valid_loss = 0.0
    
    ############################################
    #              entrenamiento               #
    ############################################
    
    modelo.train()
    
    for features, labels in train_loader:
        
        output = modelo(features)
        output = output.view(labels.shape)
        
        loss = criterion(output, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        
    ############################################
    #                 validacion               #
    ############################################    
    
    modelo.eval()
    
    for features, labels in valid_loader:
        
        output = modelo(features)
        output = output.view(labels.shape)
        loss = criterion(output, labels)
        
        valid_loss += loss.item()
        
    ############################################
    #           guardar el mejor modelo        #
    ############################################
    
    train_loss = (train_loss/len(train_loader)) ** 0.5
    valid_loss = (valid_loss/len(valid_loader)) ** 0.5
    
    print(f'\nEpoch: {e} \tTraining Loss: {train_loss:.6f} \tValidation Loss: {valid_loss:.6f}')
    
    if valid_loss <= valid_loss_min:
        print(f'Validation loss decreased ({valid_loss_min:.6f} --> {valid_loss:.6f}).  Saving model ...')
        torch.save(modelo.state_dict(), 'modelo.pt')
        valid_loss_min = valid_loss


Epoch: 0 	Training Loss: 17912.320246 	Validation Loss: 17127.608568
Validation loss decreased (inf --> 17127.608568).  Saving model ...

Epoch: 1 	Training Loss: 17460.341675 	Validation Loss: 16124.930537
Validation loss decreased (17127.608568 --> 16124.930537).  Saving model ...

Epoch: 2 	Training Loss: 15679.574979 	Validation Loss: 13278.874394
Validation loss decreased (16124.930537 --> 13278.874394).  Saving model ...

Epoch: 3 	Training Loss: 12456.602323 	Validation Loss: 11388.206286
Validation loss decreased (13278.874394 --> 11388.206286).  Saving model ...

Epoch: 4 	Training Loss: 11570.612839 	Validation Loss: 11474.779413

Epoch: 5 	Training Loss: 11316.351504 	Validation Loss: 11202.468537
Validation loss decreased (11388.206286 --> 11202.468537).  Saving model ...

Epoch: 6 	Training Loss: 11320.998047 	Validation Loss: 11213.325474

Epoch: 7 	Training Loss: 11250.233166 	Validation Loss: 11196.895403
Validation loss decreased (11202.468537 --> 11196.895403).  Savi

## evaluar en el test

cargar el mejor modelo

In [63]:
modelo.load_state_dict(torch.load('modelo.pt'))

<All keys matched successfully>

In [64]:
modelo.eval()

Model(
  (fc1): Linear(in_features=11, out_features=5, bias=True)
  (fc2): Linear(in_features=5, out_features=3, bias=True)
  (fc3): Linear(in_features=3, out_features=1, bias=True)
)

In [65]:
running_loss = 0

for features, labels in test_loader:
    
    output = modelo(features)
    output = output.view(labels.shape)
    loss = criterion(output, labels)
    
    running_loss += loss.item()
    
print(f"Test loss: {(running_loss/len(test_loader)) ** 0.5}")

Test loss: 5274.831855091277
