In [3]:
## Baixando as bibliotecas


import torch
from torch import nn, optim

from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


## Predefinindo os argumentos


# Hyperparameters and runtime settings read by the cells below.
args = {
    'batch_size' : 20,      # samples per mini-batch (DataLoader)
    'num_workers' : 4,      # DataLoader worker processes
    'lr' : 1e-4,            # learning rate given to the optimizer
    'weight_decay' : 5e-4,  # weight decay given to the optimizer
    'num_epochs' : 30       # number of train/test passes
}


## Selecting the compute device


# Use the GPU when CUDA is available and fall back to the CPU otherwise, so
# that args['device'] is always defined before the cells below read it.
args['device'] = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(args['device'])

cpu


# Criando o DataFrame a partir do Pandas

In [4]:
# Read the hourly records into a DataFrame and report its dimensions.
df = pd.read_csv('hour.csv')
n_rows, n_cols = df.shape
print((n_rows, n_cols))

# Preview the first rows.
df.head()

(17379, 17)


Unnamed: 0,instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,3,13,16
1,2,2011-01-01,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,8,32,40
2,3,2011-01-01,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,5,27,32
3,4,2011-01-01,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,3,10,13
4,5,2011-01-01,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,0,1,1


# Separando os dados de treino e de teste

In [5]:
# Fix the seed so the shuffled split is the same on every run.
torch.manual_seed(1)

total_rows = len(df)
indices = torch.randperm(total_rows).tolist()

# The first 80% of the shuffled row positions form the training set,
# the remaining positions form the test set.
train_size = int(0.8 * total_rows)
train_positions, test_positions = indices[:train_size], indices[train_size:]
df_train = df.iloc[train_positions]
df_test = df.iloc[test_positions]

print(len(df_train), len(df_test))
display(df_test.head())

# Write both partitions to disk without the index column.
for frame, path in ((df_train, 'bike_train.csv'), (df_test, 'bike_test.csv')):
    frame.to_csv(path, index=False)




13903 3476


Unnamed: 0,instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
12663,12664,2012-06-16,2,1,6,20,0,6,0,2,0.66,0.6212,0.47,0.194,123,229,352
1801,1802,2011-03-20,1,0,3,18,0,0,0,1,0.38,0.3939,0.4,0.3582,58,98,156
16567,16568,2012-11-28,4,1,11,1,0,3,1,2,0.26,0.2576,0.75,0.2239,0,12,12
8817,8818,2012-01-08,1,1,1,5,0,0,0,2,0.32,0.3333,0.49,0.1045,0,2,2
2608,2609,2011-04-23,2,0,4,14,0,6,0,1,0.58,0.5455,0.78,0.3582,182,209,391


# Criando a classe Dataset (é necessário criar uma classe para utilizar os dados no PyTorch)

In [6]:
class Bicicletinha(Dataset):
    """Map-style dataset over the rows of a CSV file.

    Each item is a ``(sample, label)`` pair of float32 tensors: ``sample``
    holds the values at column positions 2 to 13 of the row (12 values) and
    ``label`` holds the value of the last column as a one-element tensor.
    """

    # Column positions, in CSV order, used as model input and as target.
    FEATURE_COLUMNS = slice(2, 14)
    LABEL_COLUMNS = slice(-1, None)

    def __init__(self, csv_path):
        """Read ``csv_path`` and pre-convert the used columns to float32.

        Args:
            csv_path: path of the CSV file, read with ``pandas.read_csv``.
        """
        # Full table as a NumPy array, kept for backward compatibility.
        self.dados = pd.read_csv(csv_path).to_numpy()

        # Cast once here instead of re-casting a row on every __getitem__.
        self._samples = self.dados[:, self.FEATURE_COLUMNS].astype(np.float32)
        self._labels = self.dados[:, self.LABEL_COLUMNS].astype(np.float32)

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` float32 tensors for row ``idx``."""
        # clone() detaches the result from the cached arrays, so in-place
        # edits by the caller never alter the dataset itself.
        sample = torch.from_numpy(self._samples[idx]).clone()
        label = torch.from_numpy(self._labels[idx]).clone()

        return sample, label

    def __len__(self):
        """Return the number of rows loaded from the CSV file."""
        return len(self.dados)

## visualizando

In [7]:
# Build one dataset per split from the two CSV files.
train_set, test_set = (Bicicletinha(path) for path in ('bike_train.csv', 'bike_test.csv'))

# Peek at the first training example: label first, then features.
dado, rotulo = train_set[0]
print(rotulo, dado, sep='\n')

tensor([373.])
tensor([ 4.0000,  1.0000, 11.0000, 19.0000,  0.0000,  4.0000,  1.0000,  1.0000,
         0.3800,  0.3939,  0.2700,  0.3582])


# Criando os DataLoaders de treino e de teste

In [8]:
# Training batches are drawn in a new random order on every epoch.
train_loader = DataLoader(train_set,
                          batch_size=args['batch_size'],
                          shuffle=True,
                          num_workers=args['num_workers'])

# Evaluation gains nothing from shuffling; a fixed order keeps the batch
# composition, and therefore the per-batch loss statistics, comparable
# from one epoch to the next.
test_loader = DataLoader(test_set,
                         batch_size=args['batch_size'],
                         shuffle=False,
                         num_workers=args['num_workers'])

In [9]:
# Show the loader object's repr as the cell output.
train_loader

<torch.utils.data.dataloader.DataLoader at 0x1abf655c0d0>

In [None]:
# Pull a single batch and print the sizes of its feature and label tensors.
for dado, rotulo in train_loader:
    print(dado.size(), rotulo.size())
    break

# criando o MLP

In [None]:
## Network dimensions
first_sample, _ = train_set[0]
input_size = len(first_sample)  # number of values in one input sample
hidden_size = 128               # units in each hidden layer
out_size = 1                    # number of variables to predict

# Multilayer perceptron implemented as an nn.Module subclass
class MLP(nn.Module):
    """Fully connected network: two Linear+ReLU hidden layers and a linear output layer."""

    def __init__(self, input_size, hidden_size, out_size):
        """Create the hidden stack (``features``) and the output layer (``out``).

        Args:
            input_size: number of input values per sample.
            hidden_size: number of units in each of the two hidden layers.
            out_size: number of output values per sample.
        """
        super().__init__()

        hidden_layers = [
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
        ]
        self.features = nn.Sequential(*hidden_layers)
        self.out = nn.Linear(hidden_size, out_size)

    def forward(self, X):
        """Return the output layer applied to the hidden features of ``X``."""
        return self.out(self.features(X))
    

## Instantiate the network and move its parameters to the selected device
net = MLP(input_size, hidden_size, out_size)
net = net.to(args['device'])
print(net)

# definindo o loss e o otimizador

In [None]:
# L1 loss between predictions and labels, placed on the selected device.
criterion = nn.L1Loss()
criterion = criterion.to(args['device'])

# Adam over all network parameters, configured from args.
optimizer = optim.Adam(
    net.parameters(),
    lr=args['lr'],
    weight_decay=args['weight_decay'],
)

# fluxo de treinamento e validação

In [None]:
def train(train_loader, net, epoch):
    """Run one training epoch and print the per-batch loss statistics.

    Relies on the notebook-level ``args``, ``criterion`` and ``optimizer``.

    Args:
        train_loader: iterable yielding ``(dado, rotulo)`` batches.
        net: model to optimize; it is switched to training mode.
        epoch: epoch number, used only in the printed message.

    Returns:
        Mean of the per-batch losses of this epoch.
    """
    net.train()

    epoch_loss = []
    for dado, rotulo in train_loader:

        # Move the batch to the configured device
        dado   = dado.to(args['device'])
        rotulo = rotulo.to(args['device'])

        # backward() adds to the stored gradients, so clear what the previous
        # batch left behind before computing new ones.
        optimizer.zero_grad()

        # Forward
        predic = net(dado)
        loss = criterion(predic, rotulo)
        epoch_loss.append(loss.item())

        # Backward
        loss.backward()
        optimizer.step()

    epoch_loss = np.asarray(epoch_loss)

    print('Epoca {}, Loss {:.4f} +/- {:.4f}'.format(epoch, epoch_loss.mean(), epoch_loss.std()))

    return epoch_loss.mean()

In [None]:
def test(test_loader, net, epoch):
    
    net.eval()
    with torch.no_grand():     
        epoch_loss = []
        for batch in test_loader:

            dado, rotulo = batch

            # Cast GPU
            dado   = dado.to(args['device'])
            rotulo = rotulo.to(args['device'])

            # Forward

            predic = net(dado)
            loss = criterion(predic, rotulo)
            epoch_loss.append(loss.cpu().data)

        epoch_loss = np.asarray(epoch_loss)

        print('Epoca {}, Loss {:.4f} +/- {:.4f}'.format(epoch, epoch_loss.mean(), epoch_loss.std()))    

In [None]:
# Alternate one training pass and one evaluation pass per epoch.
num_epochs = args['num_epochs']
for epoch in range(num_epochs):
    train(train_loader, net, epoch)
    test(test_loader, net, epoch)

In [None]:
# # Resumo do código: Função forward()
# def forward(loader, net, epoch, mode):
#     if mode == "train":
#         net.train()
#     else:
#         net.eval()

#     epoch_loss = []
#     for batch in loader:
#         dado, rotulo = batch

#         # Cast na GPU
#         dado   = dado.to(args['device'])
#         rotulo = rotulo.to(args['device'])

#         # Forward 
#         pred = net(dado)
#         loss = criterion(pred, rotulo)
#         epoch_loss.append(loss.cpu().data)

#         if mode == "train":
#             # Backward
#             loss.backward()
#             optimizer.step()

#     epoch_loss = np.asarray(epoch_loss)
#     print("Epoca %d, Loss: %.4f +\- %.4f" % (epoch, epoch_loss.mean(), epoch_loss.std()) )

In [None]:
# # Resumo do código: Chamada de função
# for epoch in range(args['num_epochs']):
#     forward(train_loader, net, epoch, "train")
#     forward(test_loader, net, epoch, "test")
#     print("-------------------------------")