In [1]:
%load_ext autoreload
%autoreload 2

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import seaborn as sns

import sys
sys.path.append('..')
from src.preprocessing import DataPreprocessing
from src.data_models import DataModels

import wandb

In [2]:
# Authenticate this session with Weights & Biases before the sweep is created.
# NOTE(review): `wandb` is already imported in the first cell, so this
# re-import is redundant (harmless, but it could be dropped).
import wandb
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmor3no[0m ([33mteam-ss[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

### Data

In [3]:
# Directory holding the raw CSV exports and the date suffix both files share.
DATA_DIR = "../data/raw"
DATE = "_06_06_22"

# Both file names follow the same "<dir>/<source><date>.csv" layout.
PURPLE_PATH = "{}/purple{}.csv".format(DATA_DIR, DATE)
AIRE_PATH = "{}/aire{}.csv".format(DATA_DIR, DATE)

In [4]:
# Hand the two raw CSV paths to the project's preprocessing helper, run it,
# and pull out the resulting (purple, aire) pair.
dp = DataPreprocessing(PURPLE_PATH, AIRE_PATH)
dp.preprocess()
purple, aire = dp.get_data()
# Wrap the preprocessed pair in the project's modelling helper.
# NOTE(review): presumably create_hour_col() adds an hour-of-day feature to the
# data — confirm in src.data_models.
data = DataModels(purple, aire)
data.create_hour_col()

In [5]:
# Fetch the pair back from the DataModels wrapper (after create_hour_col ran)
# and expose it as feature matrix / target.
purple, aire = data.get_data()
X, y = purple.values, aire

In [6]:
# Sanity check: dimensions of the feature matrix.
X.shape

(28359, 5)

In [7]:
# Sanity check: the target must have one entry per feature row.
y.shape

(28359,)

In [8]:
## Split data
# Seed the split so every kernel restart (and therefore every sweep run) trains
# and validates on the same partition; an unseeded split makes runs
# incomparable because the held-out set changes each time this cell executes.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    purple, aire, random_state=SPLIT_SEED
)

# Convert to float32 tensors. Targets are reshaped to column vectors (N, 1) so
# they match the single-output network when passed to the loss.
X_train = torch.FloatTensor(X_train.values)
X_test = torch.FloatTensor(X_test.values)
y_train = torch.FloatTensor(y_train).view(-1,1)
y_test = torch.FloatTensor(y_test).view(-1,1)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)

torch.Size([21269, 5])
torch.Size([21269, 1])
torch.Size([7090, 5])


# NN: Ecuación de corrección

In [9]:
# Sweep strategy: hyperparameter combinations are sampled at random.
sweep_config = dict(method='random')

In [None]:
# Search space: every hyperparameter offers the sweep a discrete list of
# candidate values, stored under the 'values' key.
parameters_dict = {
    name: {'values': candidates}
    for name, candidates in (
        ('optimizer', ['adam']),
        ('fc_layer_size', [128, 256, 512]),
        ('dropout', [0.3, 0.4, 0.5]),
        ('batch_size', [512, 1024]),
        ('learning_rate', [0.01, 0.02]),
    )
}

# Attach the search space to the sweep definition.
sweep_config['parameters'] = parameters_dict

In [None]:
# Unlike the searched hyperparameters, the epoch count is pinned to one value.
parameters_dict['epochs'] = {'value': 50}

In [48]:
# Register the sweep definition with W&B; the returned id is later passed to
# wandb.agent to launch runs.
sweep_id = wandb.sweep(sweep_config, project="cei-sweeps-demo")

Create sweep with ID: iejpknwg
Sweep URL: https://wandb.ai/team-ss/cei-sweeps-demo/sweeps/iejpknwg


In [49]:
class TabularModel(nn.Module):
    """Fully connected network built from a list of layer widths.

    ``layers_sz`` lists the width of every layer, input first and output last,
    e.g. ``[5, 128, 128, 1]``. Each hidden ``nn.Linear`` is followed by a
    ``ReLU``; the final ``nn.Linear`` has no activation.

    Args:
        layers_sz: Sequence of at least two layer widths.
        dropout: Probability for an ``nn.Dropout`` placed after every hidden
            ReLU. With the default ``0.0`` no dropout layers are created, so
            the module structure is identical to the dropout-free network.

    Raises:
        ValueError: If fewer than two widths are given or ``dropout`` is
            outside ``[0, 1]``.
    """

    def __init__(self, layers_sz, dropout=0.0):
        super().__init__()

        layers_sz = list(layers_sz)
        if len(layers_sz) < 2:
            # Without this check the final-layer lookup fails with an obscure
            # IndexError.
            raise ValueError(
                "layers_sz needs at least an input and an output size, "
                f"got {layers_sz!r}"
            )
        if not 0.0 <= dropout <= 1.0:
            raise ValueError(f"dropout must be in [0, 1], got {dropout!r}")

        layers = []

        # Hidden stack: pair each width with its successor, stopping before
        # the output layer.
        for in_sz, out_sz in zip(layers_sz[:-2], layers_sz[1:-1]):
            layers.append(nn.Linear(in_sz, out_sz))
            layers.append(nn.ReLU(inplace=True))
            if dropout > 0:
                layers.append(nn.Dropout(dropout))

        # Output layer: linear only, no activation.
        layers.append(nn.Linear(layers_sz[-2], layers_sz[-1]))

        self.layers_nn = nn.Sequential(*layers)

    def forward(self, X):
        """Apply the layer stack to ``X`` and return its output."""
        return self.layers_nn(X)

In [51]:
# Mean-squared-error loss, used for both the training and the validation loss.
criterion = nn.MSELoss()

def build_dataset(batch_size, shuffle=True):
    """Wrap the training tensors in a mini-batch ``DataLoader``.

    Reads the notebook-level ``X_train`` and ``y_train``.

    Args:
        batch_size: Number of samples per mini-batch.
        shuffle: Reshuffle the samples at the start of every epoch. Defaults
            to ``True``; with the DataLoader default (``False``) the network
            would see identical batches in identical order each epoch.

    Returns:
        A ``DataLoader`` yielding ``(features, target)`` batches.
    """
    dataset = TensorDataset(torch.FloatTensor(X_train), torch.FloatTensor(y_train))
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)


def build_network(fc_layer_size, in_features=5, out_features=1):
    """Create a ``TabularModel`` with two hidden layers of equal width.

    Args:
        fc_layer_size: Width of each of the two hidden layers.
        in_features: Number of input columns. Defaults to 5, the value that
            was previously hard-coded.
        out_features: Number of outputs. Defaults to 1, the value that was
            previously hard-coded.

    Returns:
        A freshly initialised ``TabularModel``.
    """
    layer_sizes = [in_features, fc_layer_size, fc_layer_size, out_features]
    return TabularModel(layer_sizes)

    
def build_optimizer(network, optimizer, learning_rate):
    """Create the optimizer selected by name for ``network``'s parameters.

    Args:
        network: Module whose parameters will be optimised.
        optimizer: Either ``"sgd"`` (SGD with momentum 0.9) or ``"adam"``.
        learning_rate: Learning rate passed to the optimizer.

    Returns:
        A ``torch.optim.SGD`` or ``torch.optim.Adam`` instance.

    Raises:
        ValueError: If ``optimizer`` is not a supported name. Previously the
            name string itself was returned, which only failed later when the
            training loop called ``zero_grad()`` on it.
    """
    if optimizer == "sgd":
        return torch.optim.SGD(network.parameters(),
                               lr=learning_rate, momentum=0.9)
    if optimizer == "adam":
        return torch.optim.Adam(network.parameters(),
                                lr=learning_rate)
    raise ValueError(
        f"Unsupported optimizer {optimizer!r}; expected 'sgd' or 'adam'"
    )


def train_epoch(network, loader, optimizer):
    """Train ``network`` for one pass over ``loader``.

    After every optimizer step the network is evaluated on the notebook-level
    ``X_test`` / ``y_test`` and the result is logged to W&B as ``"val loss"``.
    Uses the notebook-level ``criterion`` for both losses.

    Args:
        network: Model to train (updated in place).
        loader: Iterable yielding ``(features, target)`` batches.
        optimizer: Optimizer bound to ``network``'s parameters.

    Returns:
        The mean training loss over the batches as a Python float
        (``0.0`` if the loader yielded no batches).
    """
    network.train()
    cumu_loss = 0.0
    num_batches = 0
    for features, target in loader:

        optimizer.zero_grad()

        # ➡ Forward pass
        y_pred = network(features)
        loss = criterion(y_pred, target)
        # .item() yields a plain float. Accumulating the tensor itself would
        # keep every batch's autograd history alive for the whole epoch and
        # pass a graph-attached tensor on to wandb.log.
        cumu_loss += loss.item()
        num_batches += 1
        # ⬅ Backward pass + weight update
        loss.backward()
        optimizer.step()

        # Validation after each step: eval mode and no gradient tracking, then
        # back to train mode for the next batch.
        network.eval()
        with torch.no_grad():
            val_pred = network(X_test)
            val_loss = criterion(val_pred, y_test)
        network.train()
        wandb.log({"val loss": val_loss.item()})

    # Guard against an empty loader instead of dividing by zero.
    return cumu_loss / num_batches if num_batches else 0.0

In [50]:
def train(config=None):
    """Run one training job inside a W&B run.

    Args:
        config: Optional hyperparameters forwarded to ``wandb.init``. When the
            function is launched by ``wandb.agent`` the sweep controller
            provides them instead.
    """
    with wandb.init(config=config):
        # Always read the hyperparameters back from wandb.config: that is
        # where the sweep controller's choices end up.
        # NOTE(review): the sweep also samples `dropout`, but nothing here
        # consumes it.
        hparams = wandb.config

        loader = build_dataset(hparams.batch_size)
        network = build_network(hparams.fc_layer_size)
        optimizer = build_optimizer(
            network, hparams.optimizer, hparams.learning_rate
        )

        # One W&B log entry per epoch with the mean training loss.
        for epoch in range(hparams.epochs):
            epoch_loss = train_epoch(network, loader, optimizer)
            wandb.log({"loss": epoch_loss, "epoch": epoch})

In [None]:
# Random search has no natural end, so cap how many runs this agent executes;
# without `count` this cell never returns and Restart & Run All cannot finish.
SWEEP_RUN_COUNT = 5
wandb.agent(sweep_id, train, count=SWEEP_RUN_COUNT)