In [None]:
### TODOS

# TODO: implement gradient clipping in the training step (threshold: q_config['gradient_clipping'])


In [75]:
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import torch.nn.utils.rnn as rnn
from sklearn.preprocessing import OneHotEncoder
import pandas as pd

In [104]:
import inspect
import pdb

In [74]:
# Settings for the Quarterly run. The original cell could not be parsed
# (missing comma, `=` inside the dict literal, C-style `f` float suffix,
# undefined `sqrt`) and the derived tau/attention entries were written as bare
# annotations (`:`) so they were never stored; all of that is fixed here.
q_config = {

    # RUNTIME PARAMETERS
    'prod': False,
    'device': ("cuda" if torch.cuda.is_available() else "cpu"),

    'lback': False,

    # MODEL STRUCTURE PARAMETERS
    'variable': "Quarterly",
    'run': "50/45 (1,2),(4,8), LR=0.001/{10,1e-4f}, EPOCHS=15, LVP=80 40*",
    'percentile': 50,
    'training_percentile': 45,
    'dilations': ((1, 2), (4, 8)),
    'use_residual_lstm': False,
    'add_nl_layer': False,
    'initial_learning_rate': 1e-3,
    # Tuple of (epoch, learning_rate) pairs. The trailing comma matters:
    # `((10, 1e-4))` is just the flat pair `(10, 1e-4)`.
    'learning_rates': ((10, 1e-4),),
    'per_series_lr_multip': 1,
    'num_of_train_epochs': 15,
    'state_hsize': 40,
    'seasonality': 4,
    'input_size': 4,
    'output_size': 8,
    'min_inp_seq_len': 0,
    'level_variability_penalty': 80,
    'batch_size': 8,

    'num_of_categories': 6,  # in data provided
    'big_loop': 3,
    'num_of_chunks': 2,
    'eps': 1e-6,
    'averaging_level': 5,
    'use_median': False,
    'middle_pos_for_avg': 2,  # if using medians
    'noise_std': 0.001,
    'freq_of_test': 1,
    'gradient_clipping': 20,
    'c_state_penalty': 0,
    'big_float': 1e38,  # numeric_limits<float>::max(),
    'print_diagn': True,
    'max_num_of_series': -1,

    'use_auto_learning_rate': False,
    'min_learning_rate': 0.0001,
    'lr_ratio': 10 ** 0.5,  # sqrt(10) without needing an extra import
    'lr_tolerance_multip': 1.005,
    'l3_period': 2,
    'min_epochs_before_changing_lrate': 2,
    'print_train_batch_every': 5
}

# Derived settings, computed from the literals above.
q_config['input_size_i'] = q_config['input_size']
q_config['output_size_i'] = q_config['output_size']
q_config['min_series_length'] = q_config['input_size_i'] + q_config['output_size_i'] + q_config['min_inp_seq_len'] + 2
q_config['max_series_length'] = 40 * q_config['seasonality'] + q_config['min_series_length']
q_config['tau'] = q_config['percentile'] / 100
q_config['training_tau'] = q_config['training_percentile'] / 100
q_config['attention_hsize'] = q_config['state_hsize']

# Development override: smaller batches and a cap on the number of series.
if not q_config['prod']:
    q_config['batch_size'] = 2
    q_config['max_num_of_series'] = 40
    

In [90]:
# Series metadata table; later cells filter it by the 'SP' column and read 'category'.
info = pd.read_csv(filepath_or_buffer='./data/info.csv')

In [45]:
def read_file(file_location):
    """Load one series per row from a CSV file whose first row is a header.

    Each data row is ``id,v1,v2,...``; double quotes are stripped, the first
    column (the id) is discarded and empty cells are skipped, so rows may have
    different lengths.

    Args:
        file_location: path of the CSV file to read.

    Returns:
        A 1-D ``numpy`` array of dtype ``object`` whose i-th element is the
        1-D float array for the i-th data row.

    Raises:
        ValueError: if a non-empty cell cannot be parsed as a float.
    """
    with open(file_location, 'r') as file:
        # splitlines() copes with both a missing and a present trailing
        # newline; the previous `range(1, len(data) - 1)` silently dropped the
        # last series when the file did not end with "\n".
        lines = file.read().splitlines()

    series = []
    for line in lines[1:]:  # lines[0] is the header row
        if not line.strip():
            continue
        row = line.replace('"', '').split(',')
        series.append(np.array([float(cell) for cell in row[1:] if cell.strip() != ""]))

    # Build the container explicitly: `np.array(series)` raises for ragged
    # input on recent NumPy releases, and for equal-length rows it would
    # collapse into a 2-D float array instead of an array of per-series arrays.
    result = np.empty(len(series), dtype=object)
    for position, values in enumerate(series):
        result[position] = values
    return result

In [96]:
def create_val_set(train, output_size):
    """Split the last ``output_size`` points off every training series.

    ``train`` is modified in place: each entry is replaced by the series
    without its final ``output_size`` values.

    Returns:
        ``np.array`` built from the removed tails, one per series.
    """
    tails = []
    for position, full_series in enumerate(train):
        tails.append(full_series[-output_size:])
        # Overwrite the caller's entry with the shortened series.
        train[position] = full_series[:-output_size]
    return np.array(tails)

In [98]:
def create_datasets(train_file_location, test_file_location, output_size):
    """Read the train and test files and carve a validation set out of train.

    Returns:
        ``(train, val, test)`` where ``val`` holds the last ``output_size``
        points of each training series and ``train`` holds what is left.
    """
    train_series = read_file(train_file_location)
    test_series = read_file(test_file_location)
    # create_val_set shortens train_series in place and returns the tails.
    val_series = create_val_set(train_series, output_size)
    return train_series, val_series, test_series

In [99]:
# Build the train / validation / test series for the Quarterly subset.
train, val, test = create_datasets(
    train_file_location='./data/M4DataSetTrain/Quarterly-train.csv',
    test_file_location='./data/M4DataSetTest/Quarterly-test.csv',
    output_size=q_config['output_size'],
)

In [133]:
class SeriesDataset(Dataset):
    """Dataset yielding, per series, its train/val/test tensors and category.

    Category rows come from ``info`` filtered to ``info['SP'] == variable`` and
    are one-hot encoded with ``pd.get_dummies``.
    NOTE(review): row ``idx`` of the filtered ``info`` is assumed to describe
    series ``idx`` of the data lists -- confirm the ordering matches.
    """

    def __init__(self, dataTrain, dataVal, dataTest, info, variable, device):
        matching_rows = info[info['SP'] == variable]
        # One-hot table of the 'category' column for the selected rows.
        self.dataInfoCatOHE = pd.get_dummies(matching_rows['category'])
        self.dataInfoCatHeaders = self.dataInfoCatOHE.columns
        self.dataInfoCat = torch.from_numpy(self.dataInfoCatOHE.values)
        # One tensor per series; lengths may differ between series.
        self.dataTrain = [torch.tensor(series) for series in dataTrain]
        self.dataVal = [torch.tensor(series) for series in dataVal]
        self.dataTest = [torch.tensor(series) for series in dataTest]
        self.device = device

    def __len__(self):
        """Number of training series."""
        return len(self.dataTrain)

    def __getitem__(self, idx):
        """Return ``(train, val, test, category_one_hot, idx)`` on ``self.device``."""
        target = self.device
        return (
            self.dataTrain[idx].to(target),
            self.dataVal[idx].to(target),
            self.dataTest[idx].to(target),
            self.dataInfoCat[idx].to(target),
            idx,
        )

def collate_lines(seq_list):
    """Collate dataset items into per-field lists, longest train series first.

    Args:
        seq_list: iterable of ``(train, val, test, info_cat, idx)`` items.

    Returns:
        ``(train, val, test, info_cat, idx)`` as five lists, all reordered by
        descending ``len(train)``; items of equal length keep their input order.
    """
    by_length = sorted(seq_list, key=lambda sample: len(sample[0]), reverse=True)
    train, val, test, info_cat, idx = (list(field) for field in zip(*by_length))
    return train, val, test, info_cat, idx

# Wrap the loaded series in a Dataset and a shuffling DataLoader that uses the
# length-sorting collate function.
dataset = SeriesDataset(
    dataTrain=train,
    dataVal=val,
    dataTest=test,
    info=info,
    variable=q_config['variable'],
    device=q_config['device'],
)
dataloader = DataLoader(
    dataset=dataset,
    batch_size=q_config['batch_size'],
    shuffle=True,
    collate_fn=collate_lines,
)

In [405]:
class ESRNN(nn.Module):
    """Work-in-progress ES-RNN model.

    Holds three learnable per-series parameter lists (level smoothing,
    seasonality smoothing, initial seasonality vector) plus two linear layers
    and a tanh activation. ``forward`` currently only gathers the per-series
    parameters for the batch and returns ``None``.
    """

    def __init__(self, num_series, seasonality, hidden_size, output_size):
        super().__init__()

        # Author's notes: check that gradients accumulate on these per-series
        # parameters; if not, try creating them as variables, or use
        # index_select instead of plain indexing.
        # Update 2018-11-30: the parameters show up in the model printout.
        self.init_lev_sms = nn.ParameterList(
            [nn.Parameter(torch.Tensor([0.5])) for _ in range(num_series)]
        )
        self.init_seas_sms = nn.ParameterList(
            [nn.Parameter(torch.Tensor([0.5])) for _ in range(num_series)]
        )
        self.init_seasonalities = nn.ParameterList(
            [nn.Parameter(torch.ones(seasonality) * 0.5) for _ in range(num_series)]
        )

        self.nl_layer = nn.Linear(hidden_size, hidden_size)
        self.act = nn.Tanh()
        self.scoring = nn.Linear(hidden_size, output_size)

    def forward(self, train, val, test, info_cat, idxs, add_nl_layer):
        """Gather the per-series parameters for ``idxs``; the rest is still TODO."""
        # Per-series parameters for the series in this batch.
        lev_sms = torch.stack([self.init_lev_sms[series_id] for series_id in idxs])
        seas_sms = torch.stack([self.init_seas_sms[series_id] for series_id in idxs])
        seasonalities = torch.stack([self.init_seasonalities[series_id] for series_id in idxs])

        # Still to be written: windowing loop, time loop, RNN pass.
        # Planned output head:
        #     if add_nl_layer:
        #         out = self.nl_layer(out)
        #         out = self.act(out)
        #     out = self.scoring(out)
            

In [None]:
class ESRNNTrainer(nn.Module):
    """Runs training epochs for a model over a dataloader.

    Reads its settings from the notebook-level ``q_config`` dict. The original
    cell referred to an undefined ``config`` and to keys that do not exist
    (``learning_rate``, ``num_epochs``); it now uses ``q_config`` with the keys
    ``initial_learning_rate`` and ``num_of_train_epochs``.

    NOTE: ``train`` below overrides ``nn.Module.train(mode=True)`` with a
    different signature and meaning.
    """

    def __init__(self, model, dataloader, run_id):
        """
        Args:
            model: module to optimise; moved to ``q_config['device']``.
            dataloader: iterable yielding ``(train, val, test, info_cat, idx)`` batches.
            run_id: identifier appended to the log directory name.
        """
        super(ESRNNTrainer, self).__init__()
        self.model = model.to(q_config['device'])
        self.dl = dataloader
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=q_config['initial_learning_rate'],
                                          eps=q_config['eps'])
        # No loss is wired in yet; assign one before calling train().
        self.criterion = None
        self.epochs = 0
        self.max_epochs = q_config['num_of_train_epochs']
        self.run_id = str(run_id)
        self.prod_str = 'prod' if q_config['prod'] else 'dev'
        # NOTE(review): `Logger` is not defined or imported in the visible part
        # of the notebook -- it must be provided before this class is used.
        self.log = Logger("./logs/train%s%s" % (self.prod_str, self.run_id))

    def train(self):
        """Run one epoch over the dataloader.

        Returns:
            Mean of the per-batch losses (``0.0`` if the dataloader is empty).
        """
        self.model.train()
        epoch_loss = 0.0
        num_batches = 0
        for batch_num, (train, val, test, info_cat, idx) in enumerate(self.dl):
            if batch_num % q_config['print_train_batch_every'] == 0:
                print("train_batch: %d" % batch_num)
            epoch_loss += self.train_batch(train, val, test, info_cat, idx)
            num_batches += 1
        # Guard against an empty loader (previously `batch_num` was unbound).
        epoch_loss = epoch_loss / max(num_batches, 1)
        self.epochs += 1
        print('[TRAIN]  Epoch [%d/%d]   Loss: %.4f'
              % (self.epochs, self.max_epochs, epoch_loss))
        # self.epochs was already incremented, so it is the 1-based number of
        # the epoch just finished; logging at `self.epochs + 1` was off by one
        # relative to the printed epoch.
        self.log.log_scalar('loss', epoch_loss, self.epochs)
        return epoch_loss

    def train_batch(self, train, val, test, info_cat, idx):
        """Run one optimisation step on a batch and return the loss as a float.

        Raises:
            RuntimeError: if ``self.criterion`` has not been set.
        """
        if self.criterion is None:
            raise RuntimeError("ESRNNTrainer.criterion is not set; assign a loss before training")
        self.optimizer.zero_grad()
        # The model's forward takes add_nl_layer as a required sixth argument.
        output = self.model(train, val, test, info_cat, idx, q_config['add_nl_layer'])
        # NOTE(review): the original referenced an undefined `targets`; the
        # held-out `val` values are assumed to be the intended target --
        # confirm once the model's forward pass produces an output.
        loss = self.criterion(output, val)
        loss.backward()
        self.optimizer.step()
        return float(loss)

### Pinball Loss 

In [None]:
# Expression pinBallLoss(const Expression& out_ex, const Expression& actuals_ex) {//used by Dynet, learning loss function
#   vector<Expression> losses;
#   for (unsigned int indx = 0; indx<OUTPUT_SIZE; indx++) {
#     auto forec = pick(out_ex, indx);
#     auto actual = pick(actuals_ex, indx);
#     if (as_scalar(actual.value()) > as_scalar(forec.value()))
#       losses.push_back((actual - forec)*TRAINING_TAU);
#     else
#       losses.push_back((actual - forec)*(TRAINING_TAU - 1));
#   }
#   return sum(losses) / OUTPUT_SIZE * 2;
# }

In [386]:
#     as defined in the blog post --- https://eng.uber.com/m4-forecasting-competition/
class PinballLoss(nn.Module):
    """Pinball (quantile) loss over the first ``output_size`` forecast steps.

    For every step, ``actual - prediction`` is weighted by ``training_tau``
    when the actual is above the prediction and by ``training_tau - 1``
    otherwise; the weighted terms are summed, divided by ``output_size`` and
    multiplied by 2.

    The loss is computed with tensor operations so the result stays attached
    to the autograd graph. The previous version rebuilt the per-step losses
    with ``torch.Tensor(losses)``, which produces a tensor that has no
    gradient path back to ``predictions``.
    """

    def __init__(self, training_tau, output_size):
        """
        Args:
            training_tau: quantile weight applied to under-predictions.
            output_size: number of leading elements to score.
        """
        super(PinballLoss, self).__init__()
        self.training_tau = training_tau
        self.output_size = output_size

    def forward(self, predictions, actuals):
        """Return the scalar pinball loss.

        Args:
            predictions: 1-D tensor (or sequence) with at least ``output_size`` values.
            actuals: 1-D tensor (or sequence) with at least ``output_size`` values.

        Raises:
            IndexError: if either input has fewer than ``output_size`` elements.
        """
        # as_tensor leaves existing tensors (and their graph) untouched and
        # converts plain sequences, which the element-wise version accepted.
        predictions = torch.as_tensor(predictions)
        actuals = torch.as_tensor(actuals)
        if predictions.shape[0] < self.output_size or actuals.shape[0] < self.output_size:
            # Keep the failure mode of the original per-index loop.
            raise IndexError("inputs must contain at least output_size elements")

        diff = actuals[:self.output_size] - predictions[:self.output_size]
        losses = torch.where(diff > 0,
                             diff * self.training_tau,
                             diff * (self.training_tau - 1))
        return torch.sum(losses) / self.output_size * 2

In [387]:
# Smoke test: evaluate PinballLoss (tau = 0.5, 100 steps) on two random vectors.
# No random seed is set in this cell, so the displayed value is not reproducible.
test1 = torch.rand(100)
test2 = torch.rand(100)
pb = PinballLoss(0.5, 100)
pb(test1, test2)

tensor(0.2912)

### sMAPE 

In [None]:
# float sMAPE(vector<float>& out_vect, vector<float>& actuals_vect) {
#   float sumf = 0;
#   for (unsigned int indx = 0; indx<OUTPUT_SIZE; indx++) {
#     auto forec = out_vect[indx];
#     auto actual = actuals_vect[indx];
#     sumf+=abs(forec-actual)/(abs(forec)+abs(actual));
#   }
#   return sumf / OUTPUT_SIZE * 200;
# }


In [294]:
def sMAPE(predictions, actuals, output_size):
    """Symmetric MAPE over the first ``output_size`` elements.

    Sums ``|prediction - actual| / (|prediction| + |actual|)`` per element,
    then returns ``sum / output_size * 200``.
    """
    ratio_total = sum(
        abs(predictions[pos] - actuals[pos]) / (abs(predictions[pos]) + abs(actuals[pos]))
        for pos in range(output_size)
    )
    return ratio_total / output_size * 200

In [329]:
# Smoke test: sMAPE between two random 100-element vectors.
# No random seed is set in this cell, so the displayed value is not reproducible.
test1 = torch.rand(100)
test2 = torch.rand(100)
sMAPE(test1, test2, 100)

tensor(76.0139)

### wQuantLoss

In [None]:
# float wQuantLoss(vector<float>& out_vect, vector<float>& actuals_vect) {
#   float sumf = 0; float suma=0;
#   for (unsigned int indx = 0; indx<OUTPUT_SIZE; indx++) {
#     auto forec = out_vect[indx];
#     auto actual = actuals_vect[indx];
#     suma+= abs(actual);
#     if (actual > forec)
#       sumf = sumf + (actual - forec)*TAU;
#     else
#       sumf = sumf + (actual - forec)*(TAU - 1);
#   }
#   return sumf / suma * 200;
# }

In [331]:
def wQuantLoss(predictions, actuals, output_size, training_tau):
    """Weighted quantile loss over the first ``output_size`` elements.

    Each ``actual - prediction`` gap is weighted by ``training_tau`` when the
    actual exceeds the prediction and by ``training_tau - 1`` otherwise. The
    weighted sum is divided by the sum of ``|actual|`` and multiplied by 200.
    """
    weighted_gap_total = 0
    abs_actual_total = 0
    for pos in range(output_size):
        forecast = predictions[pos]
        observed = actuals[pos]

        abs_actual_total += abs(observed)
        weight = training_tau if observed > forecast else (training_tau - 1)
        weighted_gap_total = weighted_gap_total + (observed - forecast) * weight

    return weighted_gap_total / abs_actual_total * 200

In [350]:
# Smoke test: wQuantLoss with tau = 0.5 on two random 100-element vectors.
# No random seed is set in this cell, so the displayed value is not reproducible.
test1 = torch.rand(100)
test2 = torch.rand(100)
wQuantLoss(test1, test2, 100, 0.5)

tensor(70.5309)

### ErrorFunc

In [None]:
# float errorFunc(vector<float>& out_vect, vector<float>& actuals_vect) {
#   if (PERCENTILE==50)
#     return sMAPE(out_vect, actuals_vect);
#   else
#     return wQuantLoss(out_vect, actuals_vect);
# }

In [351]:
def errorFunc(predictions, actuals, output_size, percentile):
    """Evaluation metric: sMAPE for the 50th percentile, wQuantLoss otherwise.

    For any other percentile, ``percentile / 100`` is passed to ``wQuantLoss``
    as the quantile weight.
    """
    if percentile != 50:
        return wQuantLoss(predictions, actuals, output_size, percentile / 100)
    return sMAPE(predictions, actuals, output_size)

In [374]:
# Smoke test: errorFunc should match wQuantLoss for a percentile other than 50
# and match sMAPE for percentile 50; each pair of prints should be equal.
# No random seed is set in this cell, so the printed values are not reproducible.
test1 = torch.rand(100)
test2 = torch.rand(100)
print(errorFunc(test1, test2, 100, 48))
print(wQuantLoss(test1, test2, 100, 0.48))

print(errorFunc(test1, test2, 100, 50))
print(sMAPE(test1, test2, 100))

tensor(55.5585)
tensor(55.5585)
tensor(68.8411)
tensor(68.8411)
