# Setup


In [1]:
import math
import os

import numpy as np
import torch

ROOT_DIR=os.path.dirname(os.path.realpath(__file__))
DATA_DIR=os.path.join(ROOT_DIR,"data","godaddy-microbusiness-density-forecasting") ##Directory of dataset

EXPERIMENTS_DIR=os.path.join(ROOT_DIR, "logs/experiments")
use_cuda = torch .cuda.is_available()
DEVICE = torch.device("cuda" if use_cuda else "cpu")


N_CENSUS_FEATURES= 5 #pct_bb,pct_college,pct_foreign_born,pct_it_workers,median_hh_inc
#cfips is not considered as a feature we use a one-hot encoding for it


USE_CENSUS= False #Without census features

AE_LATENT_DIM= 32

LSTM_HIDDEN_DIM = 8

SEQ_LEN=6
SEQ_STRIDE= 1

N_COUNTY=3142
N_DIMS_COUNTY_ENCODING=  math.ceil(math.log(N_COUNTY,2))

FEATURES_AE_CENSUS_DIR=os.path.join(EXPERIMENTS_DIR, "features_ae_2_dims")
FEATURES_AE_LATENT_DIM= 2

TRAIN_FILE= os.path.join(DATA_DIR, "train.csv")
TEST_FILE= os.path.join(DATA_DIR, "test.csv")

CENSUS_FILE =os.path.join(DATA_DIR, "census_interpolated.csv")

NB_FUTURES= 10 #Number of days to predict


#Scaling factors for microbusiness density
MEAN_MB= 3.817671
STD_MB= 4.991087

MAX_MB= 300
MIN_MB= 0.0

NameError: name '__file__' is not defined

## Utils

In [11]:
import json
import os
from enum import Enum
from itertools import islice
import numpy as np
import pandas as pd
import torch
import logging
import os
import sys
from time import strftime
def setup_logger(args):
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    a_logger = logging.getLogger()
    a_logger.setLevel(args.log_level)
    log_dir=os.path.join(ROOT_DIR,"logs","output_logs")
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    output_file_handler = logging.FileHandler(os.path.join(log_dir,strftime("log_%d_%m_%Y_%H_%M.log")))
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(formatter)
    a_logger.propagate=False
    a_logger.addHandler(output_file_handler)
    a_logger.addHandler(stdout_handler)

def read_json(path_json):
    with open(path_json, encoding='utf8') as json_file:
        return json.load(json_file)
def softmax(x):
    return np.exp(x) / np.sum(np.exp(x))
def chunks(data, SIZE):
    """Split a dictionnary into parts of max_size =SIZE"""
    it = iter(data)
    for _ in range(0, len(data), SIZE):
        yield {k: data[k] for k in islice(it, SIZE)}

def sorted_dict(x, ascending=True):
    """
    Sort dict according to value.
    x must be a primitive type: int,float, str...
    @param x:
    @return:
    """
    return dict(sorted(x.items(), key=lambda item: (1 if ascending else -1) * item[1]))
def reverse_dict(input_dict):
    """
    Reverse a dictonary
    Args:
        input_dict:

    Returns:

    """
    inv_dict = {}
    for k, v in input_dict.items():
        inv_dict[v] = inv_dict.get(v, []) + [k]

    return inv_dict

def save_matrix(matrix,filename):
    with open(filename,'wb') as output:
        np.save(output,matrix)
def load_matrix(filename,auto_delete=False):
    with open(filename,'rb') as input:
        matrix=np.load(input)

    if auto_delete:
        os.remove(filename)
    return matrix



class Averager:
    def __init__(self):
        self.current_total = 0.0
        self.iterations = 0.0

    def send(self, value):
        self.current_total += value
        self.iterations += 1

    @property
    def value(self):
        if self.iterations == 0:
            return 0
        else:
            return 1.0 * self.current_total / self.iterations

    def reset(self):
        self.current_total = 0.0
        self.iterations = 0.0



from enum import Enum
class DatasetType(Enum):
    TRAIN="train"
    VALID="valid"
    TEST="test"



def extract_census_features(row,cfips_index,single_row=True):
    """

    @param row: Row of the dataframe
    @param cfips_index: index of the cfips for one-hot encoding
    @return:
    """
    ##If series :


    if single_row:
        features_tensor = torch.tensor( [row['pct_bb'],
                                        row['pct_college'],
                                        row['pct_foreign_born'],
                                        row['pct_it_workers'],
                                        row['median_hh_inc']
                                        ], dtype=torch.float32)
        cfips_one_hot = get_cfips_encoding(row['cfips'], cfips_index)
        # Min-max normalization
        features_tensor[ 0] = (features_tensor[ 0] - 24.5) / (97.6 - 24.5)
        features_tensor[ 1] = (features_tensor[ 1] / 48)
        features_tensor[ 2] = (features_tensor[ 2] / 54)
        features_tensor[ 3] = (features_tensor[ 3] / 17.4)
        features_tensor[ 4] = (features_tensor[ 4] - 17109) / (1586821 - 17109)

    else :
        features_tensor= torch.from_numpy(row[['pct_bb', 'pct_college', 'pct_foreign_born', 'pct_it_workers', 'median_hh_inc']].values)
        row_one_hots= [get_cfips_encoding(cfips,cfips_index) for cfips in row['cfips']]
        cfips_one_hot = torch.stack(row_one_hots)
        #Min-max normalization
        features_tensor[:,0] = (features_tensor[:,0]- 24.5)/ (97.6-24.5)
        features_tensor[:,1] = (features_tensor[:,1] /48)
        features_tensor[:,2] = (features_tensor[:,2]/ 54)
        features_tensor[:,3] = (features_tensor[:,3] / 17.4)
        features_tensor[:,4] = (features_tensor[:,4]- 17109)/(1586821-17109)


    ##Add one-hot encoding of cfips
    if single_row:
        features_tensor = torch.cat((cfips_one_hot, features_tensor))
    else:
        features_tensor = torch.cat((cfips_one_hot,features_tensor), 1)

    return features_tensor.float()






def get_cfips_index():
    """
    Return a dictionary with key=cfips and value=index for using a one-hot encoding
    """
    df= pd.read_csv(os.path.join(DATA_DIR, "census_ae.csv"))
    cfips = df['cfips'].unique()
    cfips.sort()
    #Sort cfips
    return {cfips[i]: i for i in range(len(cfips))}


def get_cfips_encoding(cfips,cfips_index):
    """
     return the base 2 encoding of cfips
    """

    #n_dims is the number of bits needed to represent the cfips

    bin_index=np.binary_repr(cfips_index[cfips],width=N_DIMS_COUNTY_ENCODING)
    enc = torch.tensor([int(x) for x in bin_index],dtype=torch.float32)
    return enc





# Network

In [None]:
import json
import logging
import os

import numpy as np
import torch
from torch import nn
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        #Sin/Cos positional encoding
        super().__init__()
        self.d_model = d_model
        self.max_len = max_len
        self.pe = torch.zeros(self.max_len, self.d_model)
        position = torch.arange(0, self.max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, self.d_model, 2).float() * (-np.log(10000.0) / self.d_model))
        self.pe[:, 0::2] = torch.sin(position * div_term)
        self.pe[:, 1::2] = torch.cos(position * div_term)
        self.pe = self.pe.unsqueeze(0).transpose(0, 1).squeeze(1)
        #COnvert to nn.Parameter
        self.pe = nn.Parameter(self.pe, requires_grad=False)



    def forward(self, x):
        #Add positional encoding to the input (Pay attention to the dimensions (the pe does not have the batch dimension))
        x = x + self.pe[:x.size(0), :]
        return x



class TransformerPredictor(nn.Module):

    def __init__(self,
                 emb_dim=32,
                 n_layers=3,
                 n_head=8,
                 max_seq_len=100,
                 dim_feedforward=128,
                 use_derivative=True,
                 use_census=USE_CENSUS,
                 n_dims_census_emb=2,
                 experiment_dir="my_model", reset=False, load_best=True):
        """
        @param features_encoder :
        @param input_dim:
        @param hidden_dim:
        @param ues_encoder:²
        @param experiment_dir:
        @param reset:
        @param load_best:
        """

        super(TransformerPredictor, self).__init__()
        self.variante_num=4
        self.emb_dim = emb_dim
        self.n_layers = n_layers
        self.n_head = n_head
        self.dim_feedforward = dim_feedforward
        self.use_census = use_census
        self.max_seq_len = max_seq_len
        self.census_features_encoder = None
        self.n_dims_census_emb = n_dims_census_emb
        self.input_dim =1
        self.use_derivative = use_derivative
        if self.use_derivative:
            self.input_dim += 2 # 2 for derivative

        if self.use_census:
            self.input_dim = self.input_dim  + self.n_dims_census_emb



        self.experiment_dir = experiment_dir
        self.model_name = os.path.basename(self.experiment_dir)
        self.reset = reset
        self.load_best = load_best
        self.setup_dirs()
        self.setup_network()


        if not reset: self.load_state()

    ##1. Defining network architecture
    def setup_network(self):
        """
        Initialize the network  architecture here
        @return:
        """
        #Input encoder from self.input_dim to self.emb_dim along with positional encoding
        if self.use_census:
            self.query_encoder = nn.Sequential(nn.Linear(N_DIMS_COUNTY_ENCODING + N_CENSUS_FEATURES, self.emb_dim))
            self.census_features_encoder= nn.Sequential(
                nn.Linear(N_CENSUS_FEATURES,self.n_dims_census_emb),
            )

        self.input_embedding = nn.Sequential(
            nn.Linear(self.input_dim, self.emb_dim),
        )

        ##Positional encoding
        self.positional_encoding = PositionalEncoding(self.emb_dim, max_len=self.max_seq_len)
        self.dropout = nn.Dropout(p=0.1)
        self.transformer_encoder  = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=self.emb_dim, nhead=self.n_head, dim_feedforward=self.dim_feedforward,
                                       dropout=0.01,
                                       batch_first=True),
            num_layers=self.n_layers
        )
        self.transformer_decoder = nn.TransformerDecoder(
            nn.TransformerDecoderLayer(d_model=self.emb_dim, nhead=self.n_head, dim_feedforward=self.dim_feedforward,
                                       dropout=0.01,
                                       batch_first=True),
            num_layers=self.n_layers
        )

        if self.use_census:
            self.regressor = nn.Sequential(
                nn.Linear(2*self.emb_dim, 1)
            )

        else:
            self.regressor = nn.Sequential(
                nn.Linear(self.emb_dim, 1)
            )


    ##2. Model Saving/Loading
    def load_state(self, best=False):
        """
        Load model
        :param self:
        :return:
        """
        if best and os.path.exists(self.save_best_file):
            logging.info(f"Loading best model state : {self.save_file}")
            self.load_state_dict(torch.load(self.save_file, map_location=DEVICE))
            return

        if os.path.exists(self.save_file):
            logging.info(f"Loading model state : {self.save_file}")
            self.load_state_dict(torch.load(self.save_file, map_location=DEVICE))

    def save_state(self, best=False):
        if best:
            logging.info("Saving best model")
            torch.save(self.state_dict(), self.save_best_file)
        torch.save(self.state_dict(), self.save_file)

    ##3. Setupping directories for weights /logs ... etc
    def setup_dirs(self):
        """
        Checking and creating directories for weights storage
        @return:
        """
        self.save_file = os.path.join(self.experiment_dir, f"{self.model_name}.pt")
        self.save_best_file = os.path.join(self.experiment_dir, f"{self.model_name}_best.pt")
        if not os.path.exists(self.experiment_dir):
            os.makedirs(self.experiment_dir)

    #4. Forward call
    def forward(self, X_input):
        """
        +Forward call here.
        It a time series, so we need the full sequence output (strided by 1)
        @param X:
        @return:
        """
        #0. Preparing the input (Removing the target from the input)
        X = X_input[:, :-1, :] # Removing the target from the input (Only required when using census features)


        if self.use_derivative:
            d_left= torch.zeros((X.shape[0], X.shape[1], 1), device=DEVICE)
            d_left[:,1:, -1] = X[:, 1:, -1] - X[:, :-1, -1]

            d_right= torch.zeros((X.shape[0], X.shape[1], 1), device=DEVICE)
            d_right[:,:-1, -1] = X[:, 1:, -1] - X[:, :-1, -1]

            X = torch.cat((d_left, X, d_right), dim=-1) ## Adding the derivative to the input as a new feature


        if self.use_census:
            target = X[:, -1, :]  # Last element of the sequence is the target .
            query = self.query_encoder(target[:, :N_DIMS_COUNTY_ENCODING+N_CENSUS_FEATURES])

            enc_census = self.census_features_encoder(X[:, :, N_DIMS_COUNTY_ENCODING:N_CENSUS_FEATURES + N_DIMS_COUNTY_ENCODING])
            X = torch.cat((X[:, :, N_CENSUS_FEATURES + N_DIMS_COUNTY_ENCODING:], enc_census), dim=-1)


        #2. Apply the input encoder to the input
        X = self.input_embedding(X)

        #3. Add the positional encoding
        X = self.positional_encoding(X)


        #4. Add a query token to the input. Encoding of the cfips. (It is the same for all the sequence)
        if self.use_census:
            X = torch.cat((query.unsqueeze(1), X), dim=1)


        #4. Apply the transformer encoder to get the memory
        X = self.transformer_encoder(X)

        if self.use_census:
            query_enc= X[:, 0, :]
            X = X[:, 1:, :]#Removing the query token



        #.5 Apply the transformer decoder to get the next item in the sequence
        tgt_sequence = torch.zeros(X.shape[0], 1, X.shape[-1]).to(DEVICE)
        tgt_mask = torch.ones(1,1).to(DEVICE)

        #6. Then apply the transformer to get the next item in the sequence
        output = self.transformer_decoder(tgt_sequence, memory=X, tgt_mask= tgt_mask)#We want to predict the next item in the sequence

        #7.We only want the last output of the sequence
        output= output[:, -1, :]
        if self.use_census:
            output= torch.cat((output, query_enc), dim=-1)

        #3. Finally apply the regressor to get the predictions.
        output = self.regressor(output)

        return output







# Dataset

In [None]:
import os

import pandas as pd
import torch
from torch.utils.data import Dataset
class LstmDataset(Dataset):
    def __init__(self, type, seq_len, stride=1):
        self.type = type
        self.seq_len = seq_len
        self.stride = stride

        self.file = os.path.join(DATA_DIR, f"train_with_census_{'train' if type==DatasetType.TRAIN else 'val' if type==DatasetType.VALID  else 'test'}.csv")
        self.load_data()

    def init_transforms(self):
        """
        Initialize transforms.Might be different for each dataset type
        """

    def load_data(self):
        """
        Load data from the data items if necessary
        """
        self.data = pd.read_csv(self.file)
        self.data['first_day_of_month'] = pd.to_datetime(self.data['first_day_of_month'])

    def __len__(self):
        return len(self.data) // self.stride

    def __getitem__(self, item):
        """
        Retrieving seq_len data
        1. The county (CFIPS) should be the same
        2. And the difference between the date(first_day_of_month) should be at most 3 months
        """
        i = item * self.stride
        county = self.data.iloc[i]['cfips']

        rows_data=self.data.iloc[i:i+self.seq_len]

        #Check if the county is the same
        is_valid = len(rows_data)==self.seq_len and (rows_data['cfips'].unique()[0]==county) and (rows_data['first_day_of_month'].diff().max()<pd.Timedelta(days=90))

        if not is_valid:
            ##Find a random item that is valid
            return self.__getitem__(torch.randint(0, len(self), (1,)).item())

        #Taking seq_len rows and considering the following features
        #pct_bb,pct_college,pct_foreign_born,pct_it_workers,median_hh_inc, active,microbusiness_density
        features_tensor = torch.tensor(
            rows_data[['pct_bb', 'pct_college', 'pct_foreign_born', 'pct_it_workers', 'median_hh_inc','year', 'active',
                        'microbusiness_density']].values, dtype=torch.float32)

        #return the iterator
        return features_tensor

import json
import os
from unicodedata import category
import numpy as np
import pandas as pd
import torch
from PIL import Image
from matplotlib import pyplot as plt
from torch.utils.data import Dataset
from torchvision.transforms import transforms
from tqdm import tqdm



from enum import Enum




class CensusDataset(Dataset):
    def __init__(self, type):
        self.type=type
        self.load_data()
        pass

    def load_data(self):
        """
        Load data from the data items if necessary
        Returns:

        """
        self.data_file=os.path.join(DATA_DIR,f"train_with_census_ae_{'train' if self.type == DatasetType.TRAIN else 'test'}.csv")
        self.data = pd.read_csv(self.data_file)




    def __len__(self):
        return len(self.data)


    def __getitem__(self, idx):
        """
        pct_bb,pct_college,pct_foreign_born,pct_it_workers,median_hh_inc,year .
        Retrieve the following features from the dataset and return the corresponding tensor

        Returns:
        """
        row=self.data.iloc[idx]
        features_tensor=torch.tensor([row['pct_bb'],row['pct_college'],row['pct_foreign_born'],\
                                      row['pct_it_workers'],row['median_hh_inc'],row['year']],dtype=torch.float32)
        return features_tensor







# Loss and metrics

In [None]:
import torch
from torch import nn


class SmapeCriterion(nn.Module):
    """
    Class to compute the SMAPE loss.
    """
    def __init__(self):
        super(SmapeCriterion, self).__init__()

    def forward(self, y_pred, y_true):
        """
        @param y_pred: Predicted values
        @param y_true: True values
        @return: SMAPE loss
        """
        eps = 1e-8
        return 100*torch.mean(2 * torch.abs(y_pred - y_true) / (torch.abs(y_pred) + torch.abs(y_true) + eps))

    def __str__(self):
        return "SMAPE"

    def __repr__(self):
        return str(self)



# Dataset

In [None]:
import logging
import os
import random

import pandas as pd
import torch
from torch.utils.data import Dataset
from tqdm import tqdm

EVAL_START_DATE = "2022-05-01"
TEST_START_DATE =  "2022-11-01"

SEED=42
class MicroDensityDataset(Dataset):
    def __init__(self, type, seq_len, stride=1,use_census=USE_CENSUS):
        self.type = type
        self.seq_len = seq_len
        self.stride = stride if type == DatasetType.TRAIN else 1
        self.use_census = use_census
        self.load_data()
        self.prepare_sequences()


    def init_transforms(self):
        """
        Initialize transforms.Might be different for each dataset type
        """

    def load_data(self):
        """
        Load data from the data items if necessary
        """

        self.main_file = os.path.join(DATA_DIR, "train.csv")
        self.main_df = pd.read_csv(self.main_file)

        if self.type == DatasetType.TEST:
            self.test_df = pd.read_csv(TEST_FILE)
            self.test_df["microbusiness_density"] = [0 for _ in range(len(self.test_df))]
            self.test_df["county"] =["NAN" for _ in range(len(self.test_df))]
            self.test_df["state"] =["NAN" for _ in range(len(self.test_df))]

            self.main_df = pd.concat([self.main_df, self.test_df], ignore_index=True)

            self.test_df =self.test_df.sort_values(by=["cfips","first_day_of_month"])
            self.test_df = self.test_df.reset_index(drop=True)

        if self.use_census:
            #Merge the census features
            self.cfips_index=get_cfips_index()
            self.census_df = pd.read_csv(CENSUS_FILE)

            self.main_df=pd.merge(self.main_df,self.census_df,on=["cfips","first_day_of_month"],how="left")


        ##Group by cfips and sort by date
        self.main_df=self.main_df.sort_values(by=["cfips","first_day_of_month"])
        self.main_df["id"] =list(range(len(self.main_df)))



    def prepare_sequences(self):
        """
        Prepare the sequences for the LSTM:
        Build a list of (id(offset), id(seq_len+offset)) tuples
        """
        self.sequences=[]

        if self.type == DatasetType.TRAIN:
            ##Train data are dates before EVAL_START_DATE
            df=self.main_df[self.main_df['first_day_of_month']<EVAL_START_DATE]

            for i in tqdm(range(0, len(df)-self.seq_len, self.stride), desc="Preparing sequences of dataset of type train"):

                ##The cfips should be the same for the whole sequence(just check the first and last rows)
                if df.iloc[i]["cfips"] != df.iloc[i + self.seq_len - 1]["cfips"]:
                    continue

                if i + self.seq_len > len(df) :
                    break

                #Get the corresponding ids
                self.sequences.append((df.iloc[i]["id"], df.iloc[i]["id"]+ self.seq_len))



        else :


            if self.type == DatasetType.VALID:
                df = self.main_df[self.main_df['first_day_of_month'] >= EVAL_START_DATE]

            else:
                df = self.main_df[self.main_df['first_day_of_month'] >= TEST_START_DATE]


            for i in tqdm(range(0, len(df),self.stride), desc="Preparing sequences of dataset of type {}".format("eval" if self.type == DatasetType.VALID else "test")):
                ## In eval and test sequences, the step to predict should always be the last one of the sequence

                ##Find the offest of the start in the main df


                offset=df.iloc[i]["id"]

                offset = offset - self.seq_len  # The step to predict is the last one of the sequence


                ##check if the cfips is the same
                if self.main_df.iloc[offset]["cfips"] != self.main_df.iloc[offset + self.seq_len - 1]["cfips"]:
                    #Warning
                    print("Warning: cfips is not the same for the whole sequence . Offsets :",offset,offset + self.seq_len - 1)

                self.sequences.append((offset, offset + self.seq_len))






    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, item):
        """
        Retrieving seq_len data
        1. The county (CFIPS) should be the same
        2. And the difference between the date(first_day_of_month) should be at most 3 months
        """
        start,end=self.sequences[item]
        rows_data=self.main_df.iloc[start:end]


        #ensure unique cfips
        # assert len(rows_data["cfips"].unique())==1

        tensor = torch.tensor(rows_data[['microbusiness_density']].values,
                                       dtype=torch.float32)  # Not considering the census features

        #FEatures scaling


        if self.use_census:
            censur_features_tensor = extract_census_features(rows_data, cfips_index=self.cfips_index,single_row=False)
            tensor = torch.cat((censur_features_tensor,tensor), dim=1)



        return tensor



    def mix_with(self, other_dataset, size=0.8):
        """
        Combine two datasets exemple a train dataset and test dataset
        @param other_dataset:
        @param size:
        @return:
        """

        all_sequences= self.sequences + other_dataset.sequences
        random.shuffle(all_sequences)
        self.sequences=all_sequences[:int(len(all_sequences)*size)]
        other_dataset.sequences=all_sequences[int(len(all_sequences)*size):]
        logging.info("Combined dataset: {} sequences for train and {} sequences for test".format(len(self.sequences),len(other_dataset.sequences)))




# Trainer

In [1]:
import csv
import json
import logging
import os
import shutil
import warnings

import numpy as np
import pandas as pd
import torch
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
class TrainerTransformerPredictor:
    """
    Class to manage the full training pipeline
    """
    def __init__(self, network: TransformerPredictor,
                 criterion,
                 optimizer,
                 scheduler=None,
                 nb_epochs=10, batch_size=128, reset=False):
        """
        @param network:
        @param dataset_name:
        @param images_dirs:
        @param loss:
        @param optimizer:
        @param nb_epochs:
        @param nb_workers: Number of worker for the dataloader
        """
        self.network = network
        self.batch_size = batch_size
        self.loss_fn=criterion

        self.optimizer = optimizer
        self.scheduler =scheduler if scheduler else\
            torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, mode='min', factor=0.2, patience=10,min_lr=1e-5)

        self.nb_epochs = nb_epochs
        self.experiment_dir = self.network.experiment_dir
        self.model_info_file = os.path.join(self.experiment_dir, "model.json")
        self.model_info_best_file = os.path.join(self.experiment_dir, "model_best.json")

        if reset:
            if os.path.exists(self.experiment_dir):
                shutil.rmtree(self.experiment_dir)
        if not os.path.exists(self.experiment_dir):
            os.makedirs(self.experiment_dir)

        self.start_epoch = 0
        if not reset and os.path.exists(self.model_info_file):
            with open(self.model_info_file, "r") as f:
                self.start_epoch = json.load(f)["epoch"] + 1
                self.nb_epochs += self.start_epoch
                logging.info("Resuming from epoch {}".format(self.start_epoch))


    def save_model_info(self, infos, best=False):
        json.dump(infos, open(self.model_info_file, 'w'),indent=4)
        if best: json.dump(infos, open(self.model_info_best_file, 'w'),indent=4)

    def fit(self,train_dataloader,val_dataloader):
        logging.info("Launch training on {}".format(DEVICE))
        if self.network.use_census:
            logging.info("Using encoder census data")

        self.summary_writer = SummaryWriter(log_dir=self.experiment_dir)
        itr = self.start_epoch * len(train_dataloader) * self.batch_size  ##Global counter for steps

        #Save model graph
        # self.summary_writer.add_graph(self.network, next(iter(train_dataloader)).to(DEVICE)[:,:-1,:])

        self.best_val_loss = 1e20  # infinity
        if os.path.exists(self.model_info_file):
            with open(self.model_info_file, "r") as f:
                model_info = json.load(f)
                lr=model_info["lr"]
                logging.info(f"Setting lr to {lr}")
                for g in self.optimizer.param_groups:
                    g['lr'] = lr

        if os.path.exists(self.model_info_best_file):
            with open(self.model_info_best_file, "r") as f:
                best_model_info = json.load(f)
                self.best_val_loss = best_model_info["val_loss"]


        for epoch in range(self.start_epoch, self.nb_epochs):  # Training loop
            self.network.train()
            """"
            0. Initialize loss and other metrics
            """
            running_loss=Averager()
            pbar = tqdm(train_dataloader, desc=f"Epoch {epoch + 1}/{self.nb_epochs}")
            for _, batch in enumerate(pbar):
                """
                Training lopp
                """
                self.optimizer.zero_grad()
                itr += self.batch_size
                """
                1.Forward pass
                """
                batch = batch.to(DEVICE)

                y_pred = self.network(batch)
                ## The output is the values of the density for each time step

                """
                2.Loss computation and other metrics
                """
                # The density is the last item of the batch
                y_true = batch[:,:,-1].to(DEVICE)
                loss = self.loss_fn(y_pred, y_true[:, -1:])

                """
                3.Optimizing
                """
                loss.backward()
                self.optimizer.step()
                running_loss.send(loss.cpu().item())
                pbar.set_postfix(current_loss=loss.cpu().item(), current_mean_loss=running_loss.value)

                """
                4.Writing logs and tensorboard data, loss and other metrics
                """
                self.summary_writer.add_scalar("Train/loss", loss.item(), itr)
                self.scheduler.step(loss.item())



            epoch_val_loss =self.eval(val_dataloader,epoch)

            infos = {
                "epoch": epoch,
                "train_loss":running_loss.value,
                "val_loss":epoch_val_loss.value,
                "lr": self.optimizer.param_groups[0]['lr'],
                "input_dim": self.network.input_dim,
                "emb_dim": self.network.emb_dim,
                "dim_feedforward": self.network.dim_feedforward,
                "n_head": self.network.n_head,
                "n_layers": self.network.n_layers,
                "seq_len": train_dataloader.dataset.seq_len,
                "batch_size": train_dataloader.batch_size,
                "stride": train_dataloader.dataset.stride,
                "use_census": self.network.use_census,
                "variante": self.network.variante_num,

            }

            logging.info("Epoch {} - Train loss: {:.4f} - Val loss: {:.4f}".format(epoch, running_loss.value, epoch_val_loss.value))

            if epoch_val_loss.value < self.best_val_loss:
                self.best_val_loss = epoch_val_loss.value
                best = True
            else:
                best = False

            self.network.save_state(best=best)
            self.save_model_info(infos, best=best)


            self.summary_writer.add_scalar("Epoch_train/loss", running_loss.value, epoch)
            self.summary_writer.add_scalar("Epoch_val/loss", epoch_val_loss.value, epoch)



    def eval(self, val_dataloader,epoch):
        """
        Compute loss and metrics on a validation dataloader
        @return:
        """
        with torch.no_grad():
            self.network.eval()
            running_loss=Averager()
            pbar = tqdm(val_dataloader, desc=f"Validation Epoch {epoch + 1}/{self.nb_epochs}")
            for _, batch in enumerate(pbar):

                """
                Training lopp
                """
                """
                1.Forward pass
                """
                batch=batch.to(DEVICE)
                y_pred = self.network(batch)
                """
                2.Loss computation and other metrics
                """
                y_true = batch[:,:,-1]


                loss = self.loss_fn(y_pred, y_true[:, -1:])

                running_loss.send(loss.item())

                pbar.set_postfix(current_loss=loss.item(), current_mean_loss=running_loss.value)


        return running_loss



    def run_test(self, test_dataloader):
        """
        Compute loss and metrics on a validation dataloader
        @return:
        """
        assert test_dataloader.batch_size == 1, "Batch size must be 1 for test"
        predictions = []
        row_ids = []
        with torch.no_grad():
            self.network.eval()
            for i, input in enumerate(tqdm(test_dataloader," Running tests for submission")):
                input = input.to(DEVICE)
                y_pred = self.network(input.to(DEVICE)).cpu().squeeze().item()

                # Denormalize. MEAN_MB, STD_MB (if noramlized)
                # y_pred = y_pred * STD_MB + MEAN_MB
                """
                2.Loss computation and other metrics
                """
                predictions.append(y_pred)

                ##Update all microbusiness_den isty column
                row_id=test_dataloader.dataset.test_df.loc[i,"row_id"]
                row_ids.append(row_id)

                test_dataloader.dataset.main_df.loc[test_dataloader.dataset.main_df["row_id"]==row_id,"microbusiness_density"]=y_pred


        #Merge predictions
        predictions=np.array(predictions)


        #Update all microbusiness_denisty column

        pred_test_df = pd.DataFrame(
            {
                "row_id":row_ids,
                 "microbusiness_density":predictions}

                                )
        pred_test_df.to_csv(os.path.join(self.experiment_dir,"submission.csv"),index=False)

        return pred_test_df



# Runner

In [None]:
from dataclasses import dataclass
import argparse
import logging
import os
import pickle
import torch.utils.data

@dataclass
class Arguments:
    reset: bool = False
    learning_rate: float = 0.001
    nb_epochs: int = 1000
    model_name: str = None
    num_workers: int = 0
    batch_size: int = 1024
    log_level: str = "INFO"
    autorun_tb: bool = True
    use_census: bool = True
    use_derivative: bool = False
    seq_len: int = 12
    seq_stride: int = 1
    emb_dim: int = 8
    n_layers: int = 2
    n_head: int = 2
    dim_feedforward: int = 32

def cli():
    return Arguments()

def main(args):

    #Format the model name
    if args.model_name is None:
        model_name=f"trf_{'ae_' if args.use_census else ''}{'dv_' if args.use_derivative else ''}ed.{args.emb_dim}_nl.{args.n_layers}_nh.{args.n_head}_df.{args.dim_feedforward}_sl.{args.seq_len}_ss.{args.seq_stride}_lr.{args.learning_rate}_bs.{args.batch_size}"
    else :
        model_name=args.model_name



    experiment_dir = os.path.join(EXPERIMENTS_DIR, model_name)


    # Setup logger


    network =TransformerPredictor(
                                    experiment_dir=experiment_dir,
                                    emb_dim=args.emb_dim,
                                      n_layers=args.n_layers,
                                      n_head=args.n_head,
                                      dim_feedforward=args.dim_feedforward,
                                      use_census=args.use_census,
                                    max_seq_len=args.seq_len-1,
                                    reset=args.reset
                ).to(DEVICE)


    #Adam optimizer
    optimizer = torch.optim.Adam(network.parameters(), lr=args.learning_rate)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=int(300*512/args.batch_size), factor=0.5, verbose=True ,min_lr=1e-5)

    criterion= SmapeCriterion().to(DEVICE)


    logging.info("Training : "+model_name)
    trainer = TrainerTransformerPredictor(network,
                      criterion,
                      optimizer=optimizer,
                      scheduler=scheduler,
                      nb_epochs= args.nb_epochs,
                      batch_size=args.batch_size,
                      reset=args.reset,
                      )

    # Save  the dataset according to type, seq_len_stride and use_census: using pickle

    if not os.path.exists(os.path.join(ROOT_DIR,"dataset","pickle")):
        os.makedirs(os.path.join(ROOT_DIR,"dataset","pickle"))

    datasets_pickle_path = os.path.join(ROOT_DIR,"dataset","pickle",f"all_dataset_{args.seq_len}_{args.seq_stride}_{args.use_census}.pickle")


    if not os.path.exists(datasets_pickle_path):
        train_dataset = MicroDensityDataset(type=DatasetType.TRAIN, seq_len=args.seq_len, stride=args.seq_stride,
                                            use_census=args.use_census)
        val_dataset = MicroDensityDataset(type=DatasetType.VALID, seq_len=args.seq_len, stride=args.seq_stride,
                                          use_census=args.use_census)

        train_dataset.mix_with(val_dataset,size=0.8) #Mix train and val dataset to avoid disparity between the two in terms of dates distribution

        test_dataset = MicroDensityDataset(type=DatasetType.TEST, seq_len=args.seq_len, stride=args.seq_stride,
                                           use_census=args.use_census)

        with open(datasets_pickle_path,"wb") as f:
            logging.info(f"Saving datasets to {datasets_pickle_path}")
            pickle.dump((train_dataset,val_dataset,test_dataset),f)
    else:
        with open(datasets_pickle_path,"rb") as f:
            logging.info(f"Loading datasets  from {datasets_pickle_path}")
            train_dataset,val_dataset,test_dataset = pickle.load(f)



    logging.info(f"Nb sequences : Train {len(train_dataset)} - Val {len(val_dataset)} - Test {len(test_dataset)}")

    train_dataloader=torch.utils.data.DataLoader(train_dataset,batch_size=args.batch_size,num_workers=args.num_workers,shuffle=True,drop_last=False)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=args.batch_size,num_workers=0,drop_last=False)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=1,num_workers=0,drop_last=False,shuffle=False)

    ##Train
    trainer.fit(train_dataloader,val_dataloader)

    ##Load best model
    trainer.network.load_state(best=True)
    trainer.run_test(test_dataloader=test_dataloader)



args = cli()
setup_logger(args)
main(args)
