# Time series predictor

In [1]:
import numpy as np
import time
from datetime import date
import datetime
from datetime import timedelta  
import csv
import holidays # for importing the public holidays
import re
import torch
from src.utils import *
from src.data_miner import DataMiner

# Holiday calendar that later cells query with `<date> in local_holidays`.
# NOTE(review): newer releases of the `holidays` package replace the `prov=`
# keyword with `subdiv=` — confirm against the installed version.
local_holidays = holidays.Italy(prov='BO') # Get the holidays in Bologna, Italy :)

/home/fedebotu/Documents/good-night-ml


In [2]:
# Dataset location: the CSV file lives inside the data directory.
data_dir = "data"
dataset = f"{data_dir}/LastSeenDataset.csv"

- Feature extraction: we first extract the features given the time series data of Telegram accesses.
- Supposition: the last Telegram access is very similar to the time the person goes to sleep

## Feature engineering
Possible features to extract: 
1. Last seen time (arguably the most important)
2. Wake up time
3. Number of Telegram accesses during the previous day
4. Day of the week
5. Public holiday presence in the following day (using the holidays library)
6. (time spent on Telegram)


In [3]:
# Read every row of the CSV dataset into memory. `newline=''` is the form the
# csv module recommends so that it can handle line endings itself.
with open(dataset, newline='') as csvfile:
    date_list = list(csv.reader(csvfile))

# Convert the raw CSV rows with the project helper `convert_to_dates`
# (presumably provided by `src.utils` and yielding date-like objects — TODO confirm).
date_list = convert_to_dates(date_list)

'''Test data: search calendar for local holidays'''
# Sanity check: is the first entry of the first row present in the holiday calendar?
print(date_list[0][0] in local_holidays)

False


In [6]:
# Turn the parsed dates into a tensor through the project's DataMiner.
# NOTE(review): the printed output shows 5 rows of 21 float64 values —
# presumably one row per feature and one column per day; confirm in DataMiner.
data_tensor =  DataMiner(date_list).to_tensor(verbose=False)
print(data_tensor)

tensor([[0.6002, 0.5434, 0.4465, 0.5033, 0.4888, 0.5380, 0.5200, 0.6680, 0.1981,
         0.5418, 0.5891, 0.5230, 0.5878, 0.2870, 0.1545, 0.3483, 0.1007, 0.6694,
         0.5091, 0.4906, 0.6093],
        [0.6667, 0.5991, 0.6653, 0.6445, 0.7801, 0.6894, 0.6742, 0.6647, 1.0048,
         0.6278, 0.7105, 0.6988, 0.6384, 0.8407, 0.9146, 0.7862, 1.1783, 0.4033,
         0.6723, 0.6988, 0.6034],
        [1.0000, 0.0000, 0.1667, 0.3333, 0.5000, 0.6667, 0.8333, 1.0000, 0.0000,
         0.1667, 0.3333, 0.5000, 0.6667, 0.8333, 1.0000, 0.0000, 0.1667, 0.3333,
         0.5000, 0.6667, 0.8333],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000],
        [0.1400, 0.1200, 0.0600, 0.0500, 0.0700, 0.1400, 0.0967, 0.1400, 0.0300,
         0.2100, 0.3400, 0.1400, 0.2600, 0.2200, 0.0500, 0.1700, 0.0300, 0.3900,
         0.3600, 0.2500, 0.2600]], dtype=torch.float64

In [None]:
# Data augmentation


# Data augmentation
# Sliding-window approach: each sample is `train_window` consecutive time
# steps, labelled with the value of row 0 at the following time step.
# NOTE(review): this comment used to say we use the "last 3" trend, but the
# window below is 2 — confirm the intended size.
# Credits: https://stackabuse.com/time-series-prediction-using-lstm-with-pytorch-in-python/
train_window = 2

def create_inout_sequences(dt, tw, num_features=None, verbose=True):
    """Slice a feature tensor into sliding-window (input, label) pairs.

    Args:
        dt: 2-D tensor indexed as ``dt[feature][time_step]``.
        tw: window length, i.e. how many consecutive time steps form one
            input sample.
        num_features: number of leading rows of ``dt`` used as features.
            When omitted, the module-level ``NUM_FEATURES`` is used, which
            is the historical behaviour.
        verbose: when True (the default, matching the historical behaviour)
            the generated list is printed before it is returned.

    Returns:
        A list with one ``(train_seq, train_label)`` tuple per window.
        ``train_seq`` is a float32 tensor of shape ``(num_features, tw)``;
        ``train_label`` is a float32 tensor of shape ``(1,)`` holding the
        value of row 0 at the time step right after the window. The list is
        empty when ``dt`` has no more than ``tw`` time steps.

    Raises:
        IndexError: if at least one window exists but ``dt`` has fewer than
            ``num_features`` rows.
    """
    if num_features is None:
        num_features = NUM_FEATURES

    n_windows = dt.shape[1] - tw
    if n_windows > 0 and dt.shape[0] < num_features:
        raise IndexError(
            f"dt has {dt.shape[0]} rows but {num_features} features were requested"
        )

    inout_seq = []
    for start in range(n_windows):
        stop = start + tw
        # Copy the window so later in-place edits never alias `dt`.
        train_seq = dt[:num_features, start:stop].to(dtype=torch.float32, copy=True)
        # Cast the label to the same dtype as the input: a float64 label paired
        # with float32 predictions yields a mixed-dtype loss.
        train_label = dt[0, stop:stop + 1].to(dtype=torch.float32)
        inout_seq.append((train_seq, train_label))

    if verbose:
        print(inout_seq)
    return inout_seq

# Build the (window, next value) training pairs from the full feature tensor.
train_inout_seq = create_inout_sequences(data_tensor, train_window)


## Model
- Time series data, so possible idea(s):
    - LSTM

In [None]:
from torch import nn

class LSTM(nn.Module):
    """Single-layer LSTM followed by a linear read-out of the last time step.

    Args:
        input_size: number of features per time step (defaults to
            ``NUM_FEATURES``).
        hidden_layer_size: size of the LSTM hidden and cell states.
        output_size: number of values predicted per sequence.

    Attributes:
        hidden_cell: ``(h, c)`` state fed to the LSTM and overwritten on every
            forward pass. It is created for a batch size of 1, and callers
            reset it to zeros before each new, independent sequence.
    """

    def __init__(self, input_size=NUM_FEATURES, hidden_layer_size=100, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size

        self.lstm = nn.LSTM(input_size, hidden_layer_size)

        self.linear = nn.Linear(hidden_layer_size, output_size)

        self.hidden_cell = (torch.zeros(1, 1, self.hidden_layer_size),
                            torch.zeros(1, 1, self.hidden_layer_size))

    def forward(self, input_seq):
        """Return the prediction for the last time step of one sequence.

        Args:
            input_seq: a single (unbatched) sequence. Accepted layouts are
                ``(seq_len, input_size)``, a 1-D tensor of length ``seq_len``
                when ``input_size`` is 1, or the feature-major layout
                ``(input_size, seq_len)``, which is transposed first.

        Returns:
            Tensor of shape ``(output_size,)``.
        """
        n_features = self.lstm.input_size
        # Samples built by create_inout_sequences in this file are laid out as
        # (features, window). nn.LSTM expects the features on the last axis, so
        # flip such inputs; without this the reshape below would hand the LSTM
        # `window` features per step and fail. Square inputs are ambiguous and
        # are kept as (seq_len, input_size), as before.
        if (input_seq.dim() == 2
                and input_seq.size(-1) != n_features
                and input_seq.size(0) == n_features):
            input_seq = input_seq.t()

        seq_len = len(input_seq)
        # reshape (not view) because a transposed tensor is not contiguous.
        lstm_out, self.hidden_cell = self.lstm(
            input_seq.reshape(seq_len, 1, -1), self.hidden_cell
        )
        predictions = self.linear(lstm_out.reshape(seq_len, -1))
        return predictions[-1]
    
# Instantiate the network with its default sizes, a mean-squared-error loss
# and an Adam optimizer (learning rate 1e-3) over all model parameters.
model = LSTM()
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
print(model)

In [None]:
# Number of full passes over the training pairs.
epochs = 150

for i in range(epochs):
    # One optimizer step per (sequence, label) pair.
    for seq, label in train_inout_seq:
        # Clear the gradients left over from the previous sample.
        optimizer.zero_grad()
        # Start every sample from a fresh zero hidden/cell state; forward()
        # overwrites model.hidden_cell with the state produced by the LSTM.
        model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size),
                        torch.zeros(1, 1, model.hidden_layer_size))

        y_pred = model(seq)

        # NOTE(review): data_tensor prints as float64 while the model parameters
        # use PyTorch's default dtype — confirm y_pred and label share a dtype.
        single_loss = loss_function(y_pred, label)
        single_loss.backward()
        optimizer.step()

    # Progress report at epochs 1, 26, 51, ...; the value shown is the loss of
    # the last sample of the epoch, not an epoch average.
    if i%25 == 1:
        print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')

# Final report: loss of the last sample of the last epoch.
print(f'epoch: {i:3} loss: {single_loss.item():10.10f}')