# Time series predictor

In [1]:
import numpy as np
import time
from datetime import date
import datetime
from datetime import timedelta  
import csv
import holidays # for importing the public holidays
import re
import torch
from src.utils import *
from src.data_miner import DataMiner

## Parameters

In [2]:
num_features = 5
min_hour = 21 # Minimum hour for sleep detection
max_hour = 5
train_window = 3 # Sequence length
local_holidays = holidays.Italy(prov='BO') # Get the holidays in Bologna, Italy :)
train_episodes = 1000
batch_size = 1

In [3]:
# Variables
data_dir = "data"
dataset = "data/LastSeenDataset.csv"

- Feature extraction: we first extract the features given the time series data of Telegram accesses.
- Supposition: last Telegram access in very similar to the time the person goes to sleep

## Feature engineering
Possible features to extract: 
1. Last seen time (arguably the most important)
2. Wake up time
3. Number of Telegram accesses during the previous day
4. Day of the week
5. Public holiday presence in the following day (using the holidays library)
6. (time spent on Telegram)


In [4]:
with open(dataset, newline='') as csvfile:
    date_list = list(csv.reader(csvfile))

date_list = convert_to_dates(date_list)

'''Test data: search calendar for local holidays'''
print("First day is holiday: ", date_list[0][0] in local_holidays)

First day is holiday:  False


In [5]:
data_tensor =  DataMiner(date_list).to_tensor(verbose=False)
print(data_tensor)

tensor([[0.6002, 0.5434, 0.4465, 0.5033, 0.4888, 0.5380, 0.5200, 0.6680, 0.1981,
         0.5418, 0.5891, 0.5230, 0.5878, 0.2870, 0.1545, 0.3483, 0.1007, 0.6694,
         0.5091, 0.4906, 0.6093, 0.6412, 0.8530, 0.3883, 0.5664, 0.7656],
        [0.6667, 0.5991, 0.6653, 0.6445, 0.7801, 0.6894, 0.6742, 0.6647, 1.0048,
         0.6278, 0.7105, 0.6988, 0.6384, 0.8407, 0.9146, 0.7862, 1.1783, 0.4033,
         0.6723, 0.6988, 0.6034, 0.5998, 0.5354, 0.7193, 0.7006, 0.5691],
        [1.0000, 0.0000, 0.1667, 0.3333, 0.5000, 0.6667, 0.8333, 1.0000, 0.0000,
         0.1667, 0.3333, 0.5000, 0.6667, 0.8333, 1.0000, 0.0000, 0.1667, 0.3333,
         0.5000, 0.6667, 0.8333, 1.0000, 0.0000, 0.1667, 0.3333, 0.5000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.1400, 0.1200, 0.0600, 0.0500, 0.0700, 0.1400,

## Data augmentation

Given that the training data is not much, we can insert some noise to augment it; this will also make the model less prone to overfitting

In [None]:
# Data augmentation


# We use the "last 3" trend
# Credits: https://stackabuse.com/time-series-prediction-using-lstm-with-pytorch-in-python/
'''The sequence on which we have a prediction is the last train_window days'''
X, y = create_sequences(data_tensor, train_window)

## Model
- Time series data, so possible idea(s):
    - LSTM

In [7]:
from src.models import MLP

In [8]:
n_features = num_features # this is number of parallel inputs
n_timesteps = train_window # this is number of timesteps

# convert dataset into input/output

# create NN
model = MLP(n_features*n_timesteps*batch_size, 1)
criterion = torch.nn.MSELoss() # reduction='sum' created huge loss value
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

print(X[2])
print(y[1])

tensor([[0.4465, 0.6653, 0.1667, 0.0000, 0.0600],
        [0.5033, 0.6445, 0.3333, 0.0000, 0.0500],
        [0.4888, 0.7801, 0.5000, 0.0000, 0.0700]])
tensor([0.4888])


In [14]:
model.train()

# Training loop
for t in range(train_episodes):
    for b in range(0,len(X)-1,batch_size):
        optimizer.zero_grad()
        x_batch = X[b:b+batch_size,:,:]
        y_batch = y[b:b+batch_size]
#         x_batch = torch.tensor(inpt,dtype=torch.float32)    
#         y_batch = torch.tensor(target,dtype=torch.float32)
        output = model.forward(x_batch) 
        loss = criterion(output, y_batch)  
#         print('PREDICTED:\n', output); print('REAL:\n', y_batch)
        loss.backward()
        optimizer.step()        
        #loss_list.append(loss.item())
    if t%10 == 0:
        print(('Step: {:4}   |   Loss: {:.6f} ').format(t, loss))

Step:    0   |   Loss: 0.003615 
Step:   10   |   Loss: 0.001513 
Step:   20   |   Loss: 0.001027 
Step:   30   |   Loss: 0.004120 
Step:   40   |   Loss: 0.004048 
Step:   50   |   Loss: 0.003955 
Step:   60   |   Loss: 0.003816 
Step:   70   |   Loss: 0.003735 
Step:   80   |   Loss: 0.003798 
Step:   90   |   Loss: 0.002145 
Step:  100   |   Loss: 0.000460 
Step:  110   |   Loss: 0.004028 
Step:  120   |   Loss: 0.004056 
Step:  130   |   Loss: 0.003992 
Step:  140   |   Loss: 0.003845 
Step:  150   |   Loss: 0.003736 
Step:  160   |   Loss: 0.003790 
Step:  170   |   Loss: 0.003524 
Step:  180   |   Loss: 0.000465 
Step:  190   |   Loss: 0.004421 
Step:  200   |   Loss: 0.004223 
Step:  210   |   Loss: 0.003982 
Step:  220   |   Loss: 0.003879 
Step:  230   |   Loss: 0.003768 
Step:  240   |   Loss: 0.003919 
Step:  250   |   Loss: 0.002185 
Step:  260   |   Loss: 0.000370 
Step:  270   |   Loss: 0.004315 
Step:  280   |   Loss: 0.004325 
Step:  290   |   Loss: 0.003933 
Step:  300

In [15]:
model.eval()
b = 1
inpt = X[-b-batch_size:-b,:,:]
target = y[b:b+batch_size]
x_batch = torch.tensor(inpt,dtype=torch.float32)    
y_batch = torch.tensor(target,dtype=torch.float32)


print(x_batch.size())
output = model.forward(torch.tensor(X[-1-batch_size:-1,:,:],dtype=torch.float32))[0]

with torch.no_grad():
    print(model.forward(torch.tensor(X[-1-batch_size:-1,:,:],dtype=torch.float32))[0])

torch.Size([1, 3, 5])
tensor([0.5172])


  x_batch = torch.tensor(inpt,dtype=torch.float32)
  y_batch = torch.tensor(target,dtype=torch.float32)
  output = model.forward(torch.tensor(X[-1-batch_size:-1,:,:],dtype=torch.float32))[0]
  print(model.forward(torch.tensor(X[-1-batch_size:-1,:,:],dtype=torch.float32))[0])


In [17]:
with torch.no_grad():
    for i in range(len(X)-3):
#         print(torch.tensor(X[i:i+1, :, : ],dtype=torch.float32).shape)
        print('Predicted: {:.4f} | Real: {:.4f}'.format(model.forward(torch.tensor(X[i:batch_size+i,:,:])).item(), y[i+batch_size-1].item()))

Predicted: 0.4995 | Real: 0.5033
Predicted: 0.5007 | Real: 0.4888
Predicted: 0.5372 | Real: 0.5380
Predicted: 0.5391 | Real: 0.5200
Predicted: 0.6485 | Real: 0.6680
Predicted: 0.2082 | Real: 0.1981
Predicted: 0.5437 | Real: 0.5418
Predicted: 0.5943 | Real: 0.5891
Predicted: 0.5167 | Real: 0.5230
Predicted: 0.5945 | Real: 0.5878
Predicted: 0.1897 | Real: 0.2870
Predicted: 0.1814 | Real: 0.1545
Predicted: 0.3424 | Real: 0.3483
Predicted: 0.1814 | Real: 0.1007
Predicted: 0.6676 | Real: 0.6694
Predicted: 0.5063 | Real: 0.5091
Predicted: 0.5023 | Real: 0.4906
Predicted: 0.5399 | Real: 0.6093
Predicted: 0.5668 | Real: 0.6412
Predicted: 0.7680 | Real: 0.8530


  print('Predicted: {:.4f} | Real: {:.4f}'.format(model.forward(torch.tensor(X[i:batch_size+i,:,:])).item(), y[i+batch_size-1].item()))


## Saving the time

We save the predicted time to send the message in a file, so that the Daemon can handle it

In [12]:
now = datetime.datetime.now()
# with torch.no_grad():
p = mv_net.forward(torch.tensor(X[-batch_size-1:-1:,:],dtype=torch.float32))[0].detach().numpy()
p_sec = int(p[0]*(max_hour+24-min_hour)*3600)
prediction = now.replace(hour=min_hour, minute=0, second=0) + timedelta(seconds=p_sec)
print('Expected time to go to sleep: ', prediction.strftime("%Y-%m-%d %H:%M:%S"))


'''Write the value on a text file to be read by the Daemon'''
with open ('prediction.txt','w') as z:
    z.write(prediction.strftime("%Y-%m-%d %H:%M:%S\n"))
z.close()

with open ('data/prediction_list.txt','a') as z:
    z.write(prediction.strftime("%Y-%m-%d %H:%M:%S\n"))
z.close()

NameError: name 'mv_net' is not defined