In [77]:
import sys
sys.path.insert(0, '../')
from forecast_model import Forecast_model

import pandas as pd
import numpy as np
import copy
from tqdm import tqdm
from scipy import spatial
try:
    # Python 2 ships the C-accelerated implementation under this name.
    import cPickle as pickle
except ImportError:
    # Only a missing module should trigger the fallback; any other error
    # raised during import is left to propagate instead of being swallowed.
    import pickle

class Ha_model(Forecast_model):
    """Historical-average model.

    ``learn`` groups the observations by a set of exogenous (date) features and
    stores, for every feature combination, the mean and the median of each
    requested time series in ``dict_pred_mean`` / ``dict_pred_median``.
    """

    def __init__(self, nom: str):
        """Create an untrained model.

        Args:
            nom: model name, forwarded to ``Forecast_model.__init__``.
        """
        Forecast_model.__init__(self, nom)
        self.start_date = ''
        self.end_date = ''
        self.features = []
        self.time_series = []
        self.df_observation_path = ''
        self.df_date_path = ''
        # Learned lookup tables. Created here (empty) so that reading them
        # before ``learn`` has run does not raise AttributeError.
        self.dict_pred_mean = {}
        self.dict_pred_median = {}

    def __str__(self):
        """Return a multi-line description of the model and its training setup."""
        # NOTE(review): ``self.name`` is not set in this class; presumably it
        # comes from Forecast_model.__init__ -- confirm.
        return "Description of model: %s\n" \
               "Learning start date: %s\n" \
               "Learning end date: %s\n" \
               "Features: %s\n" \
               "Learned time series: %s\n" \
               "Training data path: %s\n" \
               "Training exogenous data (date) path: %s" % (self.name, self.start_date, self.end_date,
                                                           ",".join(str(x) for x in self.features),
                                                           ",".join(str(x) for x in self.time_series),
                                                           self.df_observation_path, self.df_date_path)

    @staticmethod
    def _aggregate_to_dict(aggregated, n_features):
        """Turn a grouped aggregate into ``{feature tuple: values array}``.

        Args:
            aggregated: result of ``groupby(features).<agg>()``; after
                ``reset_index`` the first ``n_features`` columns are the group
                keys and the remaining ones the aggregated series.
            n_features: number of leading key columns.

        Returns:
            dict mapping a tuple of the key values cast to int to a float
            array holding the remaining columns of that row.
        """
        rows = aggregated.reset_index().values
        return dict((tuple(np.array(row[:n_features]).astype(int)), row[n_features:].astype(float))
                    for row in tqdm(rows))

    def learn(self, df_observation_path, df_date_path, start_date, end_date, features, time_series):
        """Learn per-feature-combination mean and median of the time series.

        Both CSV files are read and indexed by their "Datetime" column. The
        observations are restricted to ``start_date:end_date`` and to the
        ``time_series`` columns, then joined with the ``features`` columns of
        the date file over the same slice.

        Args:
            df_observation_path: path of the observation CSV.
            df_date_path: path of the exogenous (date) CSV.
            start_date: first label of the training slice.
            end_date: last label of the training slice.
            features: column names of the date file used as grouping keys.
            time_series: column names of the observation file to learn.

        Returns:
            None. Results are stored in ``self.dict_pred_mean`` and
            ``self.dict_pred_median``.
        """
        self.start_date = start_date
        self.end_date = end_date
        self.features = features
        self.time_series = time_series
        self.df_observation_path = df_observation_path
        self.df_date_path = df_date_path

        print('Read data: observation and date')
        df_observation = pd.read_csv(df_observation_path).set_index("Datetime")
        df_date = pd.read_csv(df_date_path).set_index("Datetime")[features]
        df_observation = df_observation[start_date:end_date][time_series]
        df_observation = df_observation.join(df_date[start_date:end_date])

        # Build the grouping once; both aggregates reuse it.
        grouped = df_observation.groupby(features)
        n_features = len(features)

        print('Progress Bar 1/2 : learning mean')
        self.dict_pred_mean = self._aggregate_to_dict(grouped.mean(), n_features)

        print('Progress Bar 2/2 : learning median')
        self.dict_pred_median = self._aggregate_to_dict(grouped.median(), n_features)
        print('End of Learning')
        return

In [78]:
# Create an untrained model; its training fields stay empty until learn() is called.
my_model = Ha_model('my_ha_model')

In [79]:
# Show the summary built by Ha_model.__str__ (empty fields before learning).
print(my_model)

Description of model: my_ha_model
Learning start date: 
Learning end date: 
Features: 
Learned time series: 
Training data path: 
Training exogenous data (date) path: 


In [25]:
# Read the station information table and print its `stop_id` column as a list
# of strings (the ids reused as time-series column names further down).
# NOTE(review): the path is relative to the notebook's working directory.
df_sta_info_path = "../../../../data2/montreal/stm/data/station_info.csv"
df_i = pd.read_csv(df_sta_info_path)
print(df_i['stop_id'].values.astype('str').tolist())

['11', '32', '34', '15', '44', '65', '31', '33', '35', '47', '13', '14', '1', '9', '5', '18', '36', '24', '68', '43', '8', '64', '10', '55', '3', '49', '51', '2', '19', '56', '7', '6', '4', '48', '66', '25', '23', '28', '39', '54', '60', '27', '20', '46', '12', '21', '62', '52', '41', '50', '30', '16', '37', '40', '26', '67', '57', '61', '42', '45', '38', '29', '58', '63', '22', '59', '53', '17']


In [80]:
# Training inputs passed to Ha_model.learn in the next cell.
# NOTE(review): both paths are absolute and machine-specific.
df_observation_path = '/home/toque/data2/montreal/stm/data/valid_metro_15min_2015_2016_2017_sumpass.csv'
df_date_path = '/home/toque/data/data_clean/date/2013-01-01-2019-01-01_new.csv'
# Bounds of the "Datetime" label slice used for training.
start_date = '2015-01-01'
end_date = '2017-01-01'
# Columns of the date file used as grouping keys.
features = ["hms_int_15min","Day_id"]
# Columns of the observation file to learn (station stop ids as strings).
time_series = ['11', '32', '34', '15', '44', '65', '31', '33', '35', '47', '13', '14',
               '1', '9', '5', '18', '36', '24', '68', '43', '8', '64', '10', '55', '3',
               '49', '51', '2', '19', '56', '7', '6', '4', '48', '66', '25', '23', '28',
               '39', '54', '60', '27', '20', '46', '12', '21', '62', '52', '41', '50', '30',
               '16', '37', '40', '26', '67', '57', '61', '42', '45', '38', '29', '58', '63',
               '22', '59', '53', '17']

In [81]:
# Fit the mean/median lookup tables on the parameters defined in the previous cell.
my_model.learn(df_observation_path, df_date_path, start_date, end_date, features, time_series)

Read data: observation and date


100%|██████████| 608/608 [00:00<00:00, 218408.43it/s]
100%|██████████| 608/608 [00:00<00:00, 219008.66it/s]

Progress Bar 1/2 : learning mean
Progress Bar 2/2 : learning median
End of Learning





In [113]:
import yaml
import argparse

import os
import sys
sys.path.insert(0, '../utils/')
from utils import *

"""
    Load configuration of the model from yaml file
"""

#parser = argparse.ArgumentParser(description='Process some integers.')
#parser.add_argument('--config', type=str, help='Yaml file containing the configuration of the model')
#config_file = parser.parse_args(['--config'])

config_file = 'config.yaml'
with open(config_file, 'r') as stream:
    try:
        # safe_load builds plain Python objects only; yaml.load without an
        # explicit Loader is unsafe and is rejected by recent PyYAML releases.
        config = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        print(exc)
        # Stop here: continuing would use an undefined (or stale) `config`.
        raise


# Unpack the settings used by the rest of the cell; a missing key raises KeyError.
df_observation_path = config['df_observation_path']
df_date_path = config['df_date_path']
features = config['features']
stations = config['stations']
model_name = config['model_name']
start_date = config['start_date']
end_date = config['end_date']
path_to_save = config['path_to_save']
# The empty last component keeps the trailing separator, and the join still
# works when `path_to_save` itself does not end with one.
path_directory_to_save = os.path.join(path_to_save, model_name, '')


print("You are going to create the model: %s" % (model_name))

# Ask before overwriting a model directory that already exists.
create_model = True
if os.path.exists(path_directory_to_save):
    create_model = yes_or_no("WARNING !!!!!!!\nThe model %s saved in path: %s already exists.\n"
                             "Do you want to erase and replace it?" % (model_name, path_directory_to_save))

if create_model:
    my_model = Ha_model(model_name)
    print("Creation of the model done")

    print("Learning the model..")
    # Ha_model defines `learn`; the series to learn are the `stations` read
    # from the configuration, and the observation path is `df_observation_path`.
    my_model.learn(df_observation_path, df_date_path, start_date, end_date, features, stations)
    print("Learning done")

    print("Saving models..")
    # NOTE(review): `save` is not defined on Ha_model in this notebook;
    # presumably inherited from Forecast_model -- confirm its signature.
    my_model.save(path_directory_to_save, os.getcwd()+"/"+config_file)
    print("Saving models done")


AttributeError: 'dict' object has no attribute 'df_date_path'

In [115]:
def yes_or_no(question):
    """Prompt with `question` until the reply starts with 'y' or 'n'.

    The reply is lower-cased and stripped before its first character is
    inspected, so "Yes", " n " and "yn" are all accepted.

    Returns:
        True for a reply starting with 'y', False for one starting with 'n'.
    """
    while True:
        answer = str(input(question + ' (y/n): ')).lower().strip()
        first_char = answer[:1]
        if first_char == 'y':
            return True
        if first_char == 'n':
            return False
        # Anything else (including an empty reply): ask again.

In [130]:

# Stop the run unless the user confirms; otherwise acknowledge and go on.
if not yes_or_no("Do you want to continue"):
    sys.exit()
print(' ok')

print('10')

Do you want to continue (y/n): n


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [124]:
# NOTE(review): `test` is not defined in any visible cell; presumably it wrapped
# the yes/no prompt cell above -- a fresh kernel would raise NameError here.
test()

do you want to continue (y/n): yn
 ok
10


In [133]:
import os
# Current working directory of the kernel.
dir_path = os.getcwd()

In [134]:
# Display the working directory captured in the previous cell.
dir_path

'/home/toque/work/forecast/model/ha'

In [1]:
import pandas as pd

In [3]:
# Raw observation table and exogenous (date) table used for the samples below.
# NOTE(review): this date path differs from `df_date_path` used for training
# earlier in the notebook -- confirm which file is the intended one.
obs = pd.read_csv('/home/toque/data2/montreal/stm/data/valid_metro_15min_2015_2016_2017_sumpass.csv')
exo = pd.read_csv('/home/toque/data2/date/2013-01-01-2019-01-01_new.csv')

In [29]:
import numpy as np

In [40]:
# First three observation rows, indexed by "Datetime".
b = obs.head(3).set_index("Datetime")
# Scale every value by one shared random factor (unseeded, so the numbers
# change on every run) divided by 3, rounded to two decimals.
d = np.around(b.values / 3 * np.random.rand(), 2)
b[:] = d
b.reset_index()

Unnamed: 0,Datetime,11,32,34,15,44,65,31,33,35,...,42,45,38,29,58,63,22,59,53,17
0,2015-01-01 00:00:00,19.15,6.02,12.31,8.48,2.87,3.42,5.74,6.43,6.84,...,0.82,2.05,0.96,2.19,0.82,0.27,4.92,0.55,1.23,0.27
1,2015-01-01 00:15:00,29.54,4.24,14.77,4.92,3.83,5.2,5.61,7.25,5.47,...,2.05,15.18,1.64,7.25,2.32,0.96,9.16,0.27,2.74,1.5
2,2015-01-01 00:30:00,83.97,5.33,13.68,5.74,1.78,0.55,17.51,8.48,6.7,...,0.82,5.2,3.56,8.21,0.0,0.0,5.47,0.0,0.82,1.23


In [47]:
# Scratch cell: a one-iteration loop over np.arange(1).
for i in np.arange(1):
    print('hello')

hello


In [19]:
# First three rows of the exogenous table, restricted to six columns.
a = exo.head(3)[['Datetime','Day_id','hms_int_15min','Vacances','Ferie','Mois_id']]

In [21]:
# Rename the columns positionally to English names (same order as selected above).
a.columns=['Datetime','day_id','timestep_id','school_holiday','holiday','month_id']

In [23]:
# Display the sample with the columns reordered; `a` itself is not modified.
a[['Datetime', 'month_id', 'day_id', 'timestep_id', 'school_holiday', 'holiday' ]]

Unnamed: 0,Datetime,month_id,day_id,timestep_id,school_holiday,holiday
0,2013-01-01 00:00:00,1,1,0,1,1
1,2013-01-01 00:15:00,1,1,1,1,1
2,2013-01-01 00:30:00,1,1,2,1,1


In [48]:
# Display the sample in its stored column order.
a

Unnamed: 0,Datetime,day_id,timestep_id,school_holiday,holiday,month_id
0,2013-01-01 00:00:00,1,0,1,1,1
1,2013-01-01 00:15:00,1,1,1,1,1
2,2013-01-01 00:30:00,1,2,1,1,1


In [50]:
# Re-bind `a` to a copy indexed by 'Datetime'; re-running this cell fails because
# the column no longer exists afterwards.
a = a.set_index('Datetime')

In [52]:
# Scratch check: join with an empty list of frames (the output shows `a` unchanged).
a.join([])


Unnamed: 0_level_0,day_id,timestep_id,school_holiday,holiday,month_id
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2013-01-01 00:00:00,1,0,1,1,1
2013-01-01 00:15:00,1,1,1,1,1
2013-01-01 00:30:00,1,2,1,1,1
