In [56]:
"""
This notebook is used to prepare the x_pred_<streetname>.csv file that is needed later on for the prediction of result.

The data sources used are:
- The street traffic csv file. 
- Temperature and Occupation status of intensive care beds["taux_occupation_sae"] [covid-19 related]
- Key dates concerning curfew ["couvre feu"] and lockdown ["confinement"] status
- Key dates regarding holidays

The output features after-preprocessing are:
[
  Date et heure de comptage,mean_taux_occupation_past_week,mean_debit_horaire_past_week,date,weekday,weekofyear,month,year,
  time,tmin,tmax,taux_occupation_sae,confin_0,confin_1,confin_2,couvrefeu,ferie
]

The notebook is to be run once for each of the 3 streets (washington, convention, sts). After each execution, a new CSV file named x_pred_<streetname>.csv 
is created.

"""

'\nThis notebook is used to prepare the x_pred_<streetname>.csv file that is needed later on for the prediction of result.\n\nThe data sources used are:\n- The street traffic csv file. \n- Temperature and Occupation status of intensive care beds["taux_occupation_sae"] [covid-19 related]\n- Key dates concerning curfew ["couvre feu"] and lockdown ["confinement"] status\n- Key dates regarding holidays\n\nThe output features after-preprocessing are:\n[\n  Date et heure de comptage,mean_taux_occupation_past_week,mean_debit_horaire_past_week,date,weekday,weekofyear,month,year,\n  time,tmin,tmax,taux_occupation_sae,confin_0,confin_1,confin_2,couvrefeu,ferie\n]\n\nThe notebook is to be used 3 times for each street. After each execution, a new CSV file name x_pred_<streetname>.csv \nis created.\n\n'

In [57]:
import pandas as pd
import requests 
import json
from datetime import datetime , timedelta
import time

In [60]:
# Street whose prediction input file is generated by this run.
# Uncomment exactly one of the lines below; supported values are the keys of
# STREET_FILES.
#street = 'washington'
#street = 'convention'
street = 'sts'

# CSV template that is merged (on the 'date' column) into the generated rows.
template_x_pred = "template_x_pred.csv"

# street -> (input traffic dataset, output file name)
STREET_FILES = {
    'washington': ('datasets/washington_edited.csv', 'x_pred_washington.csv'),
    'convention': ('datasets/convention_edited.csv', 'x_pred_convention.csv'),
    'sts': ('datasets/sts_edited.csv', 'x_pred_sts.csv'),
}

if street not in STREET_FILES:
    # Fail fast: merely printing a message would let the following cells run
    # with path_data / save_name_x_predict undefined (or left over from a
    # previous run of this cell in the same kernel).
    raise ValueError(
        "Wrong street selected!! Got {!r}, expected one of {}".format(
            street, sorted(STREET_FILES)
        )
    )

path_data, save_name_x_predict = STREET_FILES[street]
print("Street selected: {}".format(street) )

Street selected: sts


In [61]:
def set_date(df):
    """Parse the "Date et heure de comptage" column of ``df`` into datetimes.

    The column is expected to hold strings such as
    ``2020-12-05 01:00:00+01:00`` (format ``%Y-%m-%d %H:%M:%S%z``).
    The frame is modified in place and also returned for convenience.
    """
    column = "Date et heure de comptage"
    parsed = pd.to_datetime(df[column], format='%Y-%m-%d %H:%M:%S%z')
    df[column] = parsed
    return df

In [62]:
# Build the feature rows for the days to predict and write them to
# save_name_x_predict. Relies on path_data, template_x_pred,
# save_name_x_predict and set_date() defined in the cells above.

# Day of the historical data whose hourly timestamps serve as a template for
# the generated timestamps.
DATE_LAST_WEEK = '2020-12-05'
# Shift (in days) applied to the template day to obtain the first generated
# day: 2020-12-05 + 6 days = 2020-12-11.
template_day_offset = 6
# Number of consecutive days generated, starting at the shifted template day.
nbr_days_to_predict = 6
timestamp_col = "Date et heure de comptage"

# --- Historical traffic data -------------------------------------------------
df1 = pd.read_csv(path_data)
# Drop the unnamed leading column if present (presumably a saved index --
# TODO confirm); errors='ignore' keeps the cell working when it is absent.
df1 = df1.drop(columns=['Unnamed: 0'], errors='ignore')

df2 = pd.read_csv(template_x_pred, sep=';')
df2["date"] = pd.to_datetime(df2["date"], format='%Y-%m-%d').dt.date

df1 = df1[:-1]  # the last row is discarded (presumably incomplete -- TODO confirm)
wdf1 = df1[-7*24:]  # last 7 days, assuming one row per hour

mean_debit_horaire_past_week = wdf1["Débit horaire"].mean()
mean_taux_occupation_past_week = wdf1["Taux d'occupation"].mean()

# --- Timestamps of the days to predict ---------------------------------------
first_day = (
    df1[df1["date"] == DATE_LAST_WEEK]
    .reset_index(drop=True)[[timestamp_col]]
    .copy()
)
if first_day.empty:
    # Without template rows the output would silently be an empty CSV.
    raise ValueError(
        "No rows found for DATE_LAST_WEEK={!r} in {}".format(DATE_LAST_WEEK, path_data)
    )
first_day = set_date(first_day)
first_day[timestamp_col] = first_day[timestamp_col] + timedelta(days=template_day_offset)

# One copy of the template day per day to predict, shifted by 0, 1, 2, ... days.
days = []
for i in range(nbr_days_to_predict):
    day = first_day.copy()
    day[timestamp_col] = first_day[timestamp_col] + timedelta(days=i)
    days.append(day)
ddf1 = pd.concat(days).reset_index(drop=True)

# --- Calendar features and past-week means -----------------------------------
timestamps = ddf1[timestamp_col]
ddf1["date"] = [d.date() for d in timestamps]
ddf1["weekday"] = [d.weekday() for d in timestamps]
ddf1["weekofyear"] = [d.weekofyear for d in timestamps]
ddf1["month"] = [d.month for d in timestamps]
ddf1["year"] = [d.year for d in timestamps]
ddf1["time"] = [d.time() for d in timestamps]
ddf1['mean_debit_horaire_past_week'] = mean_debit_horaire_past_week
ddf1["mean_taux_occupation_past_week"] = mean_taux_occupation_past_week

# Columns of the template file are attached by calendar date.
ddf1 = pd.merge(ddf1, df2, on='date', how='left')

# --- Lockdown / curfew status ------------------------------------------------
confi = pd.read_csv('datasets/confinement couvre feu.csv', sep=";")
# dtype=int keeps the confin_* indicators as 0/1 in the CSV regardless of the
# pandas version (pandas >= 2.0 would otherwise produce True/False).
confi = pd.concat(
    [pd.get_dummies(confi.confinement, prefix='confin', dtype=int), confi],
    axis=1,
).drop(columns=['confinement'])
confi["date"] = pd.to_datetime(confi["date"], format='%Y-%m-%d').dt.date
ddf1 = pd.merge(ddf1, confi, on='date', how='left')

# --- Public holidays ---------------------------------------------------------
jf = pd.read_csv('datasets/feries.csv').drop(['annee', 'zone'], axis=1)
jf["nom_jour_ferie"] = 1  # only the fact that the day is a holiday is kept
jf["date"] = pd.to_datetime(jf["date"], format='%Y-%m-%d').dt.date
# If a date is listed more than once (e.g. once per zone), keep a single row so
# the left merge below cannot duplicate the hourly rows of that day.
jf = jf.rename(columns={"nom_jour_ferie": "ferie"}).drop_duplicates(subset="date")
ddf1 = pd.merge(ddf1, jf, on='date', how='left')
# Dates absent from the holiday file are regular days.
ddf1["ferie"] = ddf1["ferie"].fillna(0).astype(int)

# The index is written as the first (unnamed) column, as in the original cell.
ddf1.to_csv(save_name_x_predict)

In [66]:
# Report which prediction input file was written by the previous cell.
created_message = "{} created ".format(save_name_x_predict)
print(created_message)

x_pred_sts.csv created 
