In [25]:
# BASE
# ------------------------------------------------------
import numpy as np
import pandas as pd
import os
import gc
import warnings

# DATA VISUALIZATION
# ------------------------------------------------------
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px


# CONFIGURATIONS
# ------------------------------------------------------
pd.set_option('display.max_columns', None)
pd.options.display.float_format = '{:.2f}'.format
warnings.filterwarnings('ignore')

In [26]:
holidays = pd.read_csv("C://Users//fento//store-sales-time-series-forecasting//holidays_events.csv")
holidays["date"] = pd.to_datetime(holidays.date)

In [27]:
# Transferred Holidays
tr1 = holidays[(holidays.type == "Holiday") & (holidays.transferred == True)].drop("transferred", axis = 1).reset_index(drop = True)
tr2 = holidays[(holidays.type == "Transfer")].drop("transferred", axis = 1).reset_index(drop = True)
tr = pd.concat([tr1,tr2], axis = 1)
tr = tr.iloc[:, [5,1,2,3,4]]

holidays = holidays[(holidays.transferred == False) & (holidays.type != "Transfer")].drop("transferred", axis = 1)
holidays = holidays.append(tr).reset_index(drop = True)


# Additional Holidays
holidays["description"] = holidays["description"].str.replace("-", "").str.replace("+", "").str.replace('\d+', '')
holidays["type"] = np.where(holidays["type"] == "Additional", "Holiday", holidays["type"])


# Bridge Holidays
holidays["description"] = holidays["description"].str.replace("Puente ", "")
holidays["type"] = np.where(holidays["type"] == "Bridge", "Holiday", holidays["type"])

holidays

Unnamed: 0,date,type,locale,locale_name,description
0,2012-03-02,Holiday,Local,Manta,Fundacion de Manta
1,2012-04-01,Holiday,Regional,Cotopaxi,Provincializacion de Cotopaxi
2,2012-04-12,Holiday,Local,Cuenca,Fundacion de Cuenca
3,2012-04-14,Holiday,Local,Libertad,Cantonizacion de Libertad
4,2012-04-21,Holiday,Local,Riobamba,Cantonizacion de Riobamba
...,...,...,...,...,...
333,2017-04-13,Holiday,Local,Cuenca,Fundacion de Cuenca
334,2017-05-26,Holiday,National,Ecuador,Batalla de Pichincha
335,2017-08-11,Holiday,National,Ecuador,Primer Grito de Independencia
336,2017-09-29,Holiday,Local,Ibarra,Fundacion de Ibarra


In [28]:
# Work Day Holidays, that is meant to payback the Bridge.
work_day = holidays[holidays.type == "Work Day"]  
holidays = holidays[holidays.type != "Work Day"]  

In [37]:
event = holidays[holidays.type == "Event"]

In [34]:
holidays2 = holidays[holidays.type != "Event"]
national = holidays2[(holidays2.locale != "Regional") & (holidays2.locale != "Local")].drop_duplicates()
local = holidays2[holidays2.locale == "Local"].rename({"description":"holiday_local", "locale_name":"city"}, axis = 1).drop("locale", axis = 1).drop_duplicates().drop("type", axis = 1)
regional = holidays2[holidays2.locale == "Regional"].rename({"locale_name":"state", "description":"holiday_regional"}, axis = 1).drop("locale", axis = 1).drop_duplicates().drop("type", axis = 1)

In [35]:
national[(national['date'] >= '2013-06-01') & (national['date'] <= '2016-11-30')].sort_values(by='date')

Unnamed: 0,date,type,locale,locale_name,description
65,2013-08-10,Holiday,National,Ecuador,Primer Grito de Independencia
327,2013-10-11,Holiday,National,Ecuador,Independencia de Guayaquil
70,2013-11-02,Holiday,National,Ecuador,Dia de Difuntos
71,2013-11-03,Holiday,National,Ecuador,Independencia de Cuenca
80,2013-12-21,Holiday,National,Ecuador,Navidad
81,2013-12-22,Holiday,National,Ecuador,Navidad
83,2013-12-23,Holiday,National,Ecuador,Navidad
84,2013-12-24,Holiday,National,Ecuador,Navidad
85,2013-12-25,Holiday,National,Ecuador,Navidad
86,2013-12-26,Holiday,National,Ecuador,Navidad


In [41]:
combined = pd.concat([work_day, event, national], ignore_index=True).sort_values(by='date')
combined = combined.reset_index(drop=True)
combined

Unnamed: 0,date,type,locale,locale_name,description
0,2012-08-10,Holiday,National,Ecuador,Primer Grito de Independencia
1,2012-10-12,Holiday,National,Ecuador,Independencia de Guayaquil
2,2012-11-02,Holiday,National,Ecuador,Dia de Difuntos
3,2012-11-03,Holiday,National,Ecuador,Independencia de Cuenca
4,2012-12-21,Holiday,National,Ecuador,Navidad
...,...,...,...,...,...
158,2017-12-22,Holiday,National,Ecuador,Navidad
159,2017-12-23,Holiday,National,Ecuador,Navidad
160,2017-12-24,Holiday,National,Ecuador,Navidad
161,2017-12-25,Holiday,National,Ecuador,Navidad


In [42]:
combined['date'].duplicated()

0      False
1      False
2      False
3      False
4      False
       ...  
158    False
159    False
160    False
161    False
162    False
Name: date, Length: 163, dtype: bool

In [43]:
combined.to_csv('C://Users//fento//store-sales-time-series-forecasting//national.csv', index=False)
local.to_csv('C://Users//fento//store-sales-time-series-forecasting//local.csv', index=False)
regional.to_csv('C://Users//fento//store-sales-time-series-forecasting//regional.csv', index=False)