In [1]:
# DATA EXTRACTION
# Import dependencies:
import pandas as pd
import requests 
import json
from pprint import pprint
import matplotlib.pyplot as plt

In [2]:
# Load the three raw Walmart CSV files into DataFrames (features, stores, train):
features, stores, train = (
    pd.read_csv(csv_path)
    for csv_path in (
        'Dataset/Walmart/features.csv',
        'Dataset/Walmart/stores.csv',
        'Dataset/Walmart/train.csv',
    )
)

In [3]:
# Peek at the first five rows of the features table:
features.head(n=5)

Unnamed: 0,Store,Date,Temperature,Fuel_Price,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5,CPI,Unemployment,IsHoliday
0,1,2010-02-05,42.31,2.572,,,,,,211.096358,8.106,False
1,1,2010-02-12,38.51,2.548,,,,,,211.24217,8.106,True
2,1,2010-02-19,39.93,2.514,,,,,,211.289143,8.106,False
3,1,2010-02-26,46.63,2.561,,,,,,211.319643,8.106,False
4,1,2010-03-05,46.5,2.625,,,,,,211.350143,8.106,False


In [4]:
# Peek at the first five rows of the stores table:
stores.head(n=5)

Unnamed: 0,Store,Type,Size
0,1,A,151315
1,2,A,202307
2,3,B,37392
3,4,A,205863
4,5,B,34875


In [5]:
# Peek at the first five rows of the training (weekly sales) table:
train.head(n=5)

Unnamed: 0,Store,Dept,Date,Weekly_Sales,IsHoliday
0,1,1,2010-02-05,24924.5,False
1,1,1,2010-02-12,46039.49,True
2,1,1,2010-02-19,41595.55,False
3,1,1,2010-02-26,19403.54,False
4,1,1,2010-03-05,21827.9,False


In [6]:
# Inner-join the stores table onto the features table using the shared
# 'Store' column, so every feature row also carries the stores columns:
features_stores = pd.merge(features, stores, how='inner', on='Store')

# Show the first five merged rows:
features_stores.head(n=5)

Unnamed: 0,Store,Date,Temperature,Fuel_Price,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5,CPI,Unemployment,IsHoliday,Type,Size
0,1,2010-02-05,42.31,2.572,,,,,,211.096358,8.106,False,A,151315
1,1,2010-02-12,38.51,2.548,,,,,,211.24217,8.106,True,A,151315
2,1,2010-02-19,39.93,2.514,,,,,,211.289143,8.106,False,A,151315
3,1,2010-02-26,46.63,2.561,,,,,,211.319643,8.106,False,A,151315
4,1,2010-03-05,46.5,2.625,,,,,,211.350143,8.106,False,A,151315


In [7]:
# List the dtype of every column of the merged features/stores frame,
# rendered as a one-column table named 'Type':
features_stores.dtypes.to_frame(name='Type')

Unnamed: 0,Type
Store,int64
Date,object
Temperature,float64
Fuel_Price,float64
MarkDown1,float64
MarkDown2,float64
MarkDown3,float64
MarkDown4,float64
MarkDown5,float64
CPI,float64


In [8]:
# List the dtype of every column of the training frame,
# rendered as a one-column table named 'Type':
train.dtypes.to_frame(name='Type')

Unnamed: 0,Type
Store,int64
Dept,int64
Date,object
Weekly_Sales,float64
IsHoliday,bool


In [9]:
# Convert the Date columns from strings (dtype 'object') to datetime64 so that
# both frames carry the same Date dtype and the .dt accessor can be used.
# Bracket assignment is used instead of attribute assignment: it is the
# documented way to overwrite a column and cannot silently create a plain
# Python attribute if the column name is ever misspelled.
features_stores['Date'] = pd.to_datetime(features_stores['Date'])
train['Date'] = pd.to_datetime(train['Date'])

# Add ISO-8601 week number and calendar year columns.
# Series.dt.week was deprecated in pandas 1.1 and removed in pandas 2.0;
# dt.isocalendar().week returns the same ISO week numbers. It comes back as a
# nullable UInt32 column, so cast to int64 to keep the dtype the old accessor
# produced.
# NOTE(review): the cast assumes Date has no missing values — confirm.
features_stores['Week'] = features_stores['Date'].dt.isocalendar().week.astype('int64')
features_stores['Year'] = features_stores['Date'].dt.year

In [10]:
# Merge datasets:
# Inner-join weekly sales with the store/feature data on the three shared key
# columns, then order the rows by store, department and date and renumber
# the index from zero:
walmart_data = (
    pd.merge(train, features_stores, how='inner', on=['Store', 'Date', 'IsHoliday'])
    .sort_values(by=['Store', 'Dept', 'Date'])
    .reset_index(drop=True)
)

# Show the first five rows of the combined dataset:
walmart_data.head(n=5)

Unnamed: 0,Store,Dept,Date,Weekly_Sales,IsHoliday,Temperature,Fuel_Price,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5,CPI,Unemployment,Type,Size,Week,Year
0,1,1,2010-02-05,24924.5,False,42.31,2.572,,,,,,211.096358,8.106,A,151315,5,2010
1,1,1,2010-02-12,46039.49,True,38.51,2.548,,,,,,211.24217,8.106,A,151315,6,2010
2,1,1,2010-02-19,41595.55,False,39.93,2.514,,,,,,211.289143,8.106,A,151315,7,2010
3,1,1,2010-02-26,19403.54,False,46.63,2.561,,,,,,211.319643,8.106,A,151315,8,2010
4,1,1,2010-03-05,21827.9,False,46.5,2.625,,,,,,211.350143,8.106,A,151315,9,2010


In [11]:
# Import API key:
%run config.ipynb

# Test API:
year = 2010
country = 'CA'

# Build query url:
url = f'https://calendarific.com/api/v2/holidays?&api_key={api_key}&country={country}&year={year}'

# Request url and print json file:
response = requests.get(url).json()
pprint(response)

{'meta': {'code': 200},
 'response': {'holidays': [{'country': {'id': 'ca', 'name': 'Canada'},
                            'date': {'datetime': {'day': 1,
                                                  'month': 1,
                                                  'year': 2010},
                                     'iso': '2010-01-01'},
                            'description': "New Year's Day is the first day of "
                                           'a new year according to the '
                                           'Gregorian calendar used in Canada '
                                           'and many other countries.',
                            'locations': 'All',
                            'name': "New Year's Day",
                            'states': 'All',
                            'type': ['National holiday']},
                           {'country': {'id': 'ca', 'name': 'Canada'},
                            'date': {'datetime': {'day': 2,
               

                                        'name': 'Northwest Territories'},
                                       {'abbrev': 'NU',
                                        'exception': None,
                                        'id': 64,
                                        'iso': 'ca-nu',
                                        'name': 'Nunavut'},
                                       {'abbrev': 'ON',
                                        'exception': None,
                                        'id': 65,
                                        'iso': 'ca-on',
                                        'name': 'Ontario'},
                                       {'abbrev': 'SK',
                                        'exception': None,
                                        'id': 68,
                                        'iso': 'ca-sk',
                                        'name': 'Saskatchewan'},
                                       {'abbrev': 'YT',
                      

                                        'exception': None,
                                        'id': 57,
                                        'iso': 'ca-ab',
                                        'name': 'Alberta'},
                                       {'abbrev': 'BC',
                                        'exception': None,
                                        'id': 58,
                                        'iso': 'ca-bc',
                                        'name': 'British Columbia'},
                                       {'abbrev': 'NB',
                                        'exception': None,
                                        'id': 60,
                                        'iso': 'ca-nb',
                                        'name': 'New Brunswick'},
                                       {'abbrev': 'NL',
                                        'exception': None,
                                        'id': 61,
                                 

In [12]:
# Holidays to look up, the country to query, and the years to cover:
holiday = ['Christmas Day','Christmas Eve','Independence Day',"New Year's Day", "Thanksgiving Day"]
country = 'US'
year = [2010,2011, 2012]

# Parallel lists filled below: one entry per (year, holiday) that the API reports.
holiday_date = []
category = []
holiday_year = []

for number in year:
    # One request per year. The query string is passed via `params` so that
    # requests builds and URL-encodes it; the timeout keeps the cell from
    # hanging forever on an unresponsive server.
    http_response = requests.get(
        'https://calendarific.com/api/v2/holidays',
        params={'api_key': api_key, 'country': country, 'year': number},
        timeout=30,
    )

    # Fail loudly on an HTTP error instead of hitting a KeyError further down.
    # The message contains only the status code and year: the request URL
    # carries the API key and must not end up in saved notebook output.
    if not http_response.ok:
        raise RuntimeError(
            f'Calendarific request for {number} failed with HTTP status '
            f'{http_response.status_code}'
        )
    response = http_response.json()

    # Single pass over the payload: remember the ISO date of the FIRST entry
    # seen for each holiday name (setdefault never overwrites), which matches
    # a linear scan that stops at the first name match.
    first_date_by_name = {}
    for entry in response['response']['holidays']:
        first_date_by_name.setdefault(entry['name'], entry['date']['iso'])

    # Record the requested holidays in the order they are listed in `holiday`;
    # names the API did not return for this year are skipped.
    for name in holiday:
        if name in first_date_by_name:
            holiday_date.append(first_date_by_name[name])
            category.append(name)
            holiday_year.append(number)

# Assemble the collected lists into a DataFrame.
# 'Date' holds the ISO date strings exactly as returned by the API.
holiday_df = pd.DataFrame({'Holiday': category,
                           'Year': holiday_year,
                           'Date': holiday_date})

# Display preview:
holiday_df.head()

Unnamed: 0,Holiday,Year,Date
0,Christmas Day,2010,2010-12-25
1,Christmas Eve,2010,2010-12-24
2,Independence Day,2010,2010-07-04
3,New Year's Day,2010,2010-01-01
4,Thanksgiving Day,2010,2010-11-25
