In [1]:
# Import 3rd party libraries
import os
import pandas as pd
import seaborn as sns
import matplotlib.pylab as plt

# Configure Notebook
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
# Apply the 'fivethirtyeight' matplotlib style sheet to every figure.
plt.style.use('fivethirtyeight')
# Use seaborn's "notebook" plotting context.
sns.set_context("notebook")
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including library deprecation notices - consider narrowing the filter.
warnings.filterwarnings('ignore')

# 1. Weather Data

In [2]:
# Capture the current working directory; the data paths below are built relative to it.
original_path = os.getcwd()

In [3]:
# Set weather data path (made absolute so it does not depend on later
# changes to the working directory)
path_weather = os.path.join(os.path.abspath(original_path), 'data', 'weather')

# Get weather file names. os.listdir() returns entries in arbitrary order,
# so sort them to make the downstream merge order reproducible.
weather_filenames = sorted(
    filename for filename in os.listdir(path_weather) if 'climate' in filename
)

# Print the first five file names as a quick sanity check
print(weather_filenames[0:5])

['en_climate_hourly_ON_6158359_01-2017_P1H.csv', 'en_climate_hourly_ON_6158359_01-2018_P1H.csv', 'en_climate_hourly_ON_6158359_01-2019_P1H.csv', 'en_climate_hourly_ON_6158359_01-2020_P1H.csv', 'en_climate_hourly_ON_6158359_01-2021_P1H.csv']


In [4]:
# Merge all weather files into a single data frame.
# Each file is opened through its full path, so the notebook's working
# directory is never changed; a failed read can therefore not leave the
# kernel stranded inside the data folder.
monthly_frames = []
for filename in weather_filenames:
    monthly_frame = pd.read_csv(os.path.join(path_weather, filename))
    # Some files carry an "(LST)" suffix on the time columns. Normalise the
    # labels so every file shares one schema. rename() silently ignores
    # labels that are absent, so no positional column check is required.
    monthly_frame = monthly_frame.rename(
        columns={'Time (LST)': 'Time', 'Date/Time (LST)': 'Date/Time'}
    )
    monthly_frames.append(monthly_frame)

# Concatenate once (instead of re-copying a growing frame on every pass).
# Frames are stacked in reverse listing order, i.e. the last file comes
# first, and ignore_index renumbers the rows 0..n-1.
if monthly_frames:
    df = pd.concat(monthly_frames[::-1], ignore_index=True)
else:
    # No matching files: fall back to an empty frame rather than raising.
    df = pd.DataFrame()
weather_data = df

In [5]:
# List the column labels of the merged frame to confirm the renamed 'Date/Time' and 'Time' columns.
weather_data.columns

Index(['Longitude (x)', 'Latitude (y)', 'Station Name', 'Climate ID',
       'Date/Time', 'Year', 'Month', 'Day', 'Time', 'Temp (°C)', 'Temp Flag',
       'Dew Point Temp (°C)', 'Dew Point Temp Flag', 'Rel Hum (%)',
       'Rel Hum Flag', 'Precip. Amount (mm)', 'Precip. Amount Flag',
       'Wind Dir (10s deg)', 'Wind Dir Flag', 'Wind Spd (km/h)',
       'Wind Spd Flag', 'Visibility (km)', 'Visibility Flag',
       'Stn Press (kPa)', 'Stn Press Flag', 'Hmdx', 'Hmdx Flag', 'Wind Chill',
       'Wind Chill Flag', 'Weather'],
      dtype='object')

In [6]:
# Highest value in the 'Temp (°C)' column - a quick plausibility check on the merged data.
weather_data['Temp (°C)'].max()

34.1

In [7]:
# Lowest value in the 'Temp (°C)' column - a quick plausibility check on the merged data.
weather_data['Temp (°C)'].min()

-21.9

In [8]:
# Number of rows per year. The file names indicate hourly records, so a full
# year should contribute 8,760 rows (8,784 in a leap year); a lower count
# points to a partial year.
weather_data['Year'].value_counts()

2020    8784
2021    8760
2019    8760
2018    8760
2017    8760
2022    7296
Name: Year, dtype: int64

In [9]:
# Count missing 'Date/Time' entries in the merged frame.
weather_data['Date/Time'].isnull().sum()

0

In [10]:
# Convert 'Date/Time' to timezone-aware timestamps in the 'EST' zone.
# NOTE(review): some source files label this column "(LST)", presumably
# Local Standard Time, which would make a zone without daylight-saving
# shifts the right choice - confirm against the data provider's docs.
timestamps = pd.DatetimeIndex(weather_data['Date/Time'])
if timestamps.tz is None:
    # Naive timestamps: attach the zone without shifting the clock time.
    timestamps = timestamps.tz_localize('EST')
else:
    # Already timezone-aware (e.g. this cell was run before). tz_localize
    # would raise TypeError here, so convert instead; this is a no-op when
    # the values are already in 'EST'.
    timestamps = timestamps.tz_convert('EST')
weather_data['Date/Time'] = timestamps

In [11]:
# Preview the first five rows to check the timezone-aware 'Date/Time' values.
weather_data.head()

Unnamed: 0,Longitude (x),Latitude (y),Station Name,Climate ID,Date/Time,Year,Month,Day,Time,Temp (°C),...,Wind Spd Flag,Visibility (km),Visibility Flag,Stn Press (kPa),Stn Press Flag,Hmdx,Hmdx Flag,Wind Chill,Wind Chill Flag,Weather
0,-79.4,43.63,TORONTO CITY CENTRE,6158359,2021-12-01 00:00:00-05:00,2021,12,1,00:00,3.5,...,,16.1,,100.41,,,,,,
1,-79.4,43.63,TORONTO CITY CENTRE,6158359,2021-12-01 01:00:00-05:00,2021,12,1,01:00,3.5,...,,16.1,,100.5,,,,,,
2,-79.4,43.63,TORONTO CITY CENTRE,6158359,2021-12-01 02:00:00-05:00,2021,12,1,02:00,3.5,...,,16.1,,100.58,,,,,,
3,-79.4,43.63,TORONTO CITY CENTRE,6158359,2021-12-01 03:00:00-05:00,2021,12,1,03:00,3.3,...,,16.1,,100.71,,,,,,
4,-79.4,43.63,TORONTO CITY CENTRE,6158359,2021-12-01 04:00:00-05:00,2021,12,1,04:00,2.7,...,,16.1,,100.73,,,,,,


In [12]:
# Write the merged weather table to a CSV file in the current working
# directory; the row index is left out of the file.
output_csv = 'weather_data.csv'
weather_data.to_csv(output_csv, index=False)