In [39]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import sem
import datetime as dt

# Hide warning messages in notebook
# NOTE(review): the 'ignore' action with no category/module argument silences
# every warning for the rest of the session, including library deprecation
# warnings -- consider narrowing the filter once the notebook is stable.
import warnings
warnings.filterwarnings('ignore')

# Input files, given relative to the notebook's working directory
# (edit these two paths if the data lives elsewhere).
site_load = "SiteList.csv"
daily_load = "CYYZ_daily.txt"

# Read the station list and the CYYZ daily observations into DataFrames.
# Both files go through read_csv with default options, so they are expected
# to be comma-separated with a header row.
site_load_data = pd.read_csv(site_load)
daily_load_data = pd.read_csv(daily_load)

In [30]:
# Preview the first rows of the site list (no cleaning happens in this cell).
site_load_data.head()
# Site-list entry for CYYZ, the station whose daily file is loaded above:
# CYYZ,TORONTO,EST,-5,ON,43.68,-79.63,568

Unnamed: 0,Site4,City,TimeZone,OffUTC,State,Lat,Lon,Elevation(feet)
0,CWAJ,ERIEAU (MAPS),EST,-5,ON,42.25,-81.9,584
1,CWBE,KILLARNEY (MAPS),EST,-5,ON,45.966667,-81.466667,643
2,CWBZ,ST ANICET,EST,-5,QC,45.116667,-74.266667,161
3,CWFJ,CARDSTON (AUT),MST,-7,AB,49.2,-113.266667,3727
4,CWGD,GODERICH (AUTO8),EST,-5,ON,43.766667,-81.716667,702


In [79]:
# Keep Date as plain text: the year/month helpers further down split the
# value on "-" and therefore need strings, not datetime objects.
daily_load_data["Date"] = daily_load_data["Date"].astype("str")

# Preview only -- set_index returns a new frame that is not assigned, so
# daily_load_data itself keeps its default integer index.
daily_load_data.set_index("Date").head()

Unnamed: 0_level_0,Site4,Source,Max Temp,Min Temp,Avg Temp,HDDs,CDDs,Precipitation Water Equiv,Snowfall,Snow/Ice Depth
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1971-01-01,CYYZ,Unknwn-EstDew-EstWind,20,2,11.0,54.0,0.0,0.0,0.0,7.1
1971-01-02,CYYZ,Unknwn-EstDew-EstWind,38,5,21.5,43.5,0.0,0.0001,0.1,7.1
1971-01-03,CYYZ,Unknwn-EstDew-EstWind,35,29,32.0,33.0,0.0,0.0,0.0,3.9
1971-01-04,CYYZ,Unknwn-EstDew-EstWind,42,30,36.0,29.0,0.0,0.29,0.9,5.1
1971-01-05,CYYZ,Unknwn-EstDew-EstWind,32,18,25.0,40.0,0.0,0.0001,0.1,3.2


In [64]:
# Summary statistics (count, mean, std, quartiles, extremes) for the numeric
# columns of the daily observations.
daily_load_data.describe()

Unnamed: 0,Max Temp,Min Temp,Avg Temp,HDDs,CDDs,Precipitation Water Equiv,Snowfall,Snow/Ice Depth
count,17842.0,17842.0,17842.0,17842.0,17842.0,17836.0,16448.0,15449.0
mean,55.510145,37.85422,46.682182,19.698296,1.380479,0.081829,0.116027,0.694871
std,20.620595,18.311542,19.185942,17.393757,3.341911,0.210119,0.550149,1.918641
min,-6.0,-24.0,-12.5,0.0,0.0,0.0,0.0,0.0
25%,38.0,26.0,32.0,1.5,0.0,0.0,0.0,0.0
50%,56.0,38.0,47.5,17.5,0.0,0.0001,0.0,0.0
75%,74.0,53.0,63.5,33.0,0.0,0.05,0.0,0.01
max,100.0,79.0,89.5,77.5,24.5,4.67,12.0,26.4


In [65]:
# Non-null count per column; any column below the row total has missing values.
daily_load_data.count()

Site4                        17842
Date                         17842
Source                       17723
Max Temp                     17842
Min Temp                     17842
Avg Temp                     17842
HDDs                         17842
CDDs                         17842
Precipitation Water Equiv    17836
Snowfall                     16448
Snow/Ice Depth               15449
dtype: int64

In [77]:
# Column dtypes of the daily observations.
# NOTE(review): the saved output lists Date as datetime64[ns], yet the visible
# cells only ever cast Date to str -- this looks like stale output from an
# earlier run; confirm with Restart Kernel -> Run All.
daily_load_data.dtypes

Site4                                object
Date                         datetime64[ns]
Source                               object
Max Temp                              int64
Min Temp                              int64
Avg Temp                            float64
HDDs                                float64
CDDs                                float64
Precipitation Water Equiv           float64
Snowfall                            float64
Snow/Ice Depth                      float64
dtype: object

In [103]:
def getYear(s):
  """Return the year component of a ``YYYY-MM-DD`` date as a string.

  Parameters:
    s: a date string such as ``"1971-01-05"``, or any object whose ``str()``
       form starts with ``YYYY-MM-DD`` (for example ``datetime.date`` or
       ``datetime.datetime``).

  Returns:
    The text before the first ``"-"``, e.g. ``"1971"``.
  """
  # str() makes the helper tolerant of date-like objects, which have no
  # .split method; for inputs that are already strings it is a no-op.
  return str(s).split("-")[0]
def getMonth(s):
  """Return the month component of a ``YYYY-MM-DD`` date as a string.

  Parameters:
    s: a date string such as ``"1971-01-05"``, or any object whose ``str()``
       form starts with ``YYYY-MM-DD`` (for example ``datetime.date`` or
       ``datetime.datetime``).

  Returns:
    The text between the first and second ``"-"``, e.g. ``"01"``; any
    zero padding present in the input is preserved.

  Raises:
    IndexError: if the text form of ``s`` contains no ``"-"``.
  """
  # str() makes the helper tolerant of date-like objects, which have no
  # .split method; for inputs that are already strings it is a no-op.
  return str(s).split("-")[1]

# Derive calendar columns from the Date text. The helpers are passed to
# apply directly; each one is called once per row and returns a string.
daily_load_data["year"] = daily_load_data["Date"].apply(getYear)
daily_load_data["month"] = daily_load_data["Date"].apply(getMonth)

# Show the first rows with the two new columns appended.
daily_load_data.head()

Unnamed: 0,Site4,Date,Max Temp,Min Temp,Avg Temp,year,month
0,CYYZ,1971-01-01,20,2,11.0,1971,1
1,CYYZ,1971-01-02,38,5,21.5,1971,1
2,CYYZ,1971-01-03,35,29,32.0,1971,1
3,CYYZ,1971-01-04,42,30,36.0,1971,1
4,CYYZ,1971-01-05,32,18,25.0,1971,1


In [110]:
# Average every numeric column per calendar year.
# numeric_only=True is stated explicitly because the frame still carries text
# columns (Site4, Date, month): older pandas dropped them silently, whereas
# pandas >= 2.0 raises TypeError when asked for the mean of an object column.
yearly_df = daily_load_data.groupby(by="year").mean(numeric_only=True)

# One row per year, indexed by the "year" group key.
yearly_df.head()

Unnamed: 0_level_0,Max Temp,Min Temp,Avg Temp
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1971,54.383562,35.213699,44.79863
1972,52.122951,33.333333,42.728142
1973,55.769863,37.068493,46.419178
1974,53.906849,34.794521,44.350685
1975,55.167123,36.775342,45.971233
