In [3]:
# Widen the notebook container to 70% of the browser window.
# `display` and `HTML` are taken from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:70% !important; }</style>"))

In [4]:
import warnings

In [5]:
%%script false --no-raise-error
# NOTE(review): the `%%script false` magic on the first line hands this cell to
# an external program named `false` instead of running it as Python. The
# recorded output ("Couldn't find program: 'false'") shows that program was
# not available when the notebook was last run.
# The HTML below injects a button that toggles the visibility of all
# `div.input` elements (it calls the jQuery `$` function).
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

Couldn't find program: 'false'


In [7]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Environment Canada Weather Information

In [8]:
def countData(dataFrame, dictionary, column, dataQuantity, measName=''):
    '''
    Description: Adds a column holding, for every station, the percentage of
    available (non-null) values found in `column`.

    Input:
        dataFrame: frame that receives the new column. It is copied, not
            modified. Its rows must line up, in order, with the entries of
            `dictionary`.
        dictionary: mapping of station key -> DataFrame of observations.
        column: name of the column whose non-null values are counted.
        dataQuantity: number of values expected per station (the 100% mark).
        measName: prefix used in the name of the new column.
    Output:
        A copy of `dataFrame` with the extra column
        '<measName> Available Data (%)', rounded to two decimals.
    '''
    stations = dictionary.copy()
    availability = [
        round(100*stations[key][column].count()/dataQuantity, 2)
        for key in stations
    ]

    result = dataFrame.copy()
    result[measName+" Available Data"+' (%)'] = availability
    return result

In [9]:
def fltrbyDQ(dictionary, column, dataQuantity, threshold = 0.6):
    '''
    Description: Filter by Data Quantity. Lists the keys whose data is too
    sparse to be used.

    Input:
        dictionary: mapping of station key -> DataFrame of observations.
        column: name of the column whose non-null values are counted.
        dataQuantity: number of values expected per station.
        threshold: minimum accepted fraction (count / dataQuantity).
    Output:
        A list with the keys whose available fraction is strictly below
        `threshold`, in the iteration order of `dictionary`.
    '''
    return [
        station
        for station, data in dictionary.copy().items()
        if data[column].count()/dataQuantity < threshold
    ]

In [10]:
def removeItems(dictionary, keys):
    '''
    Description: Returns a shallow copy of `dictionary` without the entries
    listed in `keys`. The original object is left untouched.

    A KeyError is raised when one of the keys is not present.
    '''
    remaining = dictionary.copy()
    for unwanted in keys:
        del remaining[unwanted]
    return remaining

In [11]:
# get Julian Day
def getJulianDay(dt, dateCol, yearCol, monthCol, dayCol):
    '''
    Description:
        Splits a 'YYYY-MM-DD hh:mm' style timestamp column into date parts
        and numbers the days consecutively, starting at 1 for the earliest
        date found in the data.
    Input:
        dt: DataFrame with the timestamp strings (it is not modified).
        dateCol: name of the timestamp column. In the result it holds only
            the date part ('YYYY-MM-DD').
        yearCol, monthCol, dayCol: names of the columns created for the
            year, month and day (stored as uint16).
    Output:
        A copy of `dt` with the columns above plus a "days" column holding
        the day number relative to the earliest date (earliest date == 1).
    '''
    import datetime as dttm

    julianDayCol = "days"

    data = dt.copy()

    # keep only the date part of every timestamp (text before the first blank)
    data[dateCol] = [stamp.split(' ')[0] for stamp in data[dateCol]]

    # get year, month and day from the date
    dateParts = np.asarray([d.split('-') for d in data[dateCol].values], dtype='uint16')

    # create new columns
    data[yearCol]  = dateParts[:,0]
    data[monthCol] = dateParts[:,1]
    data[dayCol]   = dateParts[:,2]

    # Parse every distinct date once and look the ordinal up per row, instead
    # of re-scanning the whole frame for each distinct date.
    ordinals = {
        day: dttm.datetime.strptime(day, "%Y-%m-%d").toordinal()
        for day in data[dateCol].unique()
    }
    data[julianDayCol] = data[dateCol].map(ordinals)

    # shift so that the earliest date becomes day 1
    data[julianDayCol] += 1 - data[julianDayCol].min()
    return data

def getMaxMin(dt, column, daysCol, dateCol='Date/Time'):
    '''
    Description:
        It computes the max and min value of the specified column for each
        day. Rows where `column` is missing are ignored.
    Input:
        dt: DataFrame with one row per observation (it is not modified).
        column: The name of the column of interest.
        daysCol: The name of the column holding the day number of each row.
        dateCol: The name of the column holding the date of each row. The
            date of the first row of every day is reported.
    Output:
        A DataFrame with one row per day, in order of first appearance, and
        the columns:

        [daysCol, 'Max ' + column, 'Min ' + column, 'DATE']

        Max and min are rounded to two decimals.
    '''
    data = dt.dropna(subset=[column])

    days = data[daysCol].unique()

    mxmn_list = list()
    for day in days:
        d = data[data[daysCol] == day]
        mxmn_list.append([round(np.max(d[column]),2), round(np.min(d[column]),2)])

    cols = [daysCol, 'Max ' + column, 'Min ' + column]

    # reshape keeps the array two-dimensional even when no day is left
    mxmn = np.asarray(mxmn_list).reshape((-1,2))
    values = np.concatenate((np.asarray(days).reshape((-1,1)), mxmn), axis=1)
    newdf = pd.DataFrame(values, columns = cols)

    # first row of every day, in the same order as `days`
    newdf['DATE'] = data.drop_duplicates(subset=daysCol)[dateCol].values

    # uint8 would wrap around after day 255; multi-year series need more room
    newdf[daysCol] = newdf[daysCol].astype('uint32')
    return newdf

##  Data

In [30]:
# Pairs a key of cnDataSet (dictionary key) with the key of hrDataSet
# (dictionary value) it is processed together with in the heatwave computation.
# NOTE(review): presumably each pair is the closest station; confirm.
closestT = {'5574':'53001', '5415':'51157'}

In [31]:
# Relative folder prepended to every CSV file name that is read.
dataPath     = "../dataset/envcan/"
# File stems (without '.csv') of the hourly station files.
IDs = ['51157', '53001']
from datetime import date
# timedelta between 1 June 2018 and 31 August 2018
delta = date(2018, 8, 31) - date(2018, 6, 1)

In [32]:
# Hourly observations per station, keyed by the station ID.
dataSet = {}

for file in IDs:
    csvPath = dataPath + file + '.csv'
    # rows that are empty in every column carry no information
    dataSet[file] = pd.read_csv(csvPath).dropna(axis=0, how='all')

In [33]:
# Daily max/min temperature per station, derived from the hourly records.
hrDataSet = {}

for station, hourly in dataSet.items():
    # number the days first, then reduce every day to its max/min temperature
    withDays = getJulianDay(hourly, 'Date/Time', 'Year', 'Month', 'Day')
    hrDataSet[station] = getMaxMin(withDays, 'Temp (°C)', 'days')

## 1981-2010 Climate Normal

In [34]:
IDs = ['5574', '5415']
dataPath = "../dataset/envcan/"

# Climate-normal tables keyed by the station ID (as a string).
cnDataSet = {}

for ID in IDs:
    stationKey = str(ID)
    normals = pd.read_csv(dataPath + stationKey + '.csv')
    # rows that are empty in every column carry no information
    cnDataSet[stationKey] = normals.dropna(axis=0, how='all')

## Heatwave Analysis

In [39]:
import Extreme_Waves as ew

In [40]:
# Reduce every frame of hrDataSet to the max/min temperature and date columns.
# The rename is done in place and only has an effect on frames that still
# carry a "Date/Time" column.
for code in hrDataSet:
    hrDataSet[code].rename(columns={"Date/Time": "DATE"}, inplace=True)
    hrDataSet[code] = hrDataSet[code][['Max Temp (°C)','Min Temp (°C)','DATE']]

In [41]:
# Reduce every frame of cnDataSet to the max/min temperature and date columns,
# after renaming "Date/Time" to "DATE" in place.
# NOTE(review): assumes the climate-normal CSVs provide 'Max Temp (°C)' and
# 'Min Temp (°C)' columns; confirm against the files.
for code in cnDataSet:
    cnDataSet[code].rename(columns={"Date/Time": "DATE"}, inplace=True)
    cnDataSet[code]= cnDataSet[code][['Max Temp (°C)','Min Temp (°C)','DATE']]

In [53]:
#%%script false --no-raise-error

# Run this cell only if the heatwave output is not saved on your machine yet.
# It takes almost 2h30 to complete this task.
warnings.filterwarnings("ignore")

# `i` is the climate-normal key, closestT[i] the hourly station paired with it.
for i in closestT:
    stationID = closestT[i]
    print('Computing Heatwaves For Station: ' + str(stationID))
    cnDataSet[i]['DATE'] = pd.to_datetime(cnDataSet[i]['DATE'])
    hrDataSet[stationID]['DATE'] = pd.to_datetime(hrDataSet[stationID]['DATE'])
    hrDataSet[stationID] = ew.check_HeatWave(climatic_normal=cnDataSet[i], pct_columnMAX='Max Temp (°C)', 
                          pct_columnMIN='Min Temp (°C)', cn_columnDay365 = 0, database=hrDataSet[stationID], 
                          db_columnMAX='Max Temp (°C)', db_columnMIN='Min Temp (°C)', db_columnDay365 = 0,
                          db_complete=0, percentile_value=0.9, window_size=30)
    # Name the file after the station just processed. The previous code used
    # `key`, which this cell never defines (it only worked when another cell
    # had left such a variable behind, and then named the file after it).
    hrDataSet[stationID].to_csv('heatwave_'+str(stationID)+'.csv')

Computing Heatwaves For Station: 53001


ValueError: No axis named 1 for object type <class 'pandas.core.series.Series'>

In [52]:
# Displays the frame stored for station closestT[i]. `i` is not assigned in
# this cell, so it must still be defined from a previously executed cell.
hrDataSet[closestT[i]]

Unnamed: 0,Max Temp (°C),Min Temp (°C),DATE,DAY365
0,17.6,7.8,2015-06-01,152
1,16.0,6.8,2015-06-02,153
2,21.0,6.5,2015-06-03,154
3,25.5,7.8,2015-06-04,155
4,21.2,12.3,2015-06-05,156
...,...,...,...,...
455,25.2,11.5,2019-08-27,239
456,22.5,18.0,2019-08-28,240
457,23.4,12.9,2019-08-29,241
458,22.1,14.8,2019-08-30,242


In [58]:
import os

# Create the output folders for the heatwave results. makedirs() also builds
# missing parent folders and, with exist_ok=True, accepts folders that are
# already there, so one existing folder no longer skips the remaining ones.
outputRoot = '/home/guilherme/Documents/GIT - REPOSITORIES/dataset/EnvironmentCanada/Quebec-Heatwaves/'
for folder in (outputRoot + 'Data/', outputRoot + 'Graphs/'):
    try:
        os.makedirs(folder, exist_ok=True)
    except OSError as err:
        # still best effort, but report the problem instead of hiding it
        print('Could not create ' + folder + ': ' + str(err))

In [59]:
import os
path = '/home/guilherme/Documents/GIT - REPOSITORIES/dataset/EnvironmentCanada/Quebec-Heatwaves/Data/'
# Only CSV files are results; anything else in the folder (sub-folders,
# checkpoints, ...) would make read_csv fail or add bogus stations.
files = [entry for entry in os.listdir(path) if entry.endswith('.csv')]
# Replaces hrDataSet with the saved results, keyed by file name without '.csv'.
hrDataSet = dict()
for file in files:
    data = pd.read_csv(path+file)
    data['DATE'] = pd.to_datetime(data['DATE'])
    hrDataSet[file.replace('.csv','')] = data

In [60]:
# Keep only June, July and August and attach the heatwave metrics.
# After this cell every value of hrDataSet is a tuple (summer rows, metrics),
# so the cell cannot be run a second time on its own output.
for key in hrDataSet:
    data = hrDataSet[key]
    months = data.set_index('DATE').index.month
    mask = months.isin([6, 7, 8])
    data = data.iloc[mask]
    hrDataSet[key] = (data, ew.wave_metrics(data, 'HW'))

In [61]:
def label_heatwaves(data):
    '''
    Description:
        Numbers the heatwaves found in data['HW']: every run of consecutive
        non-zero entries gets its own label (1, 2, 3, ...), zeros stay 0.
    Input:
        data: DataFrame (or mapping) with an 'HW' column of 0/1 flags.
    Output:
        A 1-D integer array with one label per entry of data['HW'].
    '''
    import numpy as np
    # `scipy.ndimage.measurements` is a deprecated namespace
    from scipy.ndimage import label

    # label() is fed a single-row 2-D array, so neighbours only exist along the row
    array = np.asarray(data['HW'])[np.newaxis, :]
    # `np.int` was removed from NumPy; the builtin int is the replacement
    structure = np.ones((3, 3), dtype=int)  # this defines the connection filter
    labeled, ncomponents = label(array, structure)

    return labeled[0]

In [62]:
def hw_metrics_plot(fig, ax, data, xlabels, ylabel, title, bar_labels, width = 0.25):
    '''
    Description:
        Draws a grouped bar chart with three bars per group and writes the
        value of every bar above it.
    Input:
        fig: figure that owns `ax`; only used for tight_layout().
        ax: axes to draw on.
        data: DataFrame whose first three columns hold the bar heights, one
            row per entry of `xlabels`. Heights are shown as integers.
        xlabels: tick label of every group.
        ylabel, title: axis label and title.
        bar_labels: legend entries of the three bar series.
        width: width of a single bar.
    '''
    import numpy as np

    x = np.arange(len(xlabels))  # the label locations

    # one bar series per column: left of, on and right of the tick
    series = []
    for col, offset in enumerate((-width, 0, width)):
        # int64 instead of uint8, which silently wraps values above 255
        heights = data.iloc[:, col].values.astype("int64")
        series.append(ax.bar(x + offset, heights, width, label=bar_labels[col]))

    # Add some text for labels, title and custom x-axis tick labels, etc.
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.set_xticks(x)
    ax.set_xticklabels(xlabels)
    ax.legend()

    # Attach a text label above each bar, displaying its height.
    for rects in series:
        for rect in rects:
            height = rect.get_height()
            ax.annotate('{}'.format(height),
                        xy=(rect.get_x() + rect.get_width() / 2, height),
                        xytext=(0, 3),  # 3 points vertical offset
                        textcoords="offset points",
                        ha='center', va='bottom')

    fig.tight_layout()

In [63]:
def HCW_waves_plot(fig, ax, x1, x2, y1, y2, xlabel, ylabel, legend1, legend2, title):
    '''
    Description:
        Draws two bar series on `ax`: (x1, y1) in blue and (x2, y2) in red.
        A series without any bar gets no legend entry.
    Input:
        fig: not used; kept so all plot helpers share the same call shape.
        ax: axes to draw on.
        x1, y1: positions and heights of the blue bars.
        x2, y2: positions and heights of the red bars.
        xlabel, ylabel, title: axis labels and title.
        legend1, legend2: legend entries of the blue and red series.
    '''
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.bar(x1, y1, color='blue', label= legend1)
    ax.bar(x2, y2, color='red' , label= legend2)

    # Turn on the minor TICKS, which are required for the minor GRID
    ax.minorticks_on()

    # Dotted horizontal grid lines on major and minor ticks
    ax.grid(which='both', axis='y', linestyle=':', linewidth=0.5, color='black')

    # Drop the legend entries of empty containers (a bar() call without bars).
    # Handle and label are filtered as a pair; removing them separately by
    # value can detach a label from its handle when labels repeat.
    handles, labels = ax.get_legend_handles_labels()
    kept = [
        (handle, text)
        for handle, text in zip(handles, labels)
        if not (hasattr(handle, '__len__') and len(handle) == 0)
    ]

    ax.legend([handle for handle, _ in kept], [text for _, text in kept],
              bbox_to_anchor=(0.9, 1.05))

In [89]:
def HCW_pct_plot(fig, ax, dates, tmax, tmin, pct90mx, pct90mn, 
                 HW_dates, HW_tmax, HW_pct90mx, HW_tmin, HW_pct90mn, 
                 xlabel='', ylabel='', legend=['','','','',''], title=''):
    '''
    Description:
        Plots max/min temperature against their percentile thresholds and
        shades, for every heatwave, the area where the temperature lies
        above the threshold (red for the max, blue for the min).
    Input:
        fig: not used; kept so all plot helpers share the same call shape.
        ax: axes to draw on.
        dates: x values of the curves; every 5th value becomes a tick.
        tmax, tmin: max and min temperature curves.
        pct90mx, pct90mn: threshold curves (dashed) for tmax and tmin.
        HW_dates, HW_tmax, HW_pct90mx, HW_tmin, HW_pct90mn: lists holding
            one array per heatwave with the matching slices of the curves.
        xlabel, ylabel, title: axis labels and title.
        legend: five legend entries: tmax, tmin, max threshold,
            min threshold and the shaded heatwave areas.
    '''
    x = dates[::5]  # one tick every 5th date

    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_xticks(x)
    ax.set_xticklabels(x)

    ax.plot(dates, tmax, color='red', label= legend[0])
    ax.plot(dates, tmin, color='blue', label= legend[1])
    ax.plot(dates, pct90mx, color='red', linestyle='--', label = legend[2])
    ax.plot(dates, pct90mn, color='blue', linestyle='--', label = legend[3])

    for i in range(len(HW_tmax)):
        # Only the first red and the first blue area are labelled; labelling
        # every area repeats the same legend entry once per heatwave.
        area_label = legend[4] if i == 0 else None
        ax.fill_between(HW_dates[i], HW_tmax[i], HW_pct90mx[i], where=HW_tmax[i] > HW_pct90mx[i], 
                        facecolor='red', interpolate=False,label=area_label)
        ax.fill_between(HW_dates[i], HW_tmin[i], HW_pct90mn[i], where=HW_tmin[i] > HW_pct90mn[i], 
                        facecolor='blue', interpolate=False,label=area_label)

    # Turn on the minor TICKS, which are required for the minor GRID
    ax.minorticks_on()

    # Dotted grid lines on major and minor ticks of both axes
    ax.grid(which='both', axis='both', linestyle=':', linewidth=0.5, color='black')

    # Drop the legend entries of empty containers. Handle and label are
    # filtered as a pair; removing them separately by value can detach a
    # label from its handle when labels repeat.
    handles, labels = ax.get_legend_handles_labels()
    kept = [
        (handle, text)
        for handle, text in zip(handles, labels)
        if not (hasattr(handle, '__len__') and len(handle) == 0)
    ]

    ax.legend([handle for handle, _ in kept], [text for _, text in kept],
              bbox_to_anchor=(0.9, 1.05))

In [72]:
# Turn interactive plotting off
plt.ioff()
path = "Graphs/"
# savefig() does not create missing folders, so make sure the target exists
os.makedirs(path, exist_ok=True)

bar_labels = ['Number of Heatwaves','Longest Heatwave Duration in Days','Total of Days With Heatwave']

# One figure per station: heatwave metrics on top, then one panel per summer.
# NOTE(review): `metaData` is not defined in this part of the notebook; it is
# expected to provide 'Station ID' and 'Name' columns.
for key in hrDataSet:
    print("Ploting Heatwaves for Station "+key+'...')

    name = metaData.loc[metaData['Station ID'] == int(key)]['Name'].values[0]

    fig, ax = plt.subplots(3,1,figsize=(16,16))
    hw_metrics_plot(fig = fig, ax = ax[0], data=hrDataSet[key][1], xlabels = ['Summer 2018', 'Summer 2019'], 
                    ylabel='', title="Station: "+name+". Heatwave Metrics", bar_labels=bar_labels)

    summer = hrDataSet[key][0]
    year = summer.set_index('DATE').index.year

    # same panel for both summers: normal days vs. heatwave days
    for panel, season in zip(ax[1:], (2018, 2019)):
        data = summer.iloc[year == season]
        normal = data.loc[data['HW'] == 0]
        heatwave = data.loc[data['HW'] == 1]

        HCW_waves_plot(fig, panel,
                       normal['DATE'].values, heatwave['DATE'].values,
                       normal['Max Temp (°C)'].values, heatwave['Max Temp (°C)'].values,
                       'Date', 'Max Temperature (°C)', 'Normal', 'Heatwave', 
                       "Station: "+name+". Heatwaves in Quebec during summer, "+str(season))

    fig.savefig(path+key+'-1.png')
    plt.close(fig)
print('task finished.')

Ploting Heatwaves for Station 27646...
Ploting Heatwaves for Station 49608...
Ploting Heatwaves for Station 5397...
Ploting Heatwaves for Station 45309...
Ploting Heatwaves for Station 27377...
Ploting Heatwaves for Station 5606...
Ploting Heatwaves for Station 48374...
Ploting Heatwaves for Station 10801...
Ploting Heatwaves for Station 47587...
Ploting Heatwaves for Station 29452...
Ploting Heatwaves for Station 53002...
Ploting Heatwaves for Station 53001...
Ploting Heatwaves for Station 10873...
Ploting Heatwaves for Station 5237...
Ploting Heatwaves for Station 10843...
Ploting Heatwaves for Station 42013...
Ploting Heatwaves for Station 51157...
Ploting Heatwaves for Station 5911...
Ploting Heatwaves for Station 10868...
Ploting Heatwaves for Station 10982...
Ploting Heatwaves for Station 5903...
Ploting Heatwaves for Station 10761...
task finished.


In [92]:
# Turn interactive plotting off
plt.ioff()
path = "Graphs/"
# One figure per station with two panels (summer 2018 on top, summer 2019
# below), each showing the temperatures, the percentile curves and the shaded
# heatwaves. The figure is saved as '<key>-2.png'.
# NOTE(review): `metaData` is not defined in this part of the notebook.
for key in hrDataSet:
    print("Ploting Algorithm demonstration for Station "+key+'...')
    
    name = metaData.loc[metaData['Station ID'] == int(key)]['Name'].values[0]
    fig, ax = plt.subplots(2,1,figsize=(16,16))
    
    legend = ['Max Temperature', 'Min Temperature', '90th Tmax Percentile', '90th Tmin Percentile', 'Heatwave']
    
    # boolean masks selecting the rows of each year
    data = hrDataSet[key][0]
    data = data.set_index('DATE')
    year = data.index.year
    mask1 = year == 2018
    mask2 = year == 2019
    
    # ---- summer 2018 ----
    data = hrDataSet[key][0].iloc[mask1]
    dates = data['DAY365'].values
    tmax, tmin = data['Max Temp (°C)'].values, data['Min Temp (°C)'].values
    pct90mx, pct90mn = data['CTX90pct'].values, data['CTN90pct'].values
    
    # `data` is a slice of the stored frame; the label column is added to the slice
    data['Heatwave Label'] = label_heatwaves(data)
    
    # collect, per heatwave label, the matching pieces of every curve
    HW_dates = []
    HW_tmax = []
    HW_tmin = []
    HW_pct90mx = []
    HW_pct90mn = []
    for label in data['Heatwave Label'].unique():
        # label 0 marks the days without heatwave
        if label == 0:
            continue
        HW_dates.append(data.loc[data['Heatwave Label'] == label]['DAY365'].values)
        HW_tmax.append(data.loc[data['Heatwave Label'] == label]['Max Temp (°C)'].values)
        HW_tmin.append(data.loc[data['Heatwave Label'] == label]['Min Temp (°C)'].values)
        HW_pct90mx.append(data.loc[data['Heatwave Label'] == label]['CTX90pct'].values)
        HW_pct90mn.append(data.loc[data['Heatwave Label'] == label]['CTN90pct'].values)
    
    title = 'Heatwave Analysis during Summer of 2018'
    HCW_pct_plot(fig, ax[0], dates, tmax, tmin, pct90mx, pct90mn, 
                HW_dates, HW_tmax, HW_pct90mx, HW_tmin, HW_pct90mn, 
                xlabel='Day', ylabel='Temperature (°C)', legend=legend, title=title)
    
    # ---- summer 2019 ----
    data = hrDataSet[key][0].iloc[mask2]
    dates = data['DAY365'].values
    tmax, tmin = data['Max Temp (°C)'].values, data['Min Temp (°C)'].values
    
    warnings.filterwarnings("ignore")
    # pct90mx / pct90mn still hold the arrays read from the 2018 rows above:
    # the 2019 percentile columns are overwritten with them, which only works
    # when both summers have the same number of rows.
    # NOTE(review): looks intentional (same thresholds for both years); confirm.
    data['CTX90pct'] = pct90mx
    data['CTN90pct'] = pct90mn
    
    data['Heatwave Label'] = label_heatwaves(data)
    HW_dates = []
    HW_tmax = []
    HW_tmin = []
    HW_pct90mx = []
    HW_pct90mn = []
    for label in data['Heatwave Label'].unique():
        # label 0 marks the days without heatwave
        if label == 0:
            continue
        HW_dates.append(data.loc[data['Heatwave Label'] == label]['DAY365'].values)
        HW_tmax.append(data.loc[data['Heatwave Label'] == label]['Max Temp (°C)'].values)
        HW_tmin.append(data.loc[data['Heatwave Label'] == label]['Min Temp (°C)'].values)
        HW_pct90mx.append(data.loc[data['Heatwave Label'] == label]['CTX90pct'].values)
        HW_pct90mn.append(data.loc[data['Heatwave Label'] == label]['CTN90pct'].values)
    
    title = 'Heatwave Analysis during Summer, 2019'
    HCW_pct_plot(fig, ax[1], dates, tmax, tmin, pct90mx, pct90mn, 
                HW_dates, HW_tmax, HW_pct90mx, HW_tmin, HW_pct90mn, 
                xlabel='Day', ylabel='Temperature (°C)', legend=legend, title=title)

    fig.savefig(path+key+'-2.png')
    plt.close(fig)


print('task finished.')

Ploting Algorithm demonstration for Station 27646...
Ploting Algorithm demonstration for Station 49608...
Ploting Algorithm demonstration for Station 5397...
Ploting Algorithm demonstration for Station 45309...
Ploting Algorithm demonstration for Station 27377...
Ploting Algorithm demonstration for Station 5606...
Ploting Algorithm demonstration for Station 48374...
Ploting Algorithm demonstration for Station 10801...
Ploting Algorithm demonstration for Station 47587...
Ploting Algorithm demonstration for Station 29452...
Ploting Algorithm demonstration for Station 53002...
Ploting Algorithm demonstration for Station 53001...
Ploting Algorithm demonstration for Station 10873...
Ploting Algorithm demonstration for Station 5237...
Ploting Algorithm demonstration for Station 10843...
Ploting Algorithm demonstration for Station 42013...
Ploting Algorithm demonstration for Station 51157...
Ploting Algorithm demonstration for Station 5911...
Ploting Algorithm demonstration for Station 10868.

In [93]:
def generate_report(station_name, station_ID, lpath, spath):
    '''
    Description:
        Builds a two-page PDF from the two plots of a station.
    Input:
        station_name: name of the PDF file, without extension.
        station_ID: prefix of the plot files '<station_ID>-1.png' and
            '<station_ID>-2.png'.
        lpath: folder the plots are loaded from (including the trailing
            separator).
        spath: folder the PDF is saved to (including the trailing separator).
    '''
    from PIL import Image

    # the context managers close both image files once they are converted
    with Image.open(lpath+station_ID+'-1.png') as image1, \
         Image.open(lpath+station_ID+'-2.png') as image2:
        # the pages are converted to RGB before being written to the PDF
        im1 = image1.convert('RGB')
        im2 = image2.convert('RGB')

    im1.save(spath+station_name+'.pdf',save_all=True, append_images=[im2])

In [94]:
# the reports folder must exist before the PDFs can be saved
os.makedirs('Reports/', exist_ok=True)

# Only the plot files take part; any other file in the folder would produce a
# key that is not a station ID.
ld = [f for f in os.listdir('Graphs/') if f.endswith(('-1.png', '-2.png'))]
keys = [k.replace('-1.png','').replace('-2.png','') for k in ld]
keys = list(dict.fromkeys(keys))  # unique keys, original order
for key in keys:
    # a report needs both pages
    if not (os.path.exists('Graphs/'+key+'-1.png') and os.path.exists('Graphs/'+key+'-2.png')):
        print("Skipping Station "+key+': a plot is missing.')
        continue

    print("Generating Report for Station "+key+'...')
    
    # NOTE(review): `metaData` is not defined in this part of the notebook.
    name = metaData.loc[metaData['Station ID'] == int(key)]['Name'].values[0]
    name = name.replace('/','-')  # '/' would be read as a folder separator
    generate_report(name, key, 'Graphs/','Reports/')
print('done.')

Generating Report for Station 10982...
Generating Report for Station 47587...
Generating Report for Station 10868...
Generating Report for Station 53001...
Generating Report for Station 10843...
Generating Report for Station 48374...
Generating Report for Station 27377...
Generating Report for Station 27646...
Generating Report for Station 5606...
Generating Report for Station 5397...
Generating Report for Station 10761...
Generating Report for Station 5237...
Generating Report for Station 42013...
Generating Report for Station 10801...
Generating Report for Station 5911...
Generating Report for Station 53002...
Generating Report for Station 29452...
Generating Report for Station 51157...
Generating Report for Station 5903...
Generating Report for Station 49608...
Generating Report for Station 10873...
Generating Report for Station 45309...
done.
