# Live Anomaly Detection

## Import Dependencies

In [1]:
# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

In [2]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

import sys
sys.path.insert(1, '../src/data')

import matplotlib
import matplotlib.pyplot as plt

from sklearn.metrics import plot_confusion_matrix 
from sklearn.ensemble import RandomForestClassifier as rfc
from sklearn.model_selection import train_test_split 
from sklearn.inspection import permutation_importance, plot_partial_dependence

from imblearn.over_sampling import ADASYN, SMOTE, RandomOverSampler
from imblearn.pipeline import make_pipeline

from xml2dict import *
from dict2tabular import *

In [3]:
def xml2soup(xml_path: str):
    """ Loads xml into BeautifulSoup object.

    Params:
        xml_path (string) - Path to xml to be loaded

    Returns:
        soup (BeautifulSoup object)
    """
    xml_content = requests.get(xml_path)
    soup = BeautifulSoup(xml_content.content, 'lxml')
    return soup

def xml_extract_metadata(xml_soup):
    """ Extracts metadata for identification (station, time and location) from top of xml.

    Params:
        output_dict (dict) - Dictionary to add information to
        xml_soup (BeautifulSoup object) - beautifulSoup object containing the loaded xml information.

    Returns:
        output_dict (dict) - Updated dictionary with added metadata information
    """
    output_dict = dict()
    identification_elements = xml_soup.find("identification-elements")
    id_element_list = ['date_time', 'tc_identifier', 'station_name', 'station_elevation',
                       'latitude', 'longitude', 'version', 'correction', 'source_uri']
    for id_element in id_element_list:
        try:
            output_dict[id_element] = identification_elements.findChild(name='element',
                                                                        attrs={'name': id_element}
                                                                        ).get("value")
        except AttributeError as error:
            print(error, ". Possibly element '%s' is missing." % id_element)

    # Station identifier are missing in some xml files
    try:
        output_dict['station_identifier'] = identification_elements.findChild(name='element',
                                                                              attrs={'name': 'station_identifier'}
                                                                              ).get("value")
    except AttributeError as error:
        print('error sigma')

    observation_elements = xml_soup.find('om:result').find('elements').findAll('element')
    for each_elem in observation_elements:
        
        try:
            if each_elem['element-index']:
                
                dict_key = each_elem['name']+'_'+each_elem['orig-name']
                output_dict[dict_key+'_value'] = each_elem['value']
                #print(each_elem['orig-name'])
                #print(each_elem['value'])
                
                qc_soup = each_elem.find('quality-controlled')
                qc_summary_dict_key = dict_key+'_'+qc_soup.find('element')['name']
                output_dict[qc_summary_dict_key] = qc_soup.find('element')['value']
                
                # qc native tag
                qc_native = qc_soup.find('native').findAll('qualifier')
                for each_native in qc_native:
                    try:
                        output_dict[dict_key+'_'+each_native['name']] = each_native['value']
                    except:
                        print('error beta')
                        continue
                
                #print(output_dict)
                #print(qc_soup.find('real-time'))
                qc_element_list = qc_soup.find('real-time').find('element').findAll('element', recursive=False)
                #print(qc_element_list)
                for each_qc in qc_element_list:
                    try:
                        qc_dict_key = dict_key+'_qa-'+each_qc['name']
                        output_dict[qc_dict_key] = each_qc['value']
                    
                        qc_detail = each_qc.findAll('element')

                        for qc_det_item in qc_detail:
                            try:
                                qc_det_name = dict_key+'_qc-'+qc_det_item['name']+'_'+qc_det_item['value'].split('/')[6]
                                output_dict[qc_det_name] = qc_det_item.find('qualifier', {'name' : 'flag_value'})['value']
                            except:
                                print("error gamma")
                                continue
                    except:
                        print('error gamma')
                        continue
        except:

            continue
       # print(output_dict)

    #finally:
    return output_dict

## Import Live Data

In [4]:
items = []
for i in range(1, 6):

    content_url = 'http://dms.cmc.ec.gc.ca:8180/notification?path=/msc/observation/atmospheric/surface_weather/ca-1.1-ascii/decoded_qa_enhanced-xml-2.0&time='+str(i)+'d'
    print(content_url)
    content_data = requests.get(content_url)
    html = xml2soup(content_url)
    item = html.findAll('item')
    print(item)
    items.extend(item)

http://dms.cmc.ec.gc.ca:8180/notification?path=/msc/observation/atmospheric/surface_weather/ca-1.1-ascii/decoded_qa_enhanced-xml-2.0&time=1d


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [5]:
path = rfile='../anomalydetection/data/processed1/df_snow_dC:\Users\filipovicha\Documents\AI_Project\CLIMAT_Stations_2019-12_MIDAS_Errors.csv'
df = pd.read_csv(path, encoding='latin1')
print(df)

     ORDER_NO  STN_ID  WMO_ID CLIMATE_ID      ENG_STN_NAME PROVINCE TC_ID  \
0         122   26986   71984    2203058          PAULATUK     NWT   ZPK    
1          28    6845   71222   119BLM0   DEASE LAKE (AUT)      BC   WKX    
2         105   44204   71823   7093GJ5       LA GRANDE IV     QUE   GAH    
3          16   45567   71110    8403619       ST LAWRENCE     NFLD  ADS    
4          71   27205   71558    8403399        ST ANTHONY     NFLD  WDW    
..        ...     ...     ...        ...               ...      ...   ...   
123        26   50857   71208    6010740    BIG TROUT LAKE     ONT   OTL    
124        98   10220   71747   6020LPQ     ATIKOKAN (AUT)     ONT   WCH    
125       106   10721   71826    2403049       PANGNIRTUNG      NU   WXP    
126        70   26866   71550    5040681        DAUPHIN CS     MAN   WZT    
127        81   54098   71594    1192948       FORT NELSON      BC   VFN    

    STATION_TYPE  Count Network                Error Types  
0            A

In [6]:
station_id = ['nco', 'nek', 'zrp', 'mfj', 'zpk', 'nbi', 'eqi', 'ngh', 'nsg', 'wfz', 'wyh', 'wkx', 'gah', 'ads', 'wdw', 'wzv', 'wic', 'ndt', 'wsy', 'won', 'way', 'xcm', 'xux', 'xet', 'zlt', 'wij', 'wjc', 'nzs', 'wnv', 'wst', 'zcr', 'zel', 'xbl', 'mfm', 'acq', 'pjm', 'pif', 'who', 'wpz', 'xgd', 'zhk', 'pqw', 'wdv', 'wfp', 'wnz', 'xdi', 'wvt', 'pyq', 'apr', 'xqh', 'xfb', 'wpk', 'nvq', 'wct', 'wzg', 'xmm', 'zcy', 'web', 'wgd', 'xeg', 'vqz', 'xse', 'vxy', 'pqd', 'zvm', 'pzh', 'mjk', 'xox', 'zsm', 'aqy', 'ncd', 'zdb','xrb', 'xqb', 'zhy', 'abf', 'wgr',  'xnp', 'wdc', 'xwf', 'wsk', 'ahr', 'xar', 'zfs', 'wvc', 'xoa', 'ple', 'zsp', 'wtd', 'wrk', 'wsn',  'zhb', 'xmw', 'mrf', 'xto', 'ppr', 'nbb', 'xat', 'erm', 'xhi', 'zka', 'nco', 'adl', 'zoc', 'wyj', 'asb', 'apb', 'xzv', 'xha', 'xzc', 'wwn', 'wcf', 'xka', 'xrg', 'tze', 'wfz', 'zev', 'pgf', 'xtl', 'ybg', 'yyr', 'yod', 'yoj', 'ygq', 'yzt', 'ygl', 'yth', 'yvp', 'yxy', 'otl', 'wch', 'wxp', 'wzt']

In [7]:
tc_id_array = []
for index, value in df['TC_ID'].items():
    value = value[0:3]
    tc_id_array.append(value)
    
for value in station_id:
    if value.upper() not in tc_id_array:
        tc_id_array.append(value.upper())

station_id = []
for id in tc_id_array:
    station_id.append(id.lower())

['ZPK', 'WKX', 'GAH', 'ADS', 'WDW', 'WZV', 'WIC', 'NDT', 'WSY', 'WON', 'WAY', 'XCM', 'XUX', 'XET', 'ZLT', 'WIJ', 'WJC', 'NZS', 'WNV', 'WST', 'ZCR', 'ZEL', 'XBL', 'MFM', 'ACQ', 'PJM', 'PIF', 'WHO', 'WPZ', 'XGD', 'ZHK', 'PQW', 'WDV', 'WFP', 'WNZ', 'XDI', 'WVT', 'PYQ', 'APR', 'XQH', 'XFB', 'WPK', 'NVQ', 'WCT', 'WZG', 'XMM', 'ZCY', 'WEB', 'WGD', 'XEG', 'VQZ', 'XSE', 'VXY', 'PQD', 'ZVM', 'PZH', 'MJK', 'XOX', 'ZSM', 'AQY', 'NCD', 'ZDB', 'WMJ', 'XRB', 'XWB', 'ZHY', 'ABF', 'WGR', 'XNP', 'WDC', 'XWF', 'WSK', 'AHR', 'XAR', 'ZFS', 'WVC', 'XOA', 'ZRP', 'PLE', 'NEK', 'ZSP', 'WTD', 'WRK', 'WSN', 'ZHB', 'EQI', 'XMW', 'MRF', 'XTO', 'PPR', 'NBB', 'XAT', 'ERM', 'XHI', 'ZKA', 'NCO', 'ADL', 'ZOC', 'WYJ', 'ASB', 'APB', 'XZV', 'XHA', 'XZC', 'WWN', 'WCF', 'XKA', 'XRG', 'TZE', 'WFZ', 'ZEV', 'PGF', 'XTL', 'YBG', 'YYR', 'YOD', 'YOJ', 'YGQ', 'YZT', 'YGL', 'YTH', 'YVP', 'YXY', 'OTL', 'WCH', 'WXP', 'WZT', 'VFN', 'MFJ', 'NBI', 'NGH', 'NSG', 'WYH', 'XQB']
134


In [10]:
df = pd.DataFrame()
url_list = list()

for each in items:

    # get url of each observation and ignore supporting xmls (supp_1440)
    if each.find('title').contents[0].split('/')[2] in station_id and 'supp_1440' not in each.find('title').contents[0]:
        url_list.append(each.contents[2])
        #parse url and create a table with each row as observation, and element value as columns
        soupu = xml2soup(each.contents[2])
        extracted = xml_extract_metadata(soupu)
        df = df.append(extracted, ignore_index=True)

In [11]:
# drop if any columns contains null
newDf = df.dropna(how='any', axis=1)
newDf.columns

Index(['date_time', 'tc_identifier', 'station_name', 'station_elevation',
       'latitude', 'longitude', 'version', 'correction', 'source_uri',
       'station_identifier',
       ...
       'cumulative_precipitation_gauge_weight_unfiltered_3020_value',
       'cumulative_precipitation_gauge_weight_unfiltered_3020_overall_qa_summary',
       'cumulative_precipitation_gauge_weight_unfiltered_3020_error',
       'cumulative_precipitation_gauge_weight_unfiltered_3020_suspect',
       'cumulative_precipitation_gauge_weight_unfiltered_3020_suppressed',
       'cumulative_precipitation_gauge_weight_unfiltered_3021_value',
       'cumulative_precipitation_gauge_weight_unfiltered_3021_overall_qa_summary',
       'cumulative_precipitation_gauge_weight_unfiltered_3021_error',
       'cumulative_precipitation_gauge_weight_unfiltered_3021_suspect',
       'cumulative_precipitation_gauge_weight_unfiltered_3021_suppressed'],
      dtype='object', length=261)

## Import Snow Training Data

In [12]:
X = read_pickle(file='../anomalydetection/data/processed1/df_snow_depth_3022_2019.pickle')
X_2019 = read_pickle(file='../anomalydetection/data/processed1/df_snow_depth_3022_2020.pickle')
X_2020 = read_pickle(file='../anomalydetection/data/processed1/df_snow_depth_3022_2021_jan_jun.pickle')

Xsnow = pd.concat([X, X_2019], ignore_index=True, sort=False)
Xsnow = pd.concat([X, X_2020], ignore_index=True, sort=False)

In [13]:
Xsnow = Xsnow.dropna(how='any', axis=1)

## Import Wind Training Data

In [14]:
Xwind = read_pickle(r'C:\Users\filipovicha\Documents\AI_Project\moov-ai-automatic-qc\data\processed_before_nf\wind_speed_3005.pickle')

In [15]:
Xwind = Xwind.dropna(how='any', axis=1)

## Import Precip Training Data

In [16]:
Xprecip = read_pickle(r'C:\Users\filipovicha\Documents\AI_Project\moov-ai-automatic-qc\data\processed_before_nf\precipitation_amount_285.pickle')

In [17]:
Xprecip = Xprecip.dropna(how='any', axis=1)

## Detect Snow Depth Anomalies

In [18]:
common_snow_elems = list(np.intersect1d(Xsnow.columns, newDf.columns))
test_real_time = newDf[common_snow_elems] 
common_snow_elems.append('snow_depth_3022_target')
train_real_time = Xsnow[common_snow_elems]
train_y = train_real_time['snow_depth_3022_target']

In [19]:
# keeping the the date and time for anomaly detection
train_real_time['hour_of_day'] = train_real_time.date_time.dt.hour
train_real_time['yearz'] = train_real_time.date_time.dt.year
train_real_time['monthz'] = train_real_time.date_time.dt.month
train_real_time['dayz'] = train_real_time.date_time.dt.day

test_real_time['hour_of_day'] = train_real_time.date_time.dt.hour
test_real_time['yearz'] = train_real_time.date_time.dt.year
test_real_time['monthz'] = train_real_time.date_time.dt.month
test_real_time['dayz'] = train_real_time.date_time.dt.day

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_real_time['hour_of_day'] = train_real_time.date_time.dt.hour
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_real_time['yearz'] = train_real_time.date_time.dt.year
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_real_time['monthz'] = train_real_time.date_time.dt.month
A value is try

In [20]:
del_cols = ['origin_filename', 'station_time_identifier', 'tc_identifier', 'date_time',
            'station_name', 'version', 'correction', 'source_uri', 'station_identifier', '_merge']


train_real_time.drop(del_cols , axis = 1, inplace=True, errors='ignore') 
test_real_time.drop(del_cols , axis = 1, inplace=True, errors='ignore') 

# removing target variable form training set
train_real_time.drop('snow_depth_3022_target', axis = 1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [21]:
# The Actual real-Time Train and Test
rfc_estimator2 = rfc(n_estimators=1000, 
                    min_samples_leaf=2, 
                    n_jobs=7)
sampler = ADASYN()
clf = make_pipeline(sampler, rfc_estimator2)
    
rezult = clf.fit(train_real_time, train_y).predict(test_real_time)

In [22]:
# merging the prediction/classification to the observations
classification = list(rezult)
test_real_time['classes'] = classification
test_real_time['url'] = url_list

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_real_time['classes'] = classification
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_real_time['url'] = url_list


In [23]:
overturn = test_real_time.loc[test_real_time['classes'] == 1]
snow_list = overturn['url']

## Detect Wind Speed Anomalies

In [24]:
test_real_time = test_real_time.drop([col for col in test_real_time.columns if test_real_time[col].eq('MSNG').any()], axis=1)
common_elems = list(np.intersect1d(test_real_time.columns, train_real_time.columns))
common_elems.append('wind_speed_3005_target')
train_real_time = train_real_time[common_elems]

KeyError: "['wind_speed_3005_target'] not in index"

In [None]:
del_cols = ['origin_filename', 'station_time_identifier', 'tc_identifier', 'date_time',
            'station_name', 'version', 'correction', 'source_uri', 'station_identifier', '_merge']


train_real_time.drop(del_cols , axis = 1, inplace=True, errors='ignore') 
test_real_time.drop(del_cols , axis = 1, inplace=True, errors='ignore') 

# removing target variable form training set
train_real_time.drop('wind_speed_3005_target', axis = 1, inplace=True)

In [None]:
# The Actual real-Time Train and Test
rfc_estimator2 = rfc(n_estimators=1000, 
                    min_samples_leaf=2, 
                    n_jobs=7)
sampler = ADASYN()
clf = make_pipeline(sampler, rfc_estimator2)
    
rezult = clf.fit(train_real_time, train_y).predict(test_real_time)

In [None]:
# merging the prediction/classification to the observations
classification = list(rezult)
test_real_time['classes'] = classification
test_real_time['url'] = url_list

In [None]:
overturn = test_real_time.loc[test_real_time['classes'] == 1]
wind_list = overturn['url']

## Detect Precip Anomalies

In [None]:
common_precip_elems = list(np.intersect1d(Xprecip.columns, newDf.columns))
test_real_time = newDf[common_precip_elems] 
common_precip_elems.append('precipitation_amount_285_target')
train_real_time = Xprecip[common_precip_elems]
train_y = train_real_time['precipitation_amount_285_target']

In [None]:
del_cols = ['origin_filename', 'station_time_identifier', 'tc_identifier', 'date_time',
            'station_name', 'version', 'correction', 'source_uri', 'station_identifier', '_merge']


train_real_time.drop(del_cols , axis = 1, inplace=True, errors='ignore') 
test_real_time.drop(del_cols , axis = 1, inplace=True, errors='ignore') 

# removing target variable form training set
train_real_time.drop('precipitation_amount_285_target', axis = 1, inplace=True)

In [None]:
# The Actual real-Time Train and Test
rfc_estimator2 = rfc(n_estimators=1000, 
                    min_samples_leaf=2, 
                    n_jobs=7)
sampler = ADASYN()
clf = make_pipeline(sampler, rfc_estimator2)
    
rezult = clf.fit(train_real_time, train_y).predict(test_real_time)

In [None]:
# merging the prediction/classification to the observations
classification = list(rezult)
test_real_time['classes'] = classification
test_real_time['url'] = url_list

In [None]:
overturn = test_real_time.loc[test_real_time['classes'] == 1]
precip_list = overturn['url']

## Present data from list output

In [None]:
def extract_info(source: str):
    newstring = source.rsplit('/')
    date = newstring[10] 
    stat = newstring[12]
    return [date, stat]
 
def make_dict(sources: list):
    dicto = {}
    for source in sources:
        stat = extract_info(source)[1]
        date = extract_info(source)[0]
        
        #check if stat is in sources:
        if stat not in dicto.keys():
            dicto[stat] = {'count': 1, 'earliest' : date, 'latest' : date}
            
        else:
            if date < dicto[stat]['earliest']:
                dicto[stat]['count'] = dicto[stat]['count'] + 1
                dicto[stat]['earliest'] = date
                
            elif date > dicto[stat]['latest']:
                dicto[stat]['count'] = dicto[stat]['count'] + 1
                dicto[stat]['latest'] = date
                
            else:
                dicto[stat]['count'] = dicto[stat]['count'] + 1
    
    return dicto 

In [None]:
snow_depth_anomalies = make_dict(snow_list)
wind_speed_anomalies = make_dict(wind_list)
precip_anomalies = make_dict(precip_list)

In [None]:
print('Snow Depth Anomalies')
print(snow_depth_anomalies)

print('Wind Speed Anomalies')
print(wind_speed_anomalies)

print('Precip Anomalies')
print(precip_anomalies)

In [None]:
print('snow depth, std-pkg-id="1.11.174.2.5.3.0"')

for key in snow_depth_anomalies.keys():
    early_year = snow_depth_anomalies[key]['earliest'][0] + snow_depth_anomalies[key]['earliest'][1] + snow_depth_anomalies[key]['earliest'][2] +snow_depth_anomalies[key]['earliest'][3]
    early_month = snow_depth_anomalies[key]['earliest'][4] + snow_depth_anomalies[key]['earliest'][5]
    early_day = snow_depth_anomalies[key]['earliest'][6] + snow_depth_anomalies[key]['earliest'][7]
    early_hour = snow_depth_anomalies[key]['earliest'][8] + snow_depth_anomalies[key]['earliest'][9]
    late_year = snow_depth_anomalies[key]['latest'][0] + snow_depth_anomalies[key]['latest'][1] + snow_depth_anomalies[key]['latest'][2] +snow_depth_anomalies[key]['latest'][3]
    late_month = snow_depth_anomalies[key]['latest'][4] + snow_depth_anomalies[key]['latest'][5]
    late_day = snow_depth_anomalies[key]['latest'][6] + snow_depth_anomalies[key]['latest'][7]
    late_hour = snow_depth_anomalies[key]['latest'][8] + snow_depth_anomalies[key]['latest'][9]
    url = 'https://dw.cmc.ec.gc.ca/services/pegasus/viewer/?stationID=' + key + '&from=' + str(early_year) + '-' + str(early_month) + '-' + str(early_day) + 'T' + str(early_hour) + ':00&to=' + str(late_year) + '-' + str(late_month) + '-' + str(late_day) + 'T' + str(late_hour) + ':00'
    print(url + ' with count ' + str(snow_depth_anomalies[key]['count']))
    
print('precipitation amount, std-pkg-id="1.11.171.1.60.5.0"')

for key in precip_anomalies.keys():
    early_year = precip_anomalies[key]['earliest'][0] + precip_anomalies[key]['earliest'][1] + precip_anomalies[key]['earliest'][2] +precip_anomalies[key]['earliest'][3]
    early_month = precip_anomalies[key]['earliest'][4] + precip_anomalies[key]['earliest'][5]
    early_day = precip_anomalies[key]['earliest'][6] + precip_anomalies[key]['earliest'][7]
    early_hour = precip_anomalies[key]['earliest'][8] + precip_anomalies[key]['earliest'][9]
    late_year = precip_anomalies[key]['latest'][0] + precip_anomalies[key]['latest'][1] + precip_anomalies[key]['latest'][2] +precip_anomalies[key]['latest'][3]
    late_month = precip_anomalies[key]['latest'][4] + precip_anomalies[key]['latest'][5]
    late_day = precip_anomalies[key]['latest'][6] + precip_anomalies[key]['latest'][7]
    late_hour = precip_anomalies[key]['latest'][8] + precip_anomalies[key]['latest'][9]
    url = 'https://dw.cmc.ec.gc.ca/services/pegasus/viewer/?stationID=' + key + '&from=' + str(early_year) + '-' + str(early_month) + '-' + str(early_day) + 'T' + str(early_hour) + ':00&to=' + str(late_year) + '-' + str(late_month) + '-' + str(late_day) + 'T' + str(late_hour) + ':00'
    print(url + ' with count ' + str(precip_anomalies[key]['count']))

print('wind speed, std-pkg-id="1.24.314.2.2.2.6"')

for key in wind_speed_anomalies.keys():
    early_year = wind_speed_anomalies[key]['earliest'][0] + wind_speed_anomalies[key]['earliest'][1] + wind_speed_anomalies[key]['earliest'][2] +wind_speed_anomalies[key]['earliest'][3]
    early_month = wind_speed_anomalies[key]['earliest'][4] + wind_speed_anomalies[key]['earliest'][5]
    early_day = wind_speed_anomalies[key]['earliest'][6] + wind_speed_anomalies[key]['earliest'][7]
    early_hour = wind_speed_anomalies[key]['earliest'][8] + wind_speed_anomalies[key]['earliest'][9]
    late_year = wind_speed_anomalies[key]['latest'][0] + wind_speed_anomalies[key]['latest'][1] + wind_speed_anomalies[key]['latest'][2] +wind_speed_anomalies[key]['latest'][3]
    late_month = wind_speed_anomalies[key]['latest'][4] + wind_speed_anomalies[key]['latest'][5]
    late_day = wind_speed_anomalies[key]['latest'][6] + wind_speed_anomalies[key]['latest'][7]
    late_hour = wind_speed_anomalies[key]['latest'][8] + wind_speed_anomalies[key]['latest'][9]
    url = 'https://dw.cmc.ec.gc.ca/services/pegasus/viewer/?stationID=' + key + '&from=' + str(early_year) + '-' + str(early_month) + '-' + str(early_day) + 'T' + str(early_hour) + ':00&to=' + str(late_year) + '-' + str(late_month) + '-' + str(late_day) + 'T' + str(late_hour) + ':00'
    print(url + ' with count ' + str(wind_speed_anomalies[key]['count']))
    