## IMPORT - Modules and raw data

In [1]:
import pandas as pd

pd.set_option('precision', 4)


In [None]:
stations = pd.read_csv(
    '../raw/RAW 2021 ENVIRONMENT CANADA WEATHER STATIONS.csv', encoding="latin-1", header=2)


In [2]:
active_stations = stations[stations["Last Year"] == 2021]
all_stations_list = active_stations["Station ID"].to_list()

bc_stations = stations[stations["Province"] == 'BRITISH COLUMBIA']
station_id_list = bc_stations["Station ID"].to_list()


This code block takes a few minutes to run. It grabs all records from 2021 for stations across Canada.

In [None]:
li = []

for station_id in all_stations_list:
    df = pd.read_csv('https://climate.weather.gc.ca/climate_data/bulk_data_e.html?format=csv&stationID=' +
                     str(station_id) + '&Year=2021&timeframe=2')
    df.insert(0, "Station ID", station_id)
    li.append(df)

all_stations_raw = (pd.concat(li, axis=0, ignore_index=True)
                    .to_csv('../raw/RAW 2021 ENVIRONMENT CANADA ALL WEATHER STATIONS.csv')
                    )


Run it once, then run this block instead.

In [None]:
raw = pd.read_csv(
    '../raw/RAW 2021 ENVIRONMENT CANADA ALL WEATHER STATIONS.csv')


## 2021/11/16 - Rain by station for Nov 13-15

To do this, let's first make three separate dataframes, one with each of the dates between Nov. 13-15.

In [None]:
filtered = raw[["Station ID", "Climate ID", "Station Name",
                "Longitude (x)", "Latitude (y)", "Date/Time", "Month", "Day", "Total Precip (mm)"]]
filtered = filtered[filtered["Month"] == 11]

nov13 = filtered[filtered["Day"] == 13].rename(
    columns={"Total Precip (mm)": "Nov 13 Rainfall"})
nov14 = filtered[filtered["Day"] == 14].rename(
    columns={"Total Precip (mm)": "Nov 14 Rainfall"})
nov15 = filtered[filtered["Day"] == 15].rename(
    columns={"Total Precip (mm)": "Nov 15 Rainfall"})


Then, let's attach the Nov. 14 data to the Nov. 13 stuff.

In [None]:
step1 = pd.merge(left=nov13, right=nov14[[
                 "Station ID", "Nov 14 Rainfall"]], left_on="Station ID", right_on="Station ID")


And do the same for the Nov. 15 stuff so it's all together in one dataframe.

In [None]:
step2 = (pd
         .merge(left=step1, right=nov15[["Station ID", "Nov 15 Rainfall"]], left_on="Station ID", right_on="Station ID")
         .dropna(subset=["Nov 13 Rainfall", "Nov 14 Rainfall", "Nov 15 Rainfall"], how='all')
         .sort_values('Nov 14 Rainfall', ascending=False)
         )


## 2021/11/16 - North Vancouver time series of Nov. 14 and 15.

This block takes a few minutes to run, so only run it if you need to. It grabs records going back to 1980 for one station in North Vancouver.

In [None]:
lis = []

for year in range(1980, 2022):
    url = 'https://climate.weather.gc.ca/climate_data/bulk_data_e.html?format=csv&stationID=833&Year=' + \
        str(year) + '&Month=11&Day=14&timeframe=2'
    df = pd.read_csv(url)
    lis.append(df)

northvan = pd.concat(lis, axis=0, ignore_index=True)


In [None]:
nov14 = northvan[northvan["Date/Time"].str.contains("11-14")]
nov15 = northvan[northvan["Date/Time"].str.contains("11-15")]
nov = pd.concat([nov14, nov15])
nov_sum = (nov[['Station Name', 'Year', 'Total Rain (mm)']]
           .groupby(['Station Name', 'Year'])
           .sum()
           .reset_index()
           )

## 2021/11/24 - Average November rainfall for North Vancouver

We can take the previous collection of records for the North Vancouver weather station and run a different analysis on it here.

In [None]:
northvan_months = (northvan[["Year", "Month", "Total Rain (mm)"]]
                   .groupby(["Year", "Month"])
                   .sum()
                   .reset_index()
                   )
northvan_novembers = northvan_months[northvan_months["Month"] == 11]


## 2021/11/24 - Temperatures in cities across Canada

This takes ~3 minutes to run, so you may not want to run it every time.

In [None]:
cities = [
    "48549",  # Toronto
    "51442",  # Vancouver
    "51097",  # Winnipeg
    "50149",  # Edmonton
    "53938",  # Halifax
    "51157",  # Montreal
]

lis = []

for city in cities:
    for year in range(1980, 2022):
        url = 'https://climate.weather.gc.ca/climate_data/bulk_data_e.html?format=csv&stationID=' + \
            city + '&Year=' + str(year) + '&timeframe=2'
        df = pd.read_csv(url)
        df["Station ID"] = city
        lis.append(df)

all_cities = pd.concat(lis, axis=0, ignore_index=True)


## 2021/11/25 - Nova Scotia Nov. 23/24 data

In [None]:
ns_stations = stations[stations["Province"] == 'NOVA SCOTIA']
station_id_list = (ns_stations["Station ID"]
                   .dropna()
                   .astype(int)
                   .to_list()
                   )

li = []

for station_id in station_id_list:
    for year in range(2021, 2022):
        url = 'https://climate.weather.gc.ca/climate_data/bulk_data_e.html?format=csv&stationID=' + \
            str(station_id) + '&Year=' + str(year) + '&timeframe=2'
        df = pd.read_csv(url)
        df.insert(0, "Station ID", station_id)
        li.append(df)

ns_stations_raw = pd.concat(li, axis=0, ignore_index=True)

nov23 = ns_stations_raw[ns_stations_raw["Date/Time"].isin(["2021-11-23", "2021-11-24"])].groupby(
    ["Station Name", "Latitude (y)", "Longitude (x)"]).sum().sort_values("Total Precip (mm)", ascending=False).reset_index()
nov23 = nov23[["Station ID",
               "Longitude (x)", "Latitude (y)", "Station Name", "Total Precip (mm)"]]
nov23 = nov23[nov23["Total Precip (mm)"] != 0.0]

## 2021/11/25 - Newfoundland Nov. 23/24 data




In [None]:
nl_stations = stations[stations["Province"] == 'NEWFOUNDLAND']
nl_station_id_list = nl_stations["Station ID"].dropna().astype(int).to_list()


In [None]:
li = []

for station_id in nl_station_id_list:
    for year in range(2021, 2022):
        url = 'https://climate.weather.gc.ca/climate_data/bulk_data_e.html?format=csv&stationID=' + \
            str(station_id) + '&Year=' + str(year) + '&timeframe=2'
        df = pd.read_csv(url)
        df.insert(0, "Station ID", station_id)
        li.append(df)

nl_stations_raw = pd.concat(li, axis=0, ignore_index=True)
nl_stations_raw.to_csv(
    '/content/drive/MyDrive/Data/raw/weather/nl_stations_raw.csv')


In [None]:
nl_nov23 = nl_stations_raw[nl_stations_raw["Date/Time"].isin(["2021-11-23", "2021-11-24"])].groupby(
    ["Station Name", "Latitude (y)", "Longitude (x)"]).sum().sort_values("Total Precip (mm)", ascending=False).reset_index()
nl_nov23 = nl_nov23[[
    "Station ID", "Longitude (x)", "Latitude (y)", "Station Name", "Total Precip (mm)"]]
nl_nov23 = nl_nov23[nl_nov23["Total Precip (mm)"] != 0.0]

nl_nov23.to_csv('/content/drive/MyDrive/Data/exports/nl_stations_nov23.csv')
display(nl_nov23)


Unnamed: 0,Station ID,Longitude (x),Latitude (y),Station Name,Total Precip (mm)
0,122666,-59.15,47.57,PORT AUX BASQUES,165.1
1,13172,-57.33,48.17,BURNT POND,54.0
2,215072,-57.04,53.68,CARTWRIGHT A,45.8
3,110156,-60.56,53.41,HAPPY VALLEY GOOSE BAY,41.7
4,13554,-60.42,53.32,GOOSE A,41.6
5,207834,-55.85,52.3,MARY'S HARBOUR A,39.8
6,18068,-57.1,51.02,FEROLLE POINT (AUT),22.5
7,95214,-58.57,48.56,STEPHENVILLE RCS,22.4
8,96696,-56.07,51.39,ST. ANTHONY A,21.5
9,105518,-58.55,48.54,STEPHENVILLE A,20.2


## 2021/11/28 - All 2021 data

In [None]:
all_stations_2021 = pd.read_csv(
    '/content/drive/MyDrive/Data/raw/weather/all_stations_raw.csv')

recent = (
    all_stations_2021[all_stations_2021["Date/Time"] >= "2021-11-27"]
    .dropna(subset=["Total Precip (mm)"])
    .sort_values("Total Precip (mm)", ascending=False)
)

recent = recent[recent["Total Precip (mm)"] != 0.0]
recent = recent[["Station Name", "Climate ID",
                 "Date/Time", "Total Precip (mm)"]]

display(recent)
recent.head(20).to_csv(
    "/content/drive/MyDrive/Data/exports/weather/weather-nov28.csv")


  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,Station Name,Climate ID,Date/Time,Total Precip (mm)
4345,PORT RENFREW,1016335,2021-11-27,193.0
17850,CAPE BEALE LIGHT,1031316,2021-11-27,128.6
19310,NITINAT RIVER HATCHERY,1035612,2021-11-27,121.6
64571,HOPE AIRPORT,1113543,2021-11-28,119.0
20040,PACHENA POINT,1035940,2021-11-27,117.6
...,...,...,...,...
267876,HIGH LEVEL,3073148,2021-11-28,0.1
351460,THUNDER BAY CS,6048268,2021-11-27,0.1
479211,LA TUQUE,707DBD4,2021-11-28,0.1
205096,EDMONTON STONY PLAIN CS,301A001,2021-11-28,0.1


In [None]:
"all_stations_nov23 = all_stations_2021[["Station Name", "Station ID", 'Date/Time', "Total Precip(mm)"]].sort_values("Total Precip(mm)", ascending = False).dropna()
all_stations_nov23=all_stations_nov23[all_stations_nov23["Total Precip (mm)"] != 0.0].head(
    20)

all_stations_nov23.to_csv(
    '/content/drive/MyDrive/Data/exports/record_rainfalls.csv')


## 2021/11/28 - All 2020 data

In [None]:
li = []

for station_id in all_stations_list:
    url = 'https://climate.weather.gc.ca/climate_data/bulk_data_e.html?format=csv&stationID=' + \
        str(station_id) + '&Year=2020&timeframe=2'
    df = pd.read_csv(url)
    df.insert(0, "Station ID", station_id)
    li.append(df)

all_stations_raw_2020 = pd.concat(li, axis=0, ignore_index=True)
all_stations_raw_2020.to_csv(
    '/content/drive/MyDrive/Data/raw/weather/all_stations_raw_2020.csv')


In [None]:
all_stations_2020 = all_stations_raw_2020[["Station Name", "Station ID", 'Date/Time',
                                           "Total Precip (mm)"]].sort_values("Total Precip (mm)", ascending=False).dropna()
all_stations_2020 = all_stations_2020[all_stations_2020["Total Precip (mm)"] != 0.0].head(
    20)

all_stations_2020.to_csv(
    '/content/drive/MyDrive/Data/exports/record_rainfalls_2020.csv')
display(all_stations_2020)


Unnamed: 0,Station Name,Station ID,Date/Time,Total Precip (mm)
19062,NITINAT RIVER HATCHERY,260,2020-01-31,299.4
98057,PLEASANT CAMP,1504,2020-12-01,280.0
313109,RIVERS PETTAPIECE,3457,2020-06-28,238.9
59294,POINT ATKINSON,844,2020-01-03,221.4
507245,MECHANIC SETTLEMENT,45090,2020-12-01,171.1
315635,MARQUETTE,3619,2020-05-23,167.8
58224,N VANC GROUSE MTN RESORT,823,2020-01-31,163.5
307619,BRANDON A,50821,2020-06-28,155.5
58200,N VANC GROUSE MTN RESORT,823,2020-01-07,154.0
307985,BRANDON RCS,49909,2020-06-28,152.3


## 2021/11/28 - Data since 2000

In [None]:
li = []

for year in range(2000, 2022):
    for station_id in all_stations_list:
        url = 'https://climate.weather.gc.ca/climate_data/bulk_data_e.html?format=csv&stationID=' + \
            str(station_id) + '&Year=' + str(year) + '&timeframe=2'
        df = pd.read_csv(url)
        df.insert(0, "Station ID", station_id)
        li.append(df)

all_stations_raw_2000 = pd.concat(li, axis=0, ignore_index=True)
all_stations_raw_2000.to_csv(
    '/content/drive/MyDrive/Data/raw/weather/all_stations_raw_2000-2021.csv')


KeyboardInterrupt: ignored

## 2022/01/06 - Abbotsford Rainfall in November

In [None]:
lis = []

for year in range(1980, 2022):
    url = 'https://climate.weather.gc.ca/climate_data/bulk_data_e.html?format=csv&stationID=50308&Year=' + \
        str(year) + '&timeframe=2'
    df = pd.read_csv(url)
    lis.append(df)

abby_raw = pd.concat(lis, axis=0, ignore_index=True)
display(abby_raw)


Unnamed: 0,Longitude (x),Latitude (y),Station Name,Climate ID,Date/Time,Year,Month,Day,Data Quality,Max Temp (°C),Max Temp Flag,Min Temp (°C),Min Temp Flag,Mean Temp (°C),Mean Temp Flag,Heat Deg Days (°C),Heat Deg Days Flag,Cool Deg Days (°C),Cool Deg Days Flag,Total Rain (mm),Total Rain Flag,Total Snow (cm),Total Snow Flag,Total Precip (mm),Total Precip Flag,Snow on Grnd (cm),Snow on Grnd Flag,Dir of Max Gust (10s deg),Dir of Max Gust Flag,Spd of Max Gust (km/h),Spd of Max Gust Flag
0,-122.36,49.03,ABBOTSFORD A,1100031,1980-01-01,1980,1,1,,,,,,,,,,,,,,,,,,,,,,,
1,-122.36,49.03,ABBOTSFORD A,1100031,1980-01-02,1980,1,2,,,,,,,,,,,,,,,,,,,,,,,
2,-122.36,49.03,ABBOTSFORD A,1100031,1980-01-03,1980,1,3,,,,,,,,,,,,,,,,,,,,,,,
3,-122.36,49.03,ABBOTSFORD A,1100031,1980-01-04,1980,1,4,,,,,,,,,,,,,,,,,,,,,,,
4,-122.36,49.03,ABBOTSFORD A,1100031,1980-01-05,1980,1,5,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15336,-122.36,49.03,ABBOTSFORD A,1100031,2021-12-27,2021,12,27,,,,,,,,,,,,,,,,,,,,,,,
15337,-122.36,49.03,ABBOTSFORD A,1100031,2021-12-28,2021,12,28,,,,,,,,,,,,,,,,,,,,,,,
15338,-122.36,49.03,ABBOTSFORD A,1100031,2021-12-29,2021,12,29,,,,,,,,,,,,,,,,,,,,,,,
15339,-122.36,49.03,ABBOTSFORD A,1100031,2021-12-30,2021,12,30,,,,,,,,,,,,,,,,,,,,,,,


In [None]:
abbotsford = abby_raw[abby_raw["Station Name"].str.contains('ABBOTSFORD')]
abbotsford = abbotsford[["Station Name", "Date/Time",
                         "Year", "Month", "Day", "Total Precip (mm)"]].dropna()
pivot = pd.pivot_table(abbotsford, columns=[
                       "Month"], index="Year", values="Total Precip (mm)", aggfunc=sum)

display(pivot)
pivot.to_csv('/content/drive/MyDrive/Data/exports/weather/abbotsford_2012.csv')


Month,1,2,3,4,5,6,7,8,9,10,11,12
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2012,,,,,,58.6,52.7,3.8,6.6,261.8,160.2,161.2
2013,135.7,102.8,214.9,144.3,103.3,81.0,3.8,25.7,122.5,63.1,217.5,118.0
2014,137.9,100.3,283.1,121.6,123.2,42.2,35.1,7.6,97.7,230.8,232.5,214.2
2015,173.8,111.4,187.9,52.9,8.6,12.1,51.9,43.3,70.6,102.1,185.6,235.0
2016,166.7,194.6,157.2,71.7,58.0,46.2,30.9,14.5,69.5,271.3,324.9,144.4
2017,111.5,188.7,309.1,173.6,102.9,46.0,4.4,3.2,75.5,186.6,239.8,199.6
2018,253.9,136.7,122.8,143.5,20.4,40.3,20.0,8.0,153.1,120.6,256.2,225.9
2019,160.9,94.7,42.1,145.3,52.9,28.0,50.2,20.5,159.7,132.2,102.5,223.1
2020,359.7,150.7,123.3,49.6,111.1,75.9,50.1,43.7,51.7,124.9,256.4,296.7
2021,215.2,138.8,66.5,31.9,48.4,36.1,0.0,22.4,164.5,179.3,476.3,


## 2022/01/18 - Vancouver Harbour

In [3]:
lis = []

for year in range(1980, 2022):
    url = 'https://climate.weather.gc.ca/climate_data/bulk_data_e.html?format=csv&stationID=888&Year=' + \
        str(year) + '&Month=11&Day=14&timeframe=2'
    df = pd.read_csv(url)
    lis.append(df)

van = pd.concat(lis, axis=0, ignore_index=True)
van.to_csv('/content/drive/MyDrive/Data/raw/weather/van_historical.csv')


In [5]:
nov14 = van[van["Date/Time"].str.contains("11-14")]
nov15 = van[van["Date/Time"].str.contains("11-15")]
nov = pd.concat([nov14, nov15])
nov_sum = nov[['Station Name', 'Year', 'Total Precip (mm)']].groupby(
    ['Station Name', 'Year']).sum().reset_index()

display(nov_sum)


Unnamed: 0,Station Name,Year,Total Precip (mm)
0,VANCOUVER HARBOUR CS,1980,0.8
1,VANCOUVER HARBOUR CS,1981,20.4
2,VANCOUVER HARBOUR CS,1982,31.2
3,VANCOUVER HARBOUR CS,1983,101.2
4,VANCOUVER HARBOUR CS,1984,0.0
5,VANCOUVER HARBOUR CS,1985,21.6
6,VANCOUVER HARBOUR CS,1986,9.4
7,VANCOUVER HARBOUR CS,1987,21.9
8,VANCOUVER HARBOUR CS,1988,0.0
9,VANCOUVER HARBOUR CS,1989,0.8
