In [1]:
import pandas as pd
import datetime as dt

# Run this section if you need to recover corrupted data

In [2]:
raw = pd.read_csv("./data/data-historical-both.csv", index_col="Climate ID", low_memory=False)

In [4]:
raw = raw.drop(columns=["Unnamed: 0"])

raw.tail(3)

KeyError: "['Unnamed: 0'] not found in axis"

In [5]:
stations = pd.read_csv("./data/stations_cma.csv", index_col="Climate ID")

stations.head(3)

Unnamed: 0_level_0,Station ID,Name,Latitude,Longitude,First Year,Last Year,DLY First Year,DLY Last Year,CMAUID,CMANAME,PRUID
Climate ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1010774,18,BEAVER LAKE,483000000.0,-1232100000.0,1894,1952,1894.0,1952.0,935,Victoria,BC
1010960,20,BRENTWOOD BAY 2,483600000.0,-1232800000.0,1987,1997,1987.0,1997.0,935,Victoria,BC
1010961,21,BRENTWOOD CLARKE ROAD,483400000.0,-1232700000.0,1972,1980,1972.0,1980.0,935,Victoria,BC


In [6]:
stations[stations["DLY Last Year"].isna()]

Unnamed: 0_level_0,Station ID,Name,Latitude,Longitude,First Year,Last Year,DLY First Year,DLY Last Year,CMAUID,CMANAME,PRUID
Climate ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1021267,51317,CAMPBELL RIVER A,495707000.0,-1.251623e+09,2013,2022,,,944,Campbell River,BC
1046395,51438,POWELL RIVER A,495003000.0,-1.243001e+09,2013,2022,,,945,Powell River,BC
1068133,10750,TERRACE SKEENA BRIDGE CS,543100000.0,-1.283400e+09,1999,2002,,,965,Terrace,BC
1093598,594,HORSEFLY,522000000.0,-1.212500e+09,1961,1962,,,950,Williams Lake,BC
3013958,50311,LLOYDMINSTER A,531838000.0,-1.100427e+09,2012,2022,,,840,Lloydminster (Alberta part / partie de l'Alberta),AB
...,...,...,...,...,...,...,...,...,...,...,...
8200576,26824,BEDFORD MWO,444400000.0,-6.340000e+08,1995,1998,,,205,Halifax,NS
8202221,49128,HALIFAX COMMONS,443800000.0,-6.335000e+08,2010,2011,,,205,Halifax,NS
8205697,10970,SYDNEY CCR,461003000.0,-6.009530e+08,1994,1995,,,225,Cape Breton,NS
8401710,10968,GANDER CCR,485642000.0,-5.435020e+08,1994,1995,,,11,Gander,NL


In [8]:
joined = raw.join(stations).dropna()

joined["Day"] = pd.to_datetime(joined["Date/Time"]).dt.day
joined["Month"] = pd.to_datetime(joined["Date/Time"]).dt.month
joined["Year"] = pd.to_datetime(joined["Date/Time"]).dt.year

In [9]:
minimums = (joined
            .pivot_table(columns="Year", values="Min Temp (°C)", index=["CMANAME", "Month", "Day"], aggfunc='min')
            .reset_index()
            .melt(id_vars=["CMANAME", "Month", "Day"])
            .dropna()
            )

minimums.tail()

Unnamed: 0,CMANAME,Month,Day,Year,value
9772704,Yorkton,11,17,2022,-20.4
9772705,Yorkton,11,18,2022,-16.4
9772706,Yorkton,11,19,2022,-17.9
9772707,Yorkton,11,20,2022,-16.0
9772708,Yorkton,11,21,2022,-12.4


In [10]:
minimums[minimums["CMANAME"] == "Weyburn"]

Unnamed: 0,CMANAME,Month,Day,Year,value
4108615,Weyburn,9,1,1916,12.8
4108616,Weyburn,9,2,1916,10.0
4108617,Weyburn,9,3,1916,15.6
4108618,Weyburn,9,4,1916,12.2
4108619,Weyburn,9,5,1916,10.0
...,...,...,...,...,...
9769410,Weyburn,11,17,2022,-19.1
9769411,Weyburn,11,18,2022,-18.7
9769412,Weyburn,11,19,2022,-14.6
9769413,Weyburn,11,20,2022,-12.3


In [13]:
minimums.to_csv("./data/raw_data-min.csv")

In [11]:
maximums = (joined
            .pivot_table(columns="Year", values="Max Temp (°C)", index=["CMANAME", "Month", "Day"], aggfunc='max')
            .reset_index()
            .melt(id_vars=["CMANAME", "Month", "Day"])
            .dropna()
            )

maximums

Unnamed: 0,CMANAME,Month,Day,Year,value
46143,Toronto,3,1,1840,8.3
46144,Toronto,3,2,1840,7.8
46145,Toronto,3,3,1840,11.1
46146,Toronto,3,4,1840,15.0
46147,Toronto,3,5,1840,6.7
...,...,...,...,...,...
9772704,Yorkton,11,17,2022,-9.7
9772705,Yorkton,11,18,2022,-6.0
9772706,Yorkton,11,19,2022,-6.4
9772707,Yorkton,11,20,2022,-2.9


In [12]:
maximums[maximums["CMANAME"] == "Weyburn"]

Unnamed: 0,CMANAME,Month,Day,Year,value
4108615,Weyburn,9,1,1916,20.0
4108616,Weyburn,9,2,1916,25.0
4108617,Weyburn,9,3,1916,23.9
4108618,Weyburn,9,4,1916,25.0
4108619,Weyburn,9,5,1916,20.0
...,...,...,...,...,...
9769410,Weyburn,11,17,2022,-9.8
9769411,Weyburn,11,18,2022,-7.3
9769412,Weyburn,11,19,2022,-4.9
9769413,Weyburn,11,20,2022,-0.2


In [14]:
maximums.to_csv("./data/raw_data-max.csv")

# Scrap

In [38]:
year_counts = []

for city in maximums.reset_index()["CMANAME"].unique():
    saint_john = maximums.reset_index()
    saint_john = saint_john[saint_john["CMANAME"] == city].dropna(axis=1, how='all').set_index(["CMANAME", "Month", "Day"]).columns
    saint_john = pd.DataFrame({"city": [city], "years": [len(saint_john)]})
    
    year_counts.append(saint_john)
    
df = pd.concat(year_counts)

df.sort_values("years")

Unnamed: 0,city,years
0,High River,3
0,Ingersoll,10
0,Camrose,14
0,Wasaga Beach,16
0,Campbellton (partie du Québec / Quebec part),23
...,...,...
0,Woodstock,152
0,London,152
0,Peterborough,155
0,Hamilton,156


In [39]:
cmasWithEnoughData = df.loc[df["years"] >= 5, "city"].to_list()
cmasWithEnoughData

['Abbotsford - Mission',
 'Alma',
 'Amos',
 'Baie-Comeau',
 'Barrie',
 'Bathurst',
 'Belleville - Quinte West',
 'Brandon',
 'Brantford',
 'Brockville',
 'Brooks',
 'Calgary',
 'Campbell River',
 'Campbellton (New Brunswick part / partie du Nouveau-Brunswick)',
 'Campbellton (partie du Québec / Quebec part)',
 'Camrose',
 'Cape Breton',
 'Centre Wellington',
 'Charlottetown',
 'Chatham-Kent',
 'Chilliwack',
 'Cobourg',
 'Collingwood',
 'Corner Brook',
 'Cornwall',
 'Courtenay',
 'Cranbrook',
 'Dawson Creek',
 'Dolbeau-Mistassini',
 'Drummondville',
 'Duncan',
 'Edmonton',
 'Edmundston',
 'Elliot Lake',
 'Essa',
 'Estevan',
 'Fort St. John',
 'Fredericton',
 'Gander',
 'Granby',
 'Grande Prairie',
 'Greater Sudbury / Grand Sudbury',
 'Guelph',
 'Halifax',
 'Hamilton',
 'Ingersoll',
 'Joliette',
 'Kamloops',
 'Kawartha Lakes',
 'Kelowna',
 'Kenora',
 'Kentville',
 'Kingston',
 'Kitchener - Cambridge - Waterloo',
 'Lachute',
 'Lacombe',
 'Ladysmith',
 'Lethbridge',
 "Lloydminster (Alberta

In [380]:
maximumsWithEnoughData = maximums.reset_index()[maximums.reset_index()["CMANAME"].isin(cmasWithEnoughData)].set_index(["CMANAME", "Month", "Day"])

more_testing = pd.DataFrame({"Year": maximumsWithEnoughData.idxmax(axis=1), "Temp": maximumsWithEnoughData.max(axis=1)}).reset_index()
more_testing["Date"] = pd.to_datetime(more_testing["Year"].astype(str) + "-" + more_testing["Month"].astype(str) + "-" + more_testing["Day"].astype(str))
more_testing = more_testing.drop(columns=["Month", "Day", "Year"])
more_testing["days_since_record"] = (pd.datetime.today() - more_testing["Date"]).dt.days - 1

final_2 = []

for city in more_testing["CMANAME"].unique():
    
    d = more_testing[more_testing["CMANAME"] == city]

    d = d[d["days_since_record"] == d["days_since_record"].min()]
    final_2.append(d)
    
pd.concat(final_2).set_index("CMANAME").sort_values("days_since_record")

  more_testing["days_since_record"] = (pd.datetime.today() - more_testing["Date"]).dt.days - 1


Unnamed: 0_level_0,Temp,Date,days_since_record
CMANAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Halifax,15.1,2022-11-16,5
Whitehorse,5.7,2022-11-14,7
Ottawa - Gatineau (Ontario part / partie de l'Ontario),14.4,2022-11-12,9
Fredericton,19.3,2022-11-12,9
Sherbrooke,20.3,2022-11-12,9
...,...,...,...
Matane,27.0,1993-08-02,10703
Rimouski,22.0,1982-05-25,14790
Wasaga Beach,15.0,1981-04-27,15183
New Glasgow,10.0,1978-11-23,16069


In [378]:
halifax = maximums.reset_index()[maximums.reset_index()["CMANAME"] == "Saint John"].dropna(how='all', axis=1)

halifax.to_csv("data/halifax.csv")

In [105]:
minimums = joined.pivot_table(columns="CMANAME", values="Min Temp (°C)", index="Date/Time", aggfunc='min')

minimums.tail()

CMANAME,Abbotsford - Mission,Alma,Amos,Baie-Comeau,Barrie,Bathurst,Belleville - Quinte West,Brandon,Brantford,Brockville,...,Weyburn,Whitehorse,Williams Lake,Windsor,Winkler,Winnipeg,Wood Buffalo,Woodstock,Yellowknife,Yorkton
Date/Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-11-17,4.6,-4.8,,-6.6,,-4.1,-6.8,-17.9,-3.2,-7.5,...,-19.1,-11.7,-9.3,-2.8,-13.1,-15.1,-18.3,,-13.7,-20.4
2022-11-18,-2.7,-7.4,,-9.7,,-10.8,-10.2,-18.2,-3.7,-7.5,...,-18.7,-14.6,-11.4,-4.9,-15.4,-16.6,-11.1,,-8.4,-16.4
2022-11-19,-4.2,-10.7,,-13.4,,-12.7,-4.2,-16.6,-6.1,-4.6,...,-14.6,-13.4,-18.2,-6.6,-14.2,-17.7,-15.1,,-10.5,-17.9
2022-11-20,-3.6,-10.1,,-9.6,,-9.1,-13.2,-15.0,-9.7,-16.8,...,-12.3,-5.9,-12.6,-9.2,-13.2,-11.0,-10.3,,-11.3,-16.0
2022-11-21,0.4,-13.4,,-12.0,,-8.8,-13.5,-11.4,-9.9,-11.7,...,-14.1,-1.3,-4.9,-4.9,-5.5,-10.8,-8.7,,-5.3,-12.4


In [106]:
minimums = minimums.drop(["2022-11-21"])

minimums.tail()

CMANAME,Abbotsford - Mission,Alma,Amos,Baie-Comeau,Barrie,Bathurst,Belleville - Quinte West,Brandon,Brantford,Brockville,...,Weyburn,Whitehorse,Williams Lake,Windsor,Winkler,Winnipeg,Wood Buffalo,Woodstock,Yellowknife,Yorkton
Date/Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-11-16,-3.8,-4.2,,-7.4,,-3.6,-2.5,-16.8,-2.3,-3.0,...,,-10.0,-7.2,0.7,-13.4,-15.7,-16.7,,-19.4,-10.0
2022-11-17,4.6,-4.8,,-6.6,,-4.1,-6.8,-17.9,-3.2,-7.5,...,-19.1,-11.7,-9.3,-2.8,-13.1,-15.1,-18.3,,-13.7,-20.4
2022-11-18,-2.7,-7.4,,-9.7,,-10.8,-10.2,-18.2,-3.7,-7.5,...,-18.7,-14.6,-11.4,-4.9,-15.4,-16.6,-11.1,,-8.4,-16.4
2022-11-19,-4.2,-10.7,,-13.4,,-12.7,-4.2,-16.6,-6.1,-4.6,...,-14.6,-13.4,-18.2,-6.6,-14.2,-17.7,-15.1,,-10.5,-17.9
2022-11-20,-3.6,-10.1,,-9.6,,-9.1,-13.2,-15.0,-9.7,-16.8,...,-12.3,-5.9,-12.6,-9.2,-13.2,-11.0,-10.3,,-11.3,-16.0


In [400]:
maximums.reset_index().to_csv("data/test_data.csv")

Now let's make a call to ECC's api to test adding new data.

In [108]:
# Get a list of unique station IDs to iterate through.
listOfStationIds = (stations
                    .loc[stations["DLY Last Year"] == 2022, "Station ID"]
                    .unique()
                    .astype(int)
                    )

listOfStationIds

array([   26,    68,    76, 46728,    87, 54641,    78,  6811,  6812,
         114, 51337,   145, 52979,   155,   181, 52941, 51318,   189,
       45627,   209,   225,  8045,  6816,   327, 45807, 52018,   336,
       48693,   424,   442, 51037,   568,   617, 48370, 48248, 50169,
         636, 48688, 48108,   650, 50820, 50308, 54238,   706,   707,
         731,   747, 50368, 49088,   766, 43723,   776,   810,   823,
        6830,   853,   834,   837,   870, 51357, 51442,  6833,   925,
       48369, 51117,  1032,  1046, 50269, 46987,  1070,   979,  1137,
        6839, 31067,  1095, 50818,  1186, 51423, 42203,  1338,  1216,
        6843, 11006, 11005, 48208, 50837, 55198, 50842, 48168, 51058,
       55358, 27793, 27214, 30907, 50149, 53718, 51758,  1870,  1886,
       53998, 47088, 48550,  1920, 46911, 41783, 46850, 43581, 27018,
       27492, 52458, 49050, 32455, 46447, 50921,  2180, 50430, 27211,
       52200,  9010, 42728, 46807, 50940, 49268, 51377,  2265, 46867,
       42726, 48949,

In [404]:
li = []

# Get a list of unique station IDs to iterate through.
listOfStationIds = (stations
                    .loc[stations["Last Year"] == 2022, "Station ID"]
                    .unique()
                    .astype(int)
                    )

for stationId in listOfStationIds[0:10]:

    df = pd.read_csv(f'https://climate.weather.gc.ca/climate_data/bulk_data_e.html?format=csv&stationID={str(stationId)}&Year=2022&Month=11&Day=21&timeframe=2')
    df = df.loc[df["Date/Time"] == "2022-11-21",:]
    df = df[[ "Climate ID", "Year", "Month", "Day", "Date/Time", "Max Temp (°C)", "Min Temp (°C)"]]
    li.append(df)
    
new_data = pd.concat(li).set_index("Climate ID")

In [405]:
new_data.index = new_data.index.astype(str)

In [406]:
new_data

Unnamed: 0_level_0,Year,Month,Day,Date/Time,Max Temp (°C),Min Temp (°C)
Climate ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1011500,2022,11,21,2022-11-21,,
1015105,2022,11,21,2022-11-21,,
1015628,2022,11,21,2022-11-21,,
1015630,2022,11,21,2022-11-21,9.0,3.1
1016940,2022,11,21,2022-11-21,,4.5
1016943,2022,11,21,2022-11-21,8.9,4.5
1016RM0,2022,11,21,2022-11-21,,
1017254,2022,11,21,2022-11-21,8.5,5.6
1018598,2022,11,21,2022-11-21,8.7,4.6
1018611,2022,11,21,2022-11-21,9.5,4.4


In [407]:
new_joined = new_data.join(stations)

new_joined.tail()

Unnamed: 0_level_0,Year,Month,Day,Date/Time,Max Temp (°C),Min Temp (°C),Station ID,Name,Latitude,Longitude,First Year,Last Year,DLY First Year,DLY Last Year,CMAUID,CMANAME,PRUID
Climate ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1016943,2022,11,21,2022-11-21,8.9,4.5,54641,SAANICHTON CFIA,483718000.0,-1232508000.0,2020,2022,2020.0,2022.0,935,Victoria,BC
1016RM0,2022,11,21,2022-11-21,,,78,SAANICHTON MT NEWTON,483551000.0,-1232538000.0,1980,2022,1980.0,2022.0,935,Victoria,BC
1017254,2022,11,21,2022-11-21,8.5,5.6,6811,SHERINGHAM POINT,482236100.0,-1235516000.0,1992,2022,1992.0,2022.0,935,Victoria,BC
1018598,2022,11,21,2022-11-21,8.7,4.6,6812,VICTORIA UNIVERSITY CS,482725200.0,-1231817000.0,1992,2022,1992.0,2022.0,935,Victoria,BC
1018611,2022,11,21,2022-11-21,9.5,4.4,114,VICTORIA GONZALES CS,482447896.0,-1231929000.0,1972,2022,1973.0,2022.0,935,Victoria,BC


In [1]:
# new_joined["Day"] = new_joined["Date/Time"].day
# new_joined["Month"] = new_joined["Date/Time"].month
# new_joined["Year"] = new_joined["Date/Time"].year

maximums_new = new_joined.pivot_table(columns="Year", values="Max Temp (°C)", index=["CMANAME", "Month", "Day"], aggfunc='max')

maximums_new.head()

NameError: name 'new_joined' is not defined

In [409]:
final = pd.concat([maximums, maximums_new]).drop_duplicates()

final

Unnamed: 0_level_0,Unnamed: 1_level_0,Year,1889,1890,1891,1892,1893,1894,1895,1896,1897,1898,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
CMANAME,Month,Day,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
Abbotsford - Mission,1,1,,,,,,,,,,,...,2.0,6.0,2.0,1.0,1.0,-1.0,5.0,11.5,9.0,0.0
Abbotsford - Mission,1,2,,,,,,,,,,,...,4.5,9.0,2.0,1.0,-1.0,2.5,6.0,7.5,9.0,4.0
Abbotsford - Mission,1,3,,,,,,,,,,,...,4.5,4.5,2.5,1.5,-1.5,4.0,9.0,14.0,7.5,4.0
Abbotsford - Mission,1,4,,,,,,,,,,,...,3.0,4.0,0.5,2.0,2.0,5.0,10.5,6.5,9.0,3.0
Abbotsford - Mission,1,5,,,,,,,,,,,...,4.5,5.0,6.0,2.0,-1.0,8.0,7.5,6.0,9.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Yorkton,12,27,,,,,,,,,,,...,1.0,-13.0,-16.0,,,,,,,
Yorkton,12,28,,,,,,,,,,,...,-22.0,-22.0,-11.0,,,,,,,
Yorkton,12,29,,,,,,,,,,,...,-26.0,-23.0,-14.0,,,,,,,
Yorkton,12,30,,,,,,,,,,,...,-26.0,-12.0,-7.0,,,,,,,


In [166]:
def getMaxForDate(date)
    maxData = []

    for col_name, data in final.iloc[:, 0:1].iteritems():
        data = pd.DataFrame({"temp": data})
        data["Day"] = pd.to_datetime(data.index).day.astype(int)
        data["Month"] = pd.to_datetime(data.index).month.astype(int)
        data["Year"] = pd.to_datetime(data.index).year.astype(int)
        
        pivot = data.pivot(index=["Month", "Day"], columns="Year").dropna(axis=1, how='all')
        
        display(pivot)
        

Unnamed: 0_level_0,Unnamed: 1_level_0,temp,temp,temp,temp,temp,temp,temp,temp,temp,temp,temp,temp,temp,temp,temp,temp,temp,temp,temp,temp,temp
Unnamed: 0_level_1,Year,1909,1910,1911,1912,1913,1914,1915,1916,1917,1918,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
Month,Day,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
1,1,,1.1,,4.4,6.1,6.1,7.2,-0.6,1.7,10.6,...,2.0,6.0,2.0,1.0,1.0,-1.0,5.0,11.5,9.0,0.0
1,2,,0.0,2.2,-6.7,3.9,8.9,7.2,-1.1,1.7,11.1,...,4.5,9.0,2.0,1.0,-1.0,2.5,6.0,7.5,9.0,4.0
1,3,,0.6,0.0,-9.4,1.7,9.4,5.6,0.0,1.7,10.0,...,4.5,4.5,2.5,1.5,-1.5,4.0,9.0,14.0,7.5,4.0
1,4,,2.8,4.4,-3.3,-5.6,7.2,3.9,-1.1,1.7,7.8,...,3.0,4.0,0.5,2.0,2.0,5.0,10.5,6.5,9.0,3.0
1,5,,-0.6,4.4,-3.9,-3.3,11.1,3.9,-0.6,6.1,8.3,...,4.5,5.0,6.0,2.0,-1.0,8.0,7.5,6.0,9.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12,27,3.3,7.8,9.4,2.8,7.2,5.6,3.3,-2.8,3.3,6.1,...,6.5,4.5,0.5,5.0,-1.0,4.5,3.5,4.5,-12.5,
12,28,3.9,6.1,8.3,7.2,8.9,5.0,2.8,-2.2,5.0,7.2,...,7.0,5.0,2.5,4.0,-0.5,9.5,4.5,5.5,-8.5,
12,29,1.1,5.6,7.2,4.4,7.2,3.3,3.9,1.7,8.3,6.7,...,5.0,2.5,4.5,4.0,1.0,10.0,7.5,5.0,-1.0,
12,30,3.3,5.0,-1.1,5.6,7.2,5.6,2.2,-1.7,12.8,5.0,...,6.5,0.0,2.0,4.0,0.0,7.0,7.0,5.0,-2.0,


In [124]:
pd.DataFrame({"Date of record": final.idxmax()})

Unnamed: 0_level_0,Test
CMANAME,Unnamed: 1_level_1
Abbotsford - Mission,2021-06-28
Alma,1975-08-01
Amos,1921-07-03
Baie-Comeau,2020-06-18
Barrie,1973-08-09
...,...
Winnipeg,1983-09-02
Wood Buffalo,2021-06-30
Woodstock,1989-07-11
Yellowknife,2018-08-11


In [401]:
max = pd.read_csv("data/raw_data.csv")

max

Unnamed: 0,CMANAME,Month,Day,1889,1890,1891,1892,1893,1894,1895,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,Abbotsford - Mission,1,1,,,,,,,,...,2.0,6.0,2.0,1.0,1.0,-1.0,5.0,11.5,9.0,0.0
1,Abbotsford - Mission,1,2,,,,,,,,...,4.5,9.0,2.0,1.0,-1.0,2.5,6.0,7.5,9.0,4.0
2,Abbotsford - Mission,1,3,,,,,,,,...,4.5,4.5,2.5,1.5,-1.5,4.0,9.0,14.0,7.5,4.0
3,Abbotsford - Mission,1,4,,,,,,,,...,3.0,4.0,0.5,2.0,2.0,5.0,10.5,6.5,9.0,3.0
4,Abbotsford - Mission,1,5,,,,,,,,...,4.5,5.0,6.0,2.0,-1.0,8.0,7.5,6.0,9.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42583,Yorkton,12,27,,,,,,,,...,1.0,-13.0,-16.0,,,,,,,
42584,Yorkton,12,28,,,,,,,,...,-22.0,-22.0,-11.0,,,,,,,
42585,Yorkton,12,29,,,,,,,,...,-26.0,-23.0,-14.0,,,,,,,
42586,Yorkton,12,30,,,,,,,,...,-26.0,-12.0,-7.0,,,,,,,


In [30]:
raw = pd.read_csv("./data/raw_data2.csv")

raw["Date"] = pd.to_datetime(raw["Year"].astype(str) + "-" + raw["Month"].astype(str) + "-" + raw["Day"].astype(str))

Unnamed: 0.1,Unnamed: 0,index,CMANAME,Month,Day,Year,value,Date
0,0,0,Toronto,3,1,1840,8.3,1840-03-01
1,1,1,Toronto,3,2,1840,7.8,1840-03-02
2,2,2,Toronto,3,3,1840,11.1,1840-03-03
3,3,3,Toronto,3,4,1840,15.0,1840-03-04
4,4,4,Toronto,3,5,1840,6.7,1840-03-05
...,...,...,...,...,...,...,...,...
5372250,5372250,5372250,Yorkton,11,17,2022,-9.7,2022-11-17
5372251,5372251,5372251,Yorkton,11,18,2022,-6.0,2022-11-18
5372252,5372252,5372252,Yorkton,11,19,2022,-6.4,2022-11-19
5372253,5372253,5372253,Yorkton,11,20,2022,-2.9,2022-11-20


In [32]:
max_date = raw["Date"].max().date()

max_date

datetime.date(2022, 11, 21)