In [22]:
import ast
import os

import numpy as np
import pandas as pd
import requests

In [23]:
def get_request(url, endpoint, timeout=60):
    """Fetch the GeoJSON features served at ``url + endpoint``.

    Parameters
    ----------
    url : str
        Base URL; it is concatenated with ``endpoint`` as-is (no separator
        is inserted).
    endpoint : str
        Path and query string appended to ``url``.
    timeout : float or tuple, optional
        Passed straight to ``requests.get``. Without a timeout a stalled
        server would block the notebook cell indefinitely.

    Returns
    -------
    list or str
        The value stored under the ``'features'`` key of the JSON body when
        the HTTP status is 200; otherwise the string
        ``"Error + <status_code>"``. Callers must check for the string case
        before treating the result as a list of records.

    Raises
    ------
    requests.exceptions.RequestException
        For connection problems and timeouts raised by ``requests.get``.
    KeyError
        If a 200 response has no ``'features'`` key.
    """
    response = requests.get(url + endpoint, timeout=timeout)
    if response.status_code == 200:
        return response.json()['features']
    # Non-200: keep the original error-string contract for existing callers.
    return f"Error + {response.status_code}"

In [24]:
# Base URL of the weather API; endpoints are appended to it verbatim.
api_url = "https://api.weather.gc.ca/"
# Value sent as the PROVINCE_CODE query parameter.
province_code = "ON"
# Years sent as UTC_YEAR: 2017 through 2021 inclusive (range end is exclusive).
years = range(2017, 2021 + 1)
# Accumulator for the feature records returned for every requested year.
all_data = list()

In [25]:
# Start from an empty accumulator so that re-running this cell does not
# append a second copy of every record.
all_data = []
for year in years:
    # One request per year. Only a single page (limit=55000, offset=0) is
    # requested, so records beyond that page are not fetched.
    # NOTE(review): confirm whether pagination over `offset` is needed.
    climate_hourly_endpoint = f"collections/climate-hourly/items?lang=en&limit=55000&offset=0&PROVINCE_CODE={province_code}&UTC_YEAR={year}"
    yearly_data = get_request(api_url, climate_hourly_endpoint)
    # get_request reports a non-200 response as a string; extending a list
    # with a string would silently add its individual characters.
    if isinstance(yearly_data, str):
        raise RuntimeError(
            f"Failed to fetch climate-hourly data for {year}: {yearly_data}"
        )
    all_data.extend(yearly_data)

In [26]:
# `yearly_data` holds the result of the last loop iteration only, so this
# frame contains a single year's records; `all_data` is not used in the
# cells shown here.
df_yearly = pd.DataFrame(data=yearly_data)
df_yearly.head(n=5)



Unnamed: 0,id,type,geometry,properties
0,6158875.2020.12.31.19,Feature,"{'coordinates': [-77.52805555555555, 44.118888...","{'LOCAL_DATE': '2020-12-31 19:00:00', 'HUMIDEX..."
1,6158875.2020.12.31.20,Feature,"{'coordinates': [-77.52805555555555, 44.118888...","{'LOCAL_DATE': '2020-12-31 20:00:00', 'HUMIDEX..."
2,6158875.2020.12.31.21,Feature,"{'coordinates': [-77.52805555555555, 44.118888...","{'LOCAL_DATE': '2020-12-31 21:00:00', 'HUMIDEX..."
3,6158875.2021.1.1.6,Feature,"{'coordinates': [-77.52805555555555, 44.118888...","{'LOCAL_DATE': '2021-01-01 06:00:00', 'HUMIDEX..."
4,6158875.2021.1.1.7,Feature,"{'coordinates': [-77.52805555555555, 44.118888...","{'LOCAL_DATE': '2021-01-01 07:00:00', 'HUMIDEX..."


In [27]:
import json
def ensure_dict(obj):
    """Turn a stringified mapping back into a Python object.

    Non-string inputs are returned unchanged. Strings are parsed in three
    steps, returning the first success:

    1. strict JSON (``json.loads``);
    2. a Python literal (``ast.literal_eval``), accepted only when it yields
       a ``dict`` or ``list`` -- this covers ``str(dict)`` output containing
       ``None``/``True``/``False`` or apostrophes inside values, which the
       quote-swapping approach cannot parse;
    3. the legacy fallback of swapping single quotes for double quotes and
       parsing as JSON.

    Returns an empty dict when every step fails.
    """
    if not isinstance(obj, str):
        return obj

    try:
        return json.loads(obj)
    except json.JSONDecodeError:
        pass

    try:
        literal = ast.literal_eval(obj)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        literal = None
    if isinstance(literal, (dict, list)):
        return literal

    # Legacy behaviour, kept so inputs that used to parse still do.
    try:
        return json.loads(obj.replace("'", "\""))
    except json.JSONDecodeError:
        return {}

In [28]:
# Run both nested columns through ensure_dict so any stringified entries
# become Python objects; entries that are not strings pass through unchanged.
for _nested_column in ('geometry', 'properties'):
    df_yearly[_nested_column] = df_yearly[_nested_column].apply(ensure_dict)
df_yearly.head(n=5)

Unnamed: 0,id,type,geometry,properties
0,6158875.2020.12.31.19,Feature,"{'coordinates': [-77.52805555555555, 44.118888...","{'LOCAL_DATE': '2020-12-31 19:00:00', 'HUMIDEX..."
1,6158875.2020.12.31.20,Feature,"{'coordinates': [-77.52805555555555, 44.118888...","{'LOCAL_DATE': '2020-12-31 20:00:00', 'HUMIDEX..."
2,6158875.2020.12.31.21,Feature,"{'coordinates': [-77.52805555555555, 44.118888...","{'LOCAL_DATE': '2020-12-31 21:00:00', 'HUMIDEX..."
3,6158875.2021.1.1.6,Feature,"{'coordinates': [-77.52805555555555, 44.118888...","{'LOCAL_DATE': '2021-01-01 06:00:00', 'HUMIDEX..."
4,6158875.2021.1.1.7,Feature,"{'coordinates': [-77.52805555555555, 44.118888...","{'LOCAL_DATE': '2021-01-01 07:00:00', 'HUMIDEX..."


In [29]:
# Take each geometry's 'coordinates' entry, using a pair of Nones when the
# key is missing.
coordinate_pairs = df_yearly['geometry'].apply(
    lambda geom: geom.get('coordinates', [None, None])
)
# Spread each coordinate pair across two columns: first element ->
# longitude, second element -> latitude.
df_yearly[['longitude', 'latitude']] = coordinate_pairs.apply(pd.Series)

# Expand every properties dict into its own row of columns (one per key).
properties_df = df_yearly['properties'].apply(pd.Series)

In [30]:
# Replace the two nested columns with the flattened property columns,
# joined side by side on the shared row index.
df_yearly = pd.concat(
    [df_yearly.drop(columns=['geometry', 'properties']), properties_df],
    axis='columns',
)

In [31]:
# Preview the first five rows of the flattened frame.
df_yearly.head(n=5)

Unnamed: 0,id,type,longitude,latitude,LOCAL_DATE,HUMIDEX_FLAG,PROVINCE_CODE,CLIMATE_IDENTIFIER,RELATIVE_HUMIDITY,ID,...,STATION_NAME,LOCAL_MONTH,VISIBILITY_FLAG,UTC_DATE,TEMP_FLAG,WIND_SPEED_FLAG,LOCAL_HOUR,DEW_POINT_TEMP_FLAG,UTC_YEAR,UTC_DAY
0,6158875.2020.12.31.19,Feature,-77.528056,44.118889,2020-12-31 19:00:00,,ON,6158875,72.0,6158875.2020.12.31.19,...,TRENTON A,12,,2021-01-01T00:00:00,,,19,,2021,1
1,6158875.2020.12.31.20,Feature,-77.528056,44.118889,2020-12-31 20:00:00,,ON,6158875,76.0,6158875.2020.12.31.20,...,TRENTON A,12,,2021-01-01T01:00:00,,,20,,2021,1
2,6158875.2020.12.31.21,Feature,-77.528056,44.118889,2020-12-31 21:00:00,,ON,6158875,77.0,6158875.2020.12.31.21,...,TRENTON A,12,,2021-01-01T02:00:00,,,21,,2021,1
3,6158875.2021.1.1.6,Feature,-77.528056,44.118889,2021-01-01 06:00:00,,ON,6158875,91.0,6158875.2021.1.1.6,...,TRENTON A,1,,2021-01-01T11:00:00,,,6,,2021,1
4,6158875.2021.1.1.7,Feature,-77.528056,44.118889,2021-01-01 07:00:00,,ON,6158875,91.0,6158875.2021.1.1.7,...,TRENTON A,1,,2021-01-01T12:00:00,,,7,,2021,1


In [32]:
# `year` is the variable left over from the fetch loop, i.e. the last year
# requested -- the same iteration that produced the `yearly_data` this frame
# was built from.
output_path = f'./data/weather_data{year}.csv'
# to_csv does not create missing directories, so make sure ./data exists.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df_yearly.to_csv(output_path, index=False)