In [33]:
import requests
import pandas as pd
import numpy as np

In [34]:
def get_request(url, endpoint):
    """Fetch ``url + endpoint`` and return the ``'features'`` entry of its JSON body.

    Parameters
    ----------
    url
        Base address of the service.
    endpoint
        Path appended verbatim to ``url``; no separator is inserted.

    Returns
    -------
    The value stored under ``'features'`` in the decoded JSON when the
    response status is 200; otherwise the string ``"Error + <status code>"``.
    """
    reply = requests.get(url + endpoint)
    if reply.status_code != 200:
        # Failures are reported as a message string rather than raised.
        return f"Error + {reply.status_code}"
    return reply.json()['features']

In [203]:
# --- What to download ---------------------------------------------------
api_url = "https://api.weather.gc.ca/collections/climate-hourly/items"
province_code = "ON"  # sent as the PROVINCE_CODE query parameter
year = 2016  # sent as the UTC_YEAR query parameter; also used in the output file name

# --- Paging state for the download loop ---------------------------------
limit = 300_000  # page size requested on every call
offset = 0  # offset sent with the first request
all_data = list()  # accumulator the download loop appends to

In [204]:
while True:
    params = {
        "lang": "en",
        "limit": limit,
        "offset": offset,
        "PROVINCE_CODE": province_code,
        "UTC_YEAR": year
    }
    
    # Make the API request
    response = requests.get(api_url, params=params)
    
    if response.status_code == 200:
        batch_data = response.json()  # Assumes the API returns JSON formatted data
        number_of_records = len(batch_data)
        
        # Add new data to all_data list (you could also be storing batches to a file or database)
        all_data.extend(batch_data)

        # If the number of records is less than the limit we requested,
        # we've received the last page of data
        if number_of_records < limit:
            break

        # Otherwise, increment the offset to get the next batch of data
        offset += number_of_records

    else:
        print(f"Failed to retrieve data: {response.status_code}")
        # Handle the error appropriately - retry logic, logging, etc.
        break

In [205]:
df_yearly = pd.DataFrame(response.json()['features'])
df_yearly.head()



Unnamed: 0,id,type,geometry,properties
0,6158875.2016.9.28.16,Feature,"{'coordinates': [-77.52805555555555, 44.118888...","{'TEMP_FLAG': None, 'HUMIDEX_FLAG': None, 'LOC..."
1,6158875.2016.9.28.17,Feature,"{'coordinates': [-77.52805555555555, 44.118888...","{'TEMP_FLAG': None, 'HUMIDEX_FLAG': None, 'LOC..."
2,6158875.2016.9.28.18,Feature,"{'coordinates': [-77.52805555555555, 44.118888...","{'TEMP_FLAG': None, 'HUMIDEX_FLAG': None, 'LOC..."
3,6158875.2016.9.28.19,Feature,"{'coordinates': [-77.52805555555555, 44.118888...","{'TEMP_FLAG': None, 'HUMIDEX_FLAG': None, 'LOC..."
4,6158875.2016.9.28.20,Feature,"{'coordinates': [-77.52805555555555, 44.118888...","{'TEMP_FLAG': None, 'HUMIDEX_FLAG': None, 'LOC..."


In [206]:
import ast
import json
def ensure_dict(obj):
    """Turn a string-encoded mapping back into a Python object.

    Non-string inputs are returned unchanged. Strings are parsed with three
    strategies, in order, and the first success wins:

    1. strict JSON (``json.loads`` on the text as-is);
    2. a Python literal (``ast.literal_eval``), which covers ``repr``-style
       dicts that use single quotes and ``None``/``True``/``False``;
    3. JSON after replacing every ``'`` with ``"`` (the legacy behaviour,
       kept for single-quoted text that uses JSON keywords such as ``null``).

    Parameters
    ----------
    obj
        Value to normalise.

    Returns
    -------
    The parsed object for a parsable string, ``{}`` for a string none of the
    strategies can parse, or ``obj`` itself when it is not a string.
    """
    if not isinstance(obj, str):
        return obj

    try:
        return json.loads(obj)
    except json.JSONDecodeError:
        pass

    try:
        # literal_eval only evaluates literals, so it is safe on untrusted text.
        return ast.literal_eval(obj)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        pass

    try:
        return json.loads(obj.replace("'", "\""))
    except json.JSONDecodeError:
        return {}

In [207]:
# Make sure both nested columns hold real objects rather than their string
# form; ensure_dict leaves values that are not strings untouched.
for nested_column in ('geometry', 'properties'):
    df_yearly[nested_column] = df_yearly[nested_column].apply(ensure_dict)
df_yearly.head()

Unnamed: 0,id,type,geometry,properties
0,6158875.2016.9.28.16,Feature,"{'coordinates': [-77.52805555555555, 44.118888...","{'TEMP_FLAG': None, 'HUMIDEX_FLAG': None, 'LOC..."
1,6158875.2016.9.28.17,Feature,"{'coordinates': [-77.52805555555555, 44.118888...","{'TEMP_FLAG': None, 'HUMIDEX_FLAG': None, 'LOC..."
2,6158875.2016.9.28.18,Feature,"{'coordinates': [-77.52805555555555, 44.118888...","{'TEMP_FLAG': None, 'HUMIDEX_FLAG': None, 'LOC..."
3,6158875.2016.9.28.19,Feature,"{'coordinates': [-77.52805555555555, 44.118888...","{'TEMP_FLAG': None, 'HUMIDEX_FLAG': None, 'LOC..."
4,6158875.2016.9.28.20,Feature,"{'coordinates': [-77.52805555555555, 44.118888...","{'TEMP_FLAG': None, 'HUMIDEX_FLAG': None, 'LOC..."


# pd.Series
- A pandas `Series` is like a single column of a table: a one-dimensional array that can hold data of any type.

In [208]:
# Pull the coordinate pair out of each geometry dict (falling back to two
# Nones when the key is missing), then let pd.Series fan each pair out into
# separate columns.
coordinate_pairs = df_yearly['geometry'].apply(
    lambda geom: geom.get('coordinates', [None, None]))
df_yearly[['longitude', 'latitude']] = coordinate_pairs.apply(pd.Series)

# Same expansion for the properties dicts: one column per key.
properties_df = df_yearly['properties'].apply(pd.Series)

In [200]:
# Swap the two nested columns for the flattened property columns.
top_level_columns = df_yearly.drop(columns=['geometry', 'properties'])
df_yearly = pd.concat([top_level_columns, properties_df], axis='columns')

In [201]:
# Preview the first five rows of the flattened frame.
df_yearly.head(n=5)

Unnamed: 0,id,type,longitude,latitude,TEMP_FLAG,HUMIDEX_FLAG,LOCAL_MONTH,PRECIP_AMOUNT_FLAG,WINDCHILL_FLAG,WIND_SPEED_FLAG,...,DEW_POINT_TEMP_FLAG,UTC_YEAR,UTC_MONTH,STATION_PRESSURE,WIND_DIRECTION,STATION_NAME,PRECIP_AMOUNT,VISIBILITY_FLAG,UTC_DATE,WEATHER_FRE_DESC
0,6158875.2016.12.31.19,Feature,-77.528056,44.118889,,,12,,,,...,,2017,1,99.22,23.0,TRENTON A,,,2017-01-01T00:00:00,"Pluie,Brouillard"
1,6158875.2016.12.31.20,Feature,-77.528056,44.118889,,,12,,,,...,,2017,1,99.23,23.0,TRENTON A,,,2017-01-01T01:00:00,"Pluie,Brouillard"
2,6158875.2016.12.31.21,Feature,-77.528056,44.118889,,,12,,,,...,,2017,1,99.28,24.0,TRENTON A,,,2017-01-01T02:00:00,"Pluie,Neige,Brouillard"
3,6158875.2016.12.31.22,Feature,-77.528056,44.118889,,,12,,,,...,,2017,1,99.26,23.0,TRENTON A,,,2017-01-01T03:00:00,Pluie
4,6158875.2016.12.31.23,Feature,-77.528056,44.118889,,,12,,,,...,,2017,1,99.32,24.0,TRENTON A,,,2017-01-01T04:00:00,Pluie


In [202]:
from pathlib import Path

# Write the flattened frame to ./data/weather_data<year>.csv without the index.
# The target directory is created first so the export also works in a checkout
# where ./data does not exist yet.
output_path = Path(f'./data/weather_data{year}.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df_yearly.to_csv(output_path, index=False)