# Read weather data from the Government of Canada

In [None]:
import pandas as pd
import wget
import os

In [None]:
# Download hourly weather CSV files from the Government of Canada climate site.
# The station was looked up on this search page:
# https://climat.meteo.gc.ca/historical_data/search_historic_data_f.html?searchType=stnName&timeframe=1&txtStationName=montreal&searchMethod=contains&optLimit=yearRange&StartYear=1840&EndYear=2024&Year=2024&Month=5&Day=1&selRowPerPage=25#stnNameTab
# Run this cell to request one CSV per (year, month) with the Python `wget` package.

# Station IDs used for the project:
# MONTREAL INTL A: 51157
# OTTAWA GATINEAU A: 50719
# QUEBEC INTL A: 51457
# SHERBROOKE: 48371

# Configuration
# station_ID is chosen from the station inventory
# output_dir is the path where the weather files are stored
station_ID = 51157
start_year = 2018
end_year = 2025
output_dir = "weather_data"

# The destination folder must exist before the first download
os.makedirs(output_dir, exist_ok=True)

# Every (year, month) pair to request, in chronological order
periods = [
    (y, m)
    for y in range(start_year, end_year + 1)
    for m in range(1, 13)
]

base_url = "https://climate.weather.gc.ca/climate_data/bulk_data_e.html?"

for year, month in periods:
    query = (
        f"format=csv&stationID={station_ID}&Year={year}&Month={month}&"
        "Day=14&timeframe=1&submit=Download+Data"
    )
    url = base_url + query
    print(f"Downloading: {url}")
    try:
        wget.download(url, out=output_dir)
        # wget leaves the cursor on its progress line; move to a fresh line
        print()
    except Exception as e:
        # Best effort: report the failed period and carry on with the next one
        print(f"\nFailed to download {url}: {e}")

## Merge files

Run the first cell to merge complete years only. Run the second cell instead if the data starts in a month other than January or ends in a month other than December.

In [None]:
# Merge the monthly CSV files of the complete years 2018-2024 into one file.
years = list(map(str, range(2018, 2025)))
months = [str(m).zfill(2) for m in range(1, 13)]

# Paths of all monthly files, ordered by year and then by month
csv_paths = [
    f"weather_data/en_climate_hourly_QC_7025251_{mm}-{yyyy}_P1H.csv"
    for yyyy in years
    for mm in months
]

# Load each file; a missing file raises FileNotFoundError and stops the cell
dataframes = list(map(pd.read_csv, csv_paths))

# Stack the months, index the rows by timestamp and write the result
data_qc = pd.concat(dataframes).set_index("Date/Time (LST)")
data_qc.to_csv("montreal_intl_a_weather_complete_years.csv")

In [None]:
def month_span(year, start_year, end_year, start_month, end_month):
    """Return the months of *year* that fall inside the requested period.

    The period runs from ``start_month`` of ``start_year`` to ``end_month``
    of ``end_year`` (both inclusive). The lower and upper bounds are
    computed independently so that a period contained in a single year
    (``start_year == end_year``) honours both ``start_month`` and
    ``end_month``.

    Args:
        year: Year for which the months are wanted.
        start_year: First year of the period.
        end_year: Last year of the period.
        start_month: First month (1-12) of the period, within ``start_year``.
        end_month: Last month (1-12) of the period, within ``end_year``.

    Returns:
        A ``range`` of month numbers, in ascending order.
    """
    first = start_month if year == start_year else 1
    last = end_month if year == end_year else 12
    return range(first, last + 1)


# Parameters
start_year = 2018
end_year = 2025
start_month = 1
end_month = 4

station_id = "7025251"
folder = "weather_data"
file_prefix = f"en_climate_hourly_QC_{station_id}"

dataframes = []

for year in range(start_year, end_year + 1):
    # Only the first and last years of the period can be partial
    months = month_span(year, start_year, end_year, start_month, end_month)

    for month in months:
        month_str = f"{month:02d}"
        filename = f"{folder}/{file_prefix}_{month_str}-{year}_P1H.csv"
        try:
            df = pd.read_csv(filename)
            dataframes.append(df)
        except FileNotFoundError:
            # A missing month is reported but does not abort the merge
            print(f"Warning: {filename} not found, skipping.")

# Concatenate and export
if dataframes:
    data_qc = pd.concat(dataframes)
    data_qc.set_index("Date/Time (LST)", inplace=True)
    data_qc.to_csv("montreal_intl_a_weather.csv")
    print("Weather data saved to montreal_intl_a_weather.csv")
else:
    print("No data files found.")

In [None]:
# Load the merged hourly weather file written by the merge cell
montreal_weather = pd.read_csv("montreal_intl_a_weather.csv")

# Map the source column headers to short names.
# Dew point temperature is not relative humidity, so it gets its own name
# ("dew_point") and "rel_hum" is taken from the relative-humidity column.
# NOTE(review): assumes the export contains a "Rel Hum (%)" column - confirm
# against the downloaded files. rename() ignores keys that are not present.
column_mapping = {
    "Temp (°C)": "temp",
    "Dew Point Temp (°C)": "dew_point",
    "Rel Hum (%)": "rel_hum",
    "Wind Chill": "wind_chill",
    "Wind Spd (km/h)": "wind_speed",
    "Wind Dir (10s deg)": "wind_dir",
    "Weather": "condition",
    "Date/Time (LST)": "datetime",
}

montreal_weather = montreal_weather.rename(columns=column_mapping)

# Working frame used by the following cells
df = pd.DataFrame(montreal_weather)

In [None]:
# Show the last five rows of the renamed weather frame
df.tail(n=5)