In [None]:
from datetime import datetime, timedelta
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import requests
import seaborn as sns  # optional if you want a quick, cleaner boxplot


In [None]:
# Download ECCC daily climate records for each province of interest, keep them
# in `province_dict` (province code -> DataFrame) and cache each one as a CSV.
# API endpoint
url = "https://api.weather.gc.ca/collections/climate-daily/items"
# Query parameters
provinces = ["BC", "AB", "NT"]
province_dict = {}

# Create the output folder up front so to_csv does not fail on a fresh checkout.
climate_dir = Path("data/climate_data")
climate_dir.mkdir(parents=True, exist_ok=True)

for province in provinces:
    params = {
        "datetime": "2019-01-01/2025-03-31",
        "limit": 10000,  # only one page is fetched; see the truncation warning below
        "PROVINCE_CODE": province,
    }
    # Make request: time out instead of hanging forever, and fail loudly on an
    # HTTP error rather than hitting a confusing KeyError on the error payload.
    response = requests.get(url, params=params, timeout=60)
    response.raise_for_status()
    data = response.json()
    features = data.get("features", [])

    # NOTE(review): assumes the response carries a `numberMatched` total — confirm
    # against the API docs. When it is absent this check is simply skipped.
    matched = data.get("numberMatched")
    if matched is not None and matched > len(features):
        print(
            f"Warning: {province} matched {matched} records but only "
            f"{len(features)} were returned; results are truncated by `limit`."
        )

    records = [f["properties"] for f in features]
    province_dict[province] = pd.DataFrame(records)
    province_dict[province].to_csv(climate_dir / f"monthly_{province}.csv", index=False)

# Convenience aliases used by the later plotting cells.
monthly_climate_df_bc = province_dict["BC"]
monthly_climate_df_ab = province_dict["AB"]
monthly_climate_df_nwt = province_dict["NT"]

print(monthly_climate_df_bc.shape)
print(monthly_climate_df_ab.shape)
print(monthly_climate_df_nwt.shape)


In [None]:
# Load the fire records and keep only the three provinces under study.
fires_with_province = pd.read_csv('data/fires_with_provinces.csv')
# .copy() makes the filtered result an independent frame: later cells add columns
# to it, which would otherwise raise SettingWithCopyWarning on a filtered slice.
fires_with_province = fires_with_province[
    fires_with_province['PREABBR'].isin(['B.C.', 'N.W.T.', 'Alta.'])
].copy()
print(fires_with_province.shape)

In [None]:
# Parse acq_date once, store it back, and derive the calendar Year/Month columns
acq_dates = pd.to_datetime(fires_with_province['acq_date'])
fires_with_province['acq_date'] = acq_dates
fires_with_province['Year'] = acq_dates.dt.year
fires_with_province['Month'] = acq_dates.dt.month


In [None]:
# -------------------------------------------------
# Visualization 1: Annual Wildfire Trend
# -------------------------------------------------
# Number of fire records per province (PREABBR) and year
fires_per_year = fires_with_province.groupby(['PREABBR', 'Year']).size()
annual_counts = fires_per_year.reset_index(name='fire_count')

# One line per province, drawn on an explicit Axes
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(
    data=annual_counts,
    x='Year',
    y='fire_count',
    hue='PREABBR',
    marker='o',
    ax=ax,
)
ax.set_title('Annual Wildfire Counts by Province (2019 - 2025)')
ax.set_xlabel('Year')
ax.set_ylabel('Number of Fires')
ax.legend(title='Province')
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# -------------------------------------------------
# Visualization 2: Seasonal (Monthly) Wildfire Pattern
# -------------------------------------------------
# Aggregate fire counts per province, year AND month. Keeping Year in the key
# gives one observation per (province, month) for every year, so the boxplot
# shows a real distribution and the mean below is a true across-years average.
# (Grouping by month alone yields a single value per box.)
monthly_counts = (fires_with_province
                  .groupby(['PREABBR', 'Year', 'Month'])
                  .size()
                  .reset_index(name='fire_count'))

plt.figure(figsize=(12,6))
sns.boxplot(x='Month', y='fire_count', hue='PREABBR', data=monthly_counts)
plt.title('Distribution of Monthly Wildfire Counts by Province')
plt.xlabel('Month')
plt.ylabel('Wildfire Count')
plt.legend(title='Province', loc='upper right')
plt.tight_layout()
plt.show()

# Alternatively, show average fires per month (across years).
# Months with no fire records in a given year have no row in monthly_counts,
# so the mean is taken over the years in which that month had at least one fire.
avg_monthly = (monthly_counts
               .groupby(['PREABBR', 'Month'])['fire_count']
               .mean()
               .reset_index())

plt.figure(figsize=(12,6))
sns.lineplot(data=avg_monthly, x='Month', y='fire_count', hue='PREABBR', marker='o')
plt.title('Average Monthly Wildfire Counts by Province')
plt.xlabel('Month')
plt.ylabel('Average Wildfire Count')
plt.legend(title='Province')
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:

# -------------------------------------------------
# Visualization 3: Geographical Spread of Fires by Season
# -------------------------------------------------
# To avoid plotting too many points, take a random sample (at most 5,000 points).
# Capping at the frame length keeps .sample() from raising ValueError when
# fewer than 5,000 rows are available; the fixed seed keeps the plot reproducible.
sample_size = min(5000, len(fires_with_province))
sample_fires = fires_with_province.sample(n=sample_size, random_state=42)

# Create a season column based on month
def season(month):
    """Return the season name for a month number.

    12, 1, 2 -> 'Winter'; 3, 4, 5 -> 'Spring'; 6, 7, 8 -> 'Summer';
    any other value -> 'Autumn'.
    """
    months_by_season = (
        ('Winter', (12, 1, 2)),
        ('Spring', (3, 4, 5)),
        ('Summer', (6, 7, 8)),
    )
    for season_name, season_months in months_by_season:
        if month in season_months:
            return season_name
    # Everything not matched above falls through to Autumn.
    return 'Autumn'

# Label every sampled fire with the season of its month
sample_fires['Season'] = sample_fires['Month'].map(season)

# Map view: colour encodes season, marker shape encodes province
fig, ax = plt.subplots(figsize=(10, 8))
sns.scatterplot(
    data=sample_fires,
    x='longitude',
    y='latitude',
    hue='Season',
    style='PREABBR',
    alpha=0.6,
    ax=ax,
)
ax.set_title('Geographical Distribution of Wildfires by Season')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
# Park the combined legend outside the plotting area
ax.legend(title='Season / Province', bbox_to_anchor=(1.05, 1), loc='upper left')
fig.tight_layout()
plt.show()


# 2 : Concat wildfire with 7-day climate

To explore how climate parameters correlate with wildfire occurrences:

* Get the climate info (e.g., precipitation) for a day range (like 7 days) around the fire's lat/long location
* Use a for loop over the dataset to call the function for each wildfire
* Aggregate precipitation and the other parameters
* Add the aggregated values to the original dataset




In [None]:
# Enrich each fire record with the mean of nearby daily climate observations
# over a +/-3 day window, producing `fire_with_province_merged`.

# ECCC Climate Daily API endpoint
CLIMATE_URL = "https://api.weather.gc.ca/collections/climate-daily/items"

# Columns you want to average
numeric_cols =['TOTAL_PRECIPITATION','MEAN_TEMPERATURE','MAX_TEMPERATURE','MIN_TEMPERATURE']

# Number of fire rows to enrich (one HTTP request per row). The original loop
# stopped at index label 1000, i.e. 1001 rows on an unfiltered 0..N index.
MAX_FIRES = 1001

# Prepare a list of rows to reconstruct a final DataFrame
updated_rows = []

# .head() limits by row POSITION. Comparing the index label against 1000 is wrong
# here because the frame was filtered earlier, so its labels are not contiguous.
for idx, row in fires_with_province.head(MAX_FIRES).iterrows():
    lat = row['latitude']
    lon = row['longitude']

    # acq_date may be a string or an already-parsed Timestamp (an earlier cell
    # converts the column); pd.to_datetime accepts both, whereas
    # strptime(str(Timestamp), "%Y-%m-%d") fails on the trailing " 00:00:00".
    date_obj = pd.to_datetime(row['acq_date'])

    # Compute ±3 days
    start_date = (date_obj - timedelta(days=3)).strftime("%Y-%m-%d")
    end_date = (date_obj + timedelta(days=3)).strftime("%Y-%m-%d")

    # Use a bounding box of +/-1.5 degrees around the lat/long so the API
    # returns daily data for stations near the fire.
    # Adjust the +/-1.5 as needed for your region and coverage.
    bbox = f"{lon-1.5},{lat-1.5},{lon+1.5},{lat+1.5}"

    # Build query params
    params = {
        "datetime": f"{start_date}/{end_date}",
        "bbox": bbox,
        "limit": 1000  # increase if needed
    }

    # Default empty series if nothing found or error
    mean_vals = pd.Series(dtype='float64')

    # Make the API request; a timeout or connection error on one row should not
    # throw away the rows already collected, so report it and move on.
    try:
        response = requests.get(CLIMATE_URL, params=params, timeout=30)
    except requests.RequestException as exc:
        print(f"Climate request failed for fire index {idx}: {exc}")
        response = None

    if response is not None and response.status_code == 200:
        data_json = response.json()
        features = data_json.get("features", [])

        if len(features) > 0:
            # Convert to DataFrame
            records = [feature["properties"] for feature in features]
            daily_df = pd.DataFrame(records)

            # reindex guarantees all expected columns exist (missing ones become
            # NaN) instead of raising KeyError; then coerce values to numeric.
            numeric_df = daily_df.reindex(columns=numeric_cols).apply(pd.to_numeric, errors='coerce')

            # Compute column-wise means
            mean_vals = numeric_df.mean()

    # Attach the average values back to the current row
    # (`row` is a per-iteration copy, so fires_with_province itself is untouched)
    for col in numeric_cols:
        row[f"weekly_{col.lower()}"] = mean_vals.get(col, None)

    updated_rows.append(row)

# Construct a new DataFrame with the updated columns
fire_with_province_merged = pd.DataFrame(updated_rows)


In [None]:
# Persist the climate-enriched fires (index included) and preview the first rows
merged_csv_path = "data/fire_with_province_with_climate.csv"
fire_with_province_merged.to_csv(merged_csv_path)
fire_with_province_merged.head()

---

## Visualizations for natural causes

In [None]:
# ================================
# PREPARE THE DATA
# ================================

# Fire acquisition dates as datetimes
fires_with_province['acq_date'] = pd.to_datetime(fires_with_province['acq_date'])

# Per-province climate frames, paired with the province code used to tag them.
# Each frame is updated in place: 'LOCAL_DATE' is parsed to datetime and a
# 'Province' identifier column is added.
climate_frames_by_code = (
    ('BC', monthly_climate_df_bc),
    ('AB', monthly_climate_df_ab),
    ('NT', monthly_climate_df_nwt),
)
for province_code, climate_frame in climate_frames_by_code:
    climate_frame['LOCAL_DATE'] = pd.to_datetime(climate_frame['LOCAL_DATE'])
    climate_frame['Province'] = province_code

# Stack the three provinces into one frame for easier plotting
monthly_climate_all = pd.concat(
    [climate_frame for _, climate_frame in climate_frames_by_code],
    ignore_index=True,
)

# Year and month of each fire, used as aggregation keys below
fire_dates = fires_with_province['acq_date']
fires_with_province['Year'] = fire_dates.dt.year
fires_with_province['Month'] = fire_dates.dt.month

In [None]:
# ================================
# 1. Seasonal Patterns in Climate Data
# ================================
# One time-series figure per climate variable, one line per province.
# Each entry: (column to plot, figure title, y-axis label)
climate_panels = [
    (
        'MEAN_TEMPERATURE',
        'Mean Temperature Over Time by Province (2019 Jan - 2025 Mar)',
        'Mean Temperature (°C)',
    ),
    (
        'TOTAL_PRECIPITATION',
        'Total Precipitation Over Time by Province (2019 Jan - 2025 Mar)',
        'Total Precipitation (mm)',
    ),
]

for value_col, panel_title, y_label in climate_panels:
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.lineplot(
        data=monthly_climate_all,
        x='LOCAL_DATE',
        y=value_col,
        hue='Province',
        marker='o',
        ax=ax,
    )
    ax.set_title(panel_title)
    ax.set_xlabel('Date')
    ax.set_ylabel(y_label)
    plt.xticks(rotation=45)  # slant the date labels so they do not overlap
    fig.tight_layout()
    plt.show()


In [None]:
# ================================
# 2. Seasonal Patterns in Wildfire Occurrence
# ================================
# Fire records counted per (province, year, month)
fires_per_month = fires_with_province.groupby(['PREABBR', 'Year', 'Month']).size()
fire_counts = fires_per_month.reset_index(name='fire_count')

# Line plot: each month has one count per year, which seaborn summarises
# into a single line per province
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(
    data=fire_counts,
    x='Month',
    y='fire_count',
    hue='PREABBR',
    marker='o',
    ax=ax,
)
ax.set_title('Average Wildfire Count by Month by Province')
ax.set_xlabel('Month')
ax.set_ylabel('Average Number of Fires')
ax.legend(title='Province')
fig.tight_layout()
plt.show()

# Box plot: spread of the monthly counts across years, per province
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=fire_counts, x='Month', y='fire_count', hue='PREABBR', ax=ax)
ax.set_title('Distribution of Monthly Wildfire Counts by Province')
ax.set_xlabel('Month')
ax.set_ylabel('Wildfire Count')
fig.tight_layout()
plt.show()

In [None]:
# ================================
# 3. Annual Trend in Wildfire Frequency
# ================================
# Fire records counted per (province, year)
yearly_sizes = fires_with_province.groupby(['PREABBR', 'Year']).size()
annual_fire_counts = yearly_sizes.reset_index(name='fire_count')

fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(
    data=annual_fire_counts,
    x='Year',
    y='fire_count',
    hue='PREABBR',
    marker='o',
    ax=ax,
)
ax.set_title('Annual Wildfire Counts by Province (2019 - 2025)')
ax.set_xlabel('Year')
ax.set_ylabel('Number of Fires')
# Put a tick on every year that actually occurs in the data
ax.set_xticks(annual_fire_counts['Year'].unique())
fig.tight_layout()
plt.show()

In [None]:
# ================================
# 4. Linking Fires with Weekly Climate Aggregates
# ================================
# This cell expects fires_with_province to already carry these per-fire columns:
# ['avg_total_precipitation','avg_mean_temperature','avg_max_temperature','avg_min_temperature', 'avg_bright_sunshine']
# NOTE(review): no visible cell creates these avg_* columns — the enrichment loop
# writes weekly_* columns into fire_with_province_merged instead. Confirm where
# they come from, otherwise this cell raises KeyError on a fresh kernel.

# Per-fire scatter: temperature against precipitation, coloured by province
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=fires_with_province,
    x='avg_mean_temperature',
    y='avg_total_precipitation',
    hue='PREABBR',
    ax=ax,
)
ax.set_title('Avg. Mean Temperature vs. Avg. Total Precipitation (Weekly Data)')
ax.set_xlabel('Avg. Mean Temperature (°C)')
ax.set_ylabel('Avg. Total Precipitation (mm)')
fig.tight_layout()
plt.show()

# Average the per-fire climate values within each (province, year, month) group
weekly_avg_cols = [
    'avg_mean_temperature',
    'avg_total_precipitation',
    'avg_max_temperature',
    'avg_min_temperature',
    'avg_bright_sunshine',
]
weekly_climate = (
    fires_with_province
    .groupby(['PREABBR', 'Year', 'Month'])
    .agg({col: 'mean' for col in weekly_avg_cols})
    .reset_index()
)

# First day of each (Year, Month) as a datetime, for the time axis
year_text = weekly_climate['Year'].astype(str)
month_text = weekly_climate['Month'].astype(str)
weekly_climate['Date'] = pd.to_datetime(year_text + '-' + month_text + '-01')

# One time-series figure per variable: (column, title, y-axis label)
weekly_panels = [
    (
        'avg_mean_temperature',
        'Weekly Avg. Mean Temperature Over Time by Province (Fires Data)',
        'Avg. Mean Temperature (°C)',
    ),
    (
        'avg_total_precipitation',
        'Weekly Avg. Total Precipitation Over Time by Province (Fires Data)',
        'Avg. Total Precipitation (mm)',
    ),
]
for value_col, panel_title, y_label in weekly_panels:
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.lineplot(
        data=weekly_climate,
        x='Date',
        y=value_col,
        hue='PREABBR',
        marker='o',
        ax=ax,
    )
    ax.set_title(panel_title)
    ax.set_xlabel('Date')
    ax.set_ylabel(y_label)
    plt.xticks(rotation=45)
    fig.tight_layout()
    plt.show()

In [None]:
# ================================
# 5. Additional Analysis: Correlation Heatmap of Climate Variables
# ================================
# Climate columns of fires_with_province to correlate against each other
climate_vars = [
    'avg_total_precipitation',
    'avg_mean_temperature',
    'avg_max_temperature',
    'avg_min_temperature',
]

# Force the columns to numeric (unparseable values become NaN), then compute
# the pairwise correlation matrix
climate_subset = fires_with_province[climate_vars]
corr_data = climate_subset.apply(pd.to_numeric, errors='coerce')
corr_matrix = corr_data.corr()

# Annotated heatmap of the correlation matrix
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', ax=ax)
ax.set_title('Correlation Matrix of Weekly Climate Variables')
fig.tight_layout()
plt.show()


In [None]:
# ================================
# 6. Extra Visualization: Fire Count vs. Climate Variables
# ================================
# Fire records are counted per province and calendar month (Year, Month), so
# the plots below are labelled "Monthly". The weekly_* variable names are kept
# unchanged so that any other cell referring to them keeps working.

weekly_fire_counts = fires_with_province.groupby(['PREABBR', 'Year', 'Month']).size().reset_index(name='fire_count')
# First day of each (Year, Month); also used as a merge key below
weekly_fire_counts['Date'] = pd.to_datetime(weekly_fire_counts['Year'].astype(str) + '-' + weekly_fire_counts['Month'].astype(str) + '-01')

plt.figure(figsize=(12,6))
sns.scatterplot(data=weekly_fire_counts, x='fire_count', y='Year', hue='PREABBR')
plt.title('Monthly Fire Counts by Year and Province')
plt.xlabel('Monthly Fire Count')
plt.ylabel('Year')
plt.tight_layout()
plt.show()

# Join the counts with the climate averages from weekly_climate (defined in an
# earlier cell) on their shared keys to relate avg_mean_temperature to fire_count.
# pd.merge defaults to an inner join, so only groups present in both are kept.
merged_weekly = pd.merge(weekly_fire_counts, weekly_climate, on=['PREABBR', 'Year', 'Month', 'Date'])

plt.figure(figsize=(10,6))
sns.scatterplot(data=merged_weekly, x='avg_mean_temperature', y='fire_count', hue='PREABBR')
plt.title('Monthly Fire Count vs. Avg. Mean Temperature by Province')
plt.xlabel('Avg. Mean Temperature (°C)')
plt.ylabel('Monthly Fire Count')
plt.tight_layout()
plt.show()