# Canada Aviation Seasonality Analysis

This notebook rebuilds all of the exports (CSV tables and a PNG comparison chart) that we discussed in ChatGPT:

1. Load **aircraft movements** data (2012–2018) from Statistics Canada.
2. Load **screened passenger** data (2019+).
3. Compute monthly seasonality profiles:
   - Aircraft movements: 2012–2018.
   - Passengers: 2019, 2024, 2025 only.
4. Compare seasonality (movements vs passengers).
5. Optionally create synthetic monthly passengers for 2012–2018 using movements.

Make sure the following files are in the **same folder** as this notebook:

- `2310000201-eng.csv`  (aircraft movements table)
- `23100312-eng.zip`   (screened passenger traffic)
- `2310025301-eng.csv` (annual passenger totals, if you want synthetic passengers)

The notebook will create CSV and PNG outputs in the same folder.

In [None]:
import pandas as pd
import csv
import datetime as dt
import matplotlib.pyplot as plt
import zipfile
import os

# Show the directory against which the relative input/output paths below resolve.
current_dir = os.getcwd()
print('Working directory:', current_dir)

## 1. Load aircraft movements (2012–2018)

We use the Statistics Canada CSV `2310000201-eng.csv` and extract the row
for **`Total, itinerant and local movements`**.

In [None]:
mov_path = '2310000201-eng.csv'
TOTAL_LABEL = 'Total, itinerant and local movements'


def parse_count(text):
    """Return the integer held in a count cell such as '1,234'.

    Returns None when the cell is not a plain integer (blank cell,
    placeholder symbol, etc.) so the caller can skip it instead of crashing.
    """
    cleaned = str(text).replace(',', '').strip()
    try:
        return int(cleaned)
    except ValueError:
        return None


# Read the whole file as raw rows: the table is preceded by free-form title
# lines, so it cannot be handed to pd.read_csv directly.
# 'utf-8-sig' drops a leading byte-order mark if one is present, and
# newline='' is what the csv module expects from the file object.
with open(mov_path, 'r', encoding='utf-8-sig', errors='ignore', newline='') as f:
    rows = list(csv.reader(f))

# Find the header row that starts with 'Class of operation'
header_idx = next(
    (i for i, r in enumerate(rows) if r and r[0].strip().startswith('Class of operation')),
    None,
)
if header_idx is None:
    raise ValueError('Could not find header row in movements CSV')

header = rows[header_idx]
data_rows = [r for r in rows[header_idx + 1:] if r]

# Locate the total row by its label rather than by position, so an extra
# units/sub-header row after the header does not get mistaken for data.
total_row = next(
    (r for r in data_rows if r[0].strip().lower().startswith(TOTAL_LABEL.lower())),
    None,
)
if total_row is None:
    if not data_rows:
        raise ValueError('No data rows found after the header in movements CSV')
    # Backward-compatible fallback: the first data row, as before.
    print(f'Warning: no row labelled {TOTAL_LABEL!r}; using the first data row instead.')
    total_row = data_rows[0]

dates = header[1:]
vals = total_row[1:]

mov_records = []
skipped_columns = 0
for d, v in zip(dates, vals):
    label = d.strip()
    if not label:
        continue
    try:
        # '%B' matches the full month name in the current locale
        # (English month names are expected in this file).
        date_obj = dt.datetime.strptime(label, '%B %Y')
    except ValueError:
        skipped_columns += 1  # header cell that is not a 'Month YYYY' label
        continue
    val_int = parse_count(v)
    if val_int is None:
        skipped_columns += 1  # missing / suppressed value
        continue
    mov_records.append({
        'date': date_obj,
        'year': date_obj.year,
        'month': date_obj.month,
        'movements': val_int,
    })

if skipped_columns:
    print(f'Warning: skipped {skipped_columns} column(s) with an unparseable date or value.')
if not mov_records:
    raise ValueError('No monthly movement values could be parsed from movements CSV')

mov_df = pd.DataFrame(mov_records).sort_values('date')
mov_df.head()

## 2. Compute aircraft movement seasonality (2012–2018)

In [None]:
# Keep only the 2012–2018 window (both ends inclusive).
in_window = mov_df['year'].between(2012, 2018)
mov_df = mov_df.loc[in_window].copy()

# Each month's share of its own year's total movements.
annual_movements = mov_df.groupby('year')['movements'].transform('sum')
mov_df['year_total'] = annual_movements
mov_df['month_share'] = mov_df['movements'].div(mov_df['year_total'])

# Seasonality profile: the share of each calendar month, averaged over years.
season_mov = (
    mov_df
    .groupby('month', as_index=False)
    .agg(month_share=('month_share', 'mean'))
)
season_mov

## 3. Load screened passenger data (2019+)

In [None]:
zip_path = '23100312-eng.zip'
passenger_csv = '23100312.csv'

# Unpack every member of the archive into the working directory.
with zipfile.ZipFile(zip_path, 'r') as archive:
    archive.extractall('.')

# Keep only the 'Total passengers' rows and parse the reference date.
dfp = pd.read_csv(passenger_csv)
is_total = dfp['Screened traffic'] == 'Total passengers'
dfp = dfp.loc[is_total].copy()
dfp['date'] = pd.to_datetime(dfp['REF_DATE'])

# One row per date, keeping the largest VALUE reported for that date.
# NOTE(review): presumably several rows share a date and the largest one is
# the overall total — confirm against the table layout.
dfp_month = (
    dfp
    .groupby('date', as_index=False)['VALUE']
    .max()
    .rename(columns={'VALUE': 'passengers'})
)
dfp_month['year'] = dfp_month['date'].dt.year
dfp_month['month'] = dfp_month['date'].dt.month
dfp_month.head()

## 4. Passenger seasonality (2019, 2024, 2025 only)

In [None]:
PASSENGER_YEARS = [2019, 2024, 2025]

# Collapse to exactly one row per (year, month). If dfp_month already has one
# row per month this changes nothing; if it has several rows per month, the
# shares below become true monthly shares instead of per-row shares.
df_pass = (
    dfp_month[dfp_month['year'].isin(PASSENGER_YEARS)]
    .groupby(['year', 'month'], as_index=False)
    .agg(
        date=('date', 'min'),
        # min_count=1 keeps an all-missing month as NaN instead of 0.
        passengers=('passengers', lambda s: s.sum(min_count=1)),
    )
    [['date', 'passengers', 'year', 'month']]
)

# A year without all 12 months has a truncated annual total, which inflates
# every one of its month shares and biases the averaged profile. Only
# complete years can contribute to the seasonality profile.
months_per_year = df_pass.groupby('year')['month'].nunique()
incomplete_years = [int(y) for y in months_per_year[months_per_year < 12].index]
if incomplete_years:
    print('Excluding incomplete years from passenger seasonality:', sorted(incomplete_years))
    df_pass = df_pass[~df_pass['year'].isin(incomplete_years)].copy()
if df_pass.empty:
    raise ValueError(
        f'No complete year of passenger data among {PASSENGER_YEARS}; '
        'cannot compute a seasonality profile'
    )

# Each month's share of its own year's total, then averaged per calendar month.
df_pass['year_total'] = df_pass.groupby('year')['passengers'].transform('sum')
df_pass['month_share'] = df_pass['passengers'] / df_pass['year_total']
season_pass = df_pass.groupby('month', as_index=False)['month_share'].mean()
season_pass

## 5. Combine and compare seasonality

In [None]:
# Join the two profiles on calendar month; the shared 'month_share' column is
# disambiguated with the '_mov' / '_pass' suffixes.
season = pd.merge(season_mov, season_pass, on='month', suffixes=('_mov', '_pass'))

# Passenger share minus movement share, expressed in percentage points.
share_gap = season['month_share_pass'].sub(season['month_share_mov'])
season['diff_pct_points'] = share_gap * 100
season.sort_values('month')

### Save comparison to CSV

In [None]:
# Write the month-by-month comparison table, without the index column.
out_csv = 'seasonality_mov_vs_pass_2019_2024_2025.csv'
season.to_csv(path_or_buf=out_csv, index=False)
print('Saved:', out_csv)

## 6. Plot seasonality comparison

In [None]:
# One line per profile, drawn on a single set of axes.
fig, ax = plt.subplots(figsize=(10, 6))
series_to_plot = [
    ('month_share_mov', 'Movements Seasonality (2012–2018)'),
    ('month_share_pass', 'Passenger Seasonality (2019, 2024, 2025)'),
]
for column, legend_label in series_to_plot:
    ax.plot(season['month'], season[column], marker='o', label=legend_label)

ax.set_xlabel('Month')
ax.set_ylabel('Average Share of Annual Traffic')
ax.set_title('Seasonality Comparison: Movements vs Passengers')
ax.grid(True)
ax.legend()

# Save the figure next to the notebook outputs.
chart_path = 'seasonality_chart_2019_2024_2025.png'
fig.savefig(chart_path, dpi=300, bbox_inches='tight')
print('Saved chart to:', chart_path)

## 7. (Optional) Synthetic monthly passengers 2012–2018

If `2310025301-eng.csv` is present, this cell will build synthetic monthly
passenger estimates using movement shares and annual totals.

In [None]:
annual_path = '2310025301-eng.csv'


def parse_annual_totals(csv_rows, first_year=2012, last_year=2018):
    """Extract {year: annual passenger total} from the raw rows of the annual CSV.

    Parameters
    ----------
    csv_rows : list of list of str
        Rows as produced by csv.reader, including any title lines.
    first_year, last_year : int
        Inclusive range of years to keep.

    Returns
    -------
    dict
        Maps each year in range to its integer total. Header cells that are
        not a year and value cells that are not an integer are skipped.

    Raises
    ------
    ValueError
        If no row starting with 'Geography' exists, or no data row follows it.
    """
    header_idx_a = next(
        (i for i, r in enumerate(csv_rows) if r and r[0].strip().startswith('Geography')),
        None,
    )
    if header_idx_a is None:
        raise ValueError('Could not find header in annual CSV')

    header_a = csv_rows[header_idx_a]
    data_a = [r for r in csv_rows[header_idx_a + 1:] if r]
    if not data_a:
        raise ValueError('No data rows found after the header in annual CSV')

    # Prefer the row labelled 'Canada'; otherwise use the first data row.
    canada_row = next(
        (r for r in data_a if r[0].strip().startswith('Canada')),
        data_a[0],
    )

    totals = {}
    for y_str, v in zip(header_a[1:], canada_row[1:]):
        try:
            y = int(y_str.strip())
            total = int(str(v).replace(',', ''))
        except ValueError:
            continue  # not a year column, or a missing/suppressed value
        if first_year <= y <= last_year:
            totals[y] = total
    return totals


def build_synthetic_passengers(movements, annual_totals):
    """Spread annual passenger totals over months in proportion to movements.

    Parameters
    ----------
    movements : pandas.DataFrame
        Needs columns 'date', 'year', 'month', 'movements'.
    annual_totals : dict
        Maps year -> annual passenger total.

    Returns
    -------
    pandas.DataFrame
        Columns 'date', 'year', 'month', 'synthetic_passengers',
        'movement_share', sorted by date. Years that have no entry in
        annual_totals are left out rather than written as zero passengers.
    """
    columns = ['date', 'year', 'month', 'synthetic_passengers', 'movement_share']
    known = movements[movements['year'].isin(list(annual_totals))].copy()
    if known.empty:
        return pd.DataFrame(columns=columns)

    year_sum = known.groupby('year')['movements'].transform('sum')
    # A year whose movements sum to zero gets a share of 0 for every month.
    known['movement_share'] = (known['movements'] / year_sum).where(year_sum != 0, 0.0)
    known['synthetic_passengers'] = (
        (known['year'].map(annual_totals) * known['movement_share'])
        .round()
        .astype('int64')
    )
    return known[columns].sort_values('date').reset_index(drop=True)


if os.path.exists(annual_path):
    # 'utf-8-sig' drops a leading byte-order mark if one is present, and
    # newline='' is what the csv module expects from the file object.
    with open(annual_path, 'r', encoding='utf-8-sig', errors='ignore', newline='') as f:
        annual_rows = list(csv.reader(f))

    annual_totals = parse_annual_totals(annual_rows)
    print('Annual totals used:', annual_totals)

    missing_years = [y for y in range(2012, 2019) if y not in annual_totals]
    if missing_years:
        print('No annual total found for (left out of the synthetic series):', missing_years)

    syn_df = build_synthetic_passengers(mov_df, annual_totals)
    syn_out = 'canada_synthetic_passengers_2012_2018_from_movements.csv'
    syn_df.to_csv(syn_out, index=False)
    print('Saved synthetic monthly passengers to:', syn_out)
else:
    print('Annual passenger file 2310025301-eng.csv not found; skipping synthetic series.')