# Feature Engineering: Banff Tourism Prediction
**Description:**
This notebook generates a daily time-series dataset (2024-2025) to serve as input features for predicting tourist flow in Banff, Alberta.

**Key Variables:**
* **Statutory Holidays:** Aggregates holidays from **Alberta** (local market), **British Columbia** (regional drive market), and the **USA** (major international market).
* **High-Impact Events:** Includes custom binary flags for the **Calgary Stampede** and **Spring Break**, differentiating between winter and summer peak demands.
* **Temporal Features:** Day of the week and weekend indicators to capture weekly cyclicality.

In [1]:
import holidays
import pandas as pd
from datetime import date

def generate_ml_features(start_year, end_year, *, spring_break_ranges=None, stampede_ranges=None):
    """Build a daily calendar-feature table for tourism-flow modelling.

    One row is produced for every day from January 1 of ``start_year`` to
    December 31 of ``end_year`` (both inclusive).

    Args:
        start_year: First calendar year to cover.
        end_year: Last calendar year to cover (inclusive).
        spring_break_ranges: Optional list of ``(start, end)`` ``date`` pairs
            (inclusive) flagged as Spring Break. Defaults to the hardcoded
            2024 and 2025 windows.
        stampede_ranges: Optional list of ``(start, end)`` ``date`` pairs
            (inclusive) flagged as Calgary Stampede. Defaults to the
            hardcoded 2024 and 2025 dates.

    Returns:
        A ``pandas.DataFrame`` with the columns ``date``, ``day_of_week``
        (0-6, Monday is 0), ``is_weekend``, ``is_holiday_AB``,
        ``is_holiday_BC``, ``is_holiday_US``, ``is_spring_break`` and
        ``is_stampede``. All ``is_*`` columns are 0/1 integers.

    Note:
        The default event windows only cover 2024 and 2025. For any other
        year the event flags are 0 unless explicit ranges are supplied.
    """
    # Key seasonal dates (hardcoded for accuracy) used when the caller does
    # not supply their own windows.
    if spring_break_ranges is None:
        # Spring Break (CBE/EPSB impact window)
        spring_break_ranges = [
            (date(2024, 3, 22), date(2024, 4, 1)),  # Includes weekends
            (date(2025, 3, 21), date(2025, 3, 30)),
        ]
    if stampede_ranges is None:
        # Calgary Stampede (Official Dates)
        stampede_ranges = [
            (date(2024, 7, 5), date(2024, 7, 14)),
            (date(2025, 7, 4), date(2025, 7, 13)),
        ]

    # Full date range (daily)
    full_date_range = pd.date_range(start=f'{start_year}-01-01', end=f'{end_year}-12-31')

    # Initialize the holiday calendars for EVERY year in the span, not just
    # the two endpoints, so multi-year ranges are populated up front.
    years = list(range(start_year, end_year + 1))
    ab_holidays = holidays.CA(subdiv='AB', years=years)
    bc_holidays = holidays.CA(subdiv='BC', years=years)
    us_holidays = holidays.US(years=years)

    def is_in_range(target_date, ranges):
        # 1 if the date falls inside any (start, end) window, inclusive; else 0
        return int(any(start <= target_date <= end for start, end in ranges))

    data = []

    for ts in full_date_range:
        d = ts.date()
        weekday = d.weekday()  # computed once, used by two features below

        # Build the feature dictionary (one row per date)
        data.append({
            "date": d,
            # Cyclical temporal feature (0-6)
            "day_of_week": weekday,
            # Weekend binary feature (Saturday=5, Sunday=6)
            "is_weekend": 1 if weekday >= 5 else 0,
            # Statutory Holidays (Binary flags)
            "is_holiday_AB": 1 if d in ab_holidays else 0,
            "is_holiday_BC": 1 if d in bc_holidays else 0,
            "is_holiday_US": 1 if d in us_holidays else 0,
            # Special Seasonal Events (separate indicators)
            "is_spring_break": is_in_range(d, spring_break_ranges),
            "is_stampede": is_in_range(d, stampede_ranges),
        })

    return pd.DataFrame(data)

# Build the daily feature table covering 2024 through 2025.
df_banff = generate_ml_features(start_year=2024, end_year=2025)

# Persist the table as CSV, leaving out the positional index column.
filename = "banff_tourism_ml_features.csv"
df_banff.to_csv(path_or_buf=filename, index=False)

# Confirm the write and preview the first five rows.
print(f"File '{filename}' generated.")
print(df_banff.head(5))


File 'banff_tourism_ml_features.csv' generated.
         date  day_of_week  is_weekend  is_holiday_AB  is_holiday_BC  \
0  2024-01-01            0           0              1              1   
1  2024-01-02            1           0              0              0   
2  2024-01-03            2           0              0              0   
3  2024-01-04            3           0              0              0   
4  2024-01-05            4           0              0              0   

   is_holiday_US  is_spring_break  is_stampede  
0              1                0            0  
1              0                0            0  
2              0                0            0  
3              0                0            0  
4              0                0            0  
