# In [None]:
import numpy as np
import pandas as pd

# 1-2. Read each cleaned dataset and convert its date column to datetime
#      in the same pipeline, so both frames are ready for date comparisons.
df_typhoon = (
    pd.read_csv('./data/typhoon-info/cleaned_2019-2025.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)
df_infra = (
    pd.read_csv('./data/infra-projects/cleaned_infra_projects.csv')
    .assign(
        ActualCompletionDate=lambda frame: pd.to_datetime(
            frame['ActualCompletionDate']
        )
    )
)

# 3. The "Sum-Before-Join" Logic
# For one typhoon row, this function finds every project in the same province
# that was completed before the typhoon date and collapses them into three
# cumulative figures (budget, variance, variance ratio).
def get_full_infra_context(row, infra=None):
    """Summarise the projects completed in a province before a typhoon.

    Args:
        row: A mapping-like record (e.g. a row passed by
            ``DataFrame.apply(..., axis=1)``) providing ``'Province'`` and
            ``'Date'``.
        infra: Project table with the columns ``'Province'``,
            ``'ActualCompletionDate'``, ``'Final_Budget'`` and
            ``'Budget_Variance'``. When omitted, the module-level
            ``df_infra`` frame is used, so existing one-argument callers
            keep working.

    Returns:
        A ``pd.Series`` of floats indexed by
        ``'Cumulative_Budget_To_Date'``, ``'Cumulative_Variance_To_Date'``
        and ``'Variance_Ratio_To_Date'``. All three are ``0.0`` when no
        project matches; the ratio is also ``0.0`` when the summed budget
        is zero.
    """
    if infra is None:
        infra = df_infra

    # Strict "<": a project completed on the typhoon date itself is excluded.
    completed_before = (
        (infra['Province'] == row['Province'])
        & (infra['ActualCompletionDate'] < row['Date'])
    )
    matching = infra.loc[completed_before]

    # Summing an empty selection yields 0, so the no-match case needs no
    # special branch; float() keeps the result dtype the same either way.
    budget_so_far = float(matching['Final_Budget'].sum())
    variance_so_far = float(matching['Budget_Variance'].sum())

    # Variance as a fraction of the summed budget; guard against dividing by
    # a zero budget.
    if budget_so_far != 0:
        variance_ratio = variance_so_far / budget_so_far
    else:
        variance_ratio = 0.0

    return pd.Series(
        [budget_so_far, variance_so_far, variance_ratio],
        index=[
            'Cumulative_Budget_To_Date',
            'Cumulative_Variance_To_Date',
            'Variance_Ratio_To_Date',
        ],
    )

# 4. Attach the cumulative infrastructure figures to the typhoon data.
# get_full_infra_context returns a three-value Series per row, so apply()
# produces a three-column frame; assign it to three columns, not one.
infra_context_columns = [
    'Cumulative_Budget_To_Date',
    'Cumulative_Variance_To_Date',
    'Variance_Ratio_To_Date',
]
df_typhoon[infra_context_columns] = df_typhoon.apply(
    get_full_infra_context, axis=1
)

# Order the records chronologically before writing them out.
df_typhoon = df_typhoon.sort_values(by='Date', ascending=True)

# 5. Save the final file
df_typhoon.to_csv('master_flood_efficacy_data.csv', index=False)