In [None]:
import numpy as np
import pandas as pd

# 1-2. Load each cleaned dataset and convert its date column to datetime
# Typhoon events: 'Date' is the event date used for the comparison further down.
df_typhoon = (
    pd.read_csv('./data/typhoon-info/cleaned_2019-2025.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)

# Infrastructure projects: 'ActualCompletionDate' is compared against the typhoon date.
df_infra = (
    pd.read_csv('./data/infra-projects/cleaned_infra_projects.csv')
    .assign(ActualCompletionDate=lambda frame: pd.to_datetime(frame['ActualCompletionDate']))
)

# 3. Calculate the cumulative flood-control spending completed before a given typhoon event
def get_full_infra_context(row, infra=None):
    """Summarise projects completed in the row's province before the row's date.

    Args:
        row: A typhoon record exposing 'Province' and 'Date'.
        infra: Project table with 'Province', 'ActualCompletionDate',
            'Final_Budget' and 'Budget_Variance' columns. Defaults to the
            module-level ``df_infra`` so the function can still be passed
            directly to ``DataFrame.apply(..., axis=1)``.

    Returns:
        A float ``pd.Series`` indexed by 'Cumulative_Budget_To_Date',
        'Cumulative_Variance_To_Date' and 'Variance_Ratio_To_Date'. All three
        are 0.0 when no project matches; the ratio is 0.0 whenever the summed
        budget is zero.
    """
    labels = ['Cumulative_Budget_To_Date', 'Cumulative_Variance_To_Date', 'Variance_Ratio_To_Date']

    if infra is None:
        infra = df_infra

    # Same province AND completed strictly before the typhoon date.
    same_province = infra['Province'] == row['Province']
    done_before = infra['ActualCompletionDate'] < row['Date']
    finished = infra[same_province & done_before]

    # An empty selection sums to 0, so the "no matching project" case needs
    # no separate branch. float() keeps the result dtype the same on every path.
    budget_so_far = float(finished['Final_Budget'].sum())
    variance_so_far = float(finished['Budget_Variance'].sum())

    # Guard the division: a zero total budget yields a ratio of 0.0.
    variance_ratio = variance_so_far / budget_so_far if budget_so_far != 0 else 0.0

    return pd.Series([budget_so_far, variance_so_far, variance_ratio], index=labels)

# 4. Attach the data
# Row-wise .apply() returns one Series per typhoon, which pandas expands
# into the three new columns in a single pass.
infra_cols = df_typhoon.apply(get_full_infra_context, axis=1)

# 5. Append the new columns side by side, then order the events by Date
#    (ascending is the sort_values default)
df_typhoon = pd.concat([df_typhoon, infra_cols], axis=1).sort_values('Date')

# 6. Save the final file (without the index column)
df_typhoon.to_csv('./data/merged/typhoon-info-infra-project.csv', index=False)