In [None]:
# Import relevant python modules
import numpy as np
import pandas as pd
# Render floats with thousands separators and two decimals
# (e.g. 1,234,567.89) rather than scientific notation.
two_decimal_format = '{:,.2f}'.format
pd.set_option('display.float_format', two_decimal_format)

In [None]:
# 1. Load the DPWH flood-control projects CSV (a single file) into a DataFrame
path = './data/infra-projects/dpwh_flood_control_projects.csv'
df_infra = pd.read_csv(path)

# Jupyter only renders the *last* expression of a cell, so the intermediate
# inspections are passed to display() to make them visible. display() is
# built into the IPython/Jupyter kernel, so no import is needed here.
display(df_infra.columns)
display(df_infra.head())

# info() prints its summary (dtypes, non-null counts) directly
df_infra.info()

In [None]:
def _to_amount(col: pd.Series) -> pd.Series:
    """Convert a money column to numeric without silently losing values.

    String entries have thousands separators and surrounding whitespace
    removed before conversion, so amounts written like '1,234,567.00' are
    parsed instead of being coerced to NaN. Non-string entries are passed
    through untouched. Anything that still cannot be parsed becomes NaN,
    and the number of such values is printed so the loss is visible.
    """
    cleaned = col.map(
        lambda v: v.replace(',', '').strip() if isinstance(v, str) else v
    )
    amounts = pd.to_numeric(cleaned, errors='coerce')

    # Values that were present in the raw column but are NaN after parsing
    n_lost = int((amounts.isna() & col.notna()).sum())
    if n_lost:
        print(f"{col.name}: {n_lost} non-numeric value(s) coerced to NaN")
    return amounts


# 1. Keep only the columns used downstream; .copy() gives an independent
#    frame so the column assignments below do not act on a view.
df_infra = df_infra[[
    'FundingYear', 'Region', 'Province', 'Municipality',
    'ApprovedBudgetForContract', 'ContractCost',
    'ActualCompletionDate', 'TypeOfWork'
]].copy()

# 2. Convert the two money columns to numeric (they may have been read
#    as strings)
for amount_col in ('ApprovedBudgetForContract', 'ContractCost'):
    df_infra[amount_col] = _to_amount(df_infra[amount_col])

# 3. 'Final_Budget': use ContractCost, falling back to the approved budget
#    (ABC) where ContractCost is missing
df_infra['Final_Budget'] = df_infra['ContractCost'].fillna(df_infra['ApprovedBudgetForContract'])

# 4. 'Budget_Variance': ABC minus ContractCost. Positive means the contract
#    cost is below the approved budget; NaN when either value is missing.
df_infra['Budget_Variance'] = df_infra['ApprovedBudgetForContract'] - df_infra['ContractCost']

# 5. Express Final_Budget in millions for reporting
df_infra['Final_Budget_M'] = df_infra['Final_Budget'] / 1_000_000

In [None]:
# Write the cleaned frame to disk. index=False keeps the DataFrame's row
# index out of the file, so no extra unnamed leading column is written.
cleaned_csv_path = 'data/infra-projects/cleaned_infra_projects.csv'
df_infra.to_csv(cleaned_csv_path, index=False)

