In [3]:
import pandas as pd
import os
from functools import reduce

# --- 1. DEFINE FILE PATHS AND HELPERS ---
data_path = '../data/processed/timeseries_data/'
output_path = '../data/processed/final_forecasting_dataset.csv'

# The four consolidated CSVs, in the order they are merged.
INPUT_FILES = (
    'gsdp_data.csv',
    'fiscal_deficit_data.csv',
    'cpi_inflation_data.csv',
    'agri_production_data.csv',
)

# Columns used both as the merge key and as the final sort order.
MERGE_KEYS = ['State', 'Year']


def load_timeseries_frames(folder, filenames=INPUT_FILES):
    """Read each CSV named in *filenames* from *folder*.

    Returns a list of DataFrames in the same order as *filenames*.
    Propagates FileNotFoundError from ``pd.read_csv`` when a file is missing.
    """
    return [pd.read_csv(os.path.join(folder, name)) for name in filenames]


def build_master_table(frames):
    """Outer-merge *frames* on 'State' and 'Year' and return the sorted result.

    Every frame must contain the 'State' and 'Year' columns. The merged
    'Year' column is cast to int and rows are ordered by State, then Year.

    Raises ValueError if *frames* is empty.
    """
    frames = list(frames)
    if not frames:
        raise ValueError("At least one dataframe is required to build the master table.")

    # Outer join keeps every (State, Year) pair that appears in any input;
    # columns from frames that lack that pair are filled with NaN.
    merged = reduce(
        lambda left, right: pd.merge(left, right, on=MERGE_KEYS, how='outer'),
        frames,
    )

    # Ensure 'Year' is an integer
    merged['Year'] = merged['Year'].astype(int)

    # Sort the data chronologically for each state
    return merged.sort_values(by=MERGE_KEYS)


# Guarded so the helpers above can be imported without touching the disk.
# In a notebook cell or when run as a script, __name__ is "__main__" and the
# pipeline executes exactly as before, binding the same top-level names.
if __name__ == "__main__":
    # --- 2. LOAD THE CLEANED CSVs ---
    try:
        gsdp_df, deficit_df, inflation_df, agri_df = load_timeseries_frames(data_path)
    except FileNotFoundError as e:
        # Report the folder that was actually searched.
        print(f"Error: Make sure all four required CSV files are in the '{data_path}' folder.")
        print(f"Missing file: {e.filename}")
        # Bare exit() is an interactive helper and would report status 0;
        # raise SystemExit with a non-zero code so the failure is visible.
        raise SystemExit(1)
    print("✅ All data files loaded successfully.")

    # --- 3. MERGE, CLEAN AND SORT INTO A MASTER TABLE ---
    print("Merging all datasets into a master table...")
    dataframes_to_merge = [gsdp_df, deficit_df, inflation_df, agri_df]
    print("Performing final cleaning and sorting...")
    master_df = build_master_table(dataframes_to_merge)

    # --- 4. SAVE AND PREVIEW THE FINAL DATASET ---
    master_df.to_csv(output_path, index=False)

    print("\n✅ Successfully created the final master table for forecasting!")
    print(f"Saved to: {output_path}")
    print("\n--- Preview of the Final Master Table ---")
    print(master_df.head())
    print("\n--- Last 5 rows of the final master table ---")
    print(master_df.tail())

✅ All data files loaded successfully.
Merging all datasets into a master table...
Performing final cleaning and sorting...

✅ Successfully created the final master table for forecasting!
Saved to: ../data/processed/final_forecasting_dataset.csv

--- Preview of the Final Master Table ---
                       State  Year    GSDP  Fiscal_Deficit_Percent  \
0  Andaman & Nicobar Islands  2011  3921.0                     NaN   
1  Andaman & Nicobar Islands  2017  7162.0                     NaN   
2  Andaman & Nicobar Islands  2018  8023.0                     NaN   
3  Andaman & Nicobar Islands  2019  8767.0                     NaN   
4  Andaman & Nicobar Islands  2020  8249.0                     NaN   

   CPI_Inflation  Agri_Production_Thousand_Tonnes  
0            NaN                              NaN  
1            NaN                              NaN  
2            NaN                              NaN  
3            NaN                              NaN  
4            NaN               