In [1]:
import pandas as pd
import warnings
from pmdarima import auto_arima
from datetime import datetime, timedelta
import json

In [3]:
# Load the cleaned food-price table.
file_path = "../Data/Interim/cleaned_food_prices.csv"
df_range = pd.read_csv(file_path)

# Commodities present in the table; the per-item column names dropped below
# are derived from this single list instead of being typed out twice.
FOOD_ITEMS = [
    'beans', 'cabbage', 'carrots', 'eggs', 'meat_beef_chops',
    'meat_chicken_whole', 'meat_pork', 'onions', 'potatoes', 'rice', 'tomatoes',
]

# Drop every column related to the aggregate food price index.
fpi_columns = [
    f'{prefix}_food_price_index'
    for prefix in ('o', 'h', 'l', 'c', 'inflation', 'trust')
]
df_nofpi_range = df_range.drop(columns=fpi_columns)

# Convert 'Date' column to datetime format
df_nofpi_range['Date'] = pd.to_datetime(df_nofpi_range['Date'])

# Drop the per-item inflation columns.
df_noinf_range = df_nofpi_range.drop(columns=[f'inflation_{item}' for item in FOOD_ITEMS])

# Drop the per-item trust-score columns.
df_cleaned_range = df_noinf_range.drop(columns=[f'trust_{item}' for item in FOOD_ITEMS])

# Drop location/calendar columns that are not needed downstream.
df_unneeded_range = df_cleaned_range.drop(
    columns=['country', 'City', 'lat', 'lon', 'Province', 'year', 'month']
)

# Reshape from wide to long: one row per (Region, Date, price column).
# 'year' and 'month' were dropped above, so 'Date' is the only time identifier.
df_range = df_unneeded_range.melt(
    id_vars=['Region', 'Date'], var_name='Food_Items', value_name='Price'
)

# Keep only the 'c_'-prefixed series (presumably the closing price - TODO confirm).
# .copy() makes this an independent frame so the assignment below is not a
# chained assignment on a slice of df_range.
df_range_filtered = df_range[df_range['Food_Items'].str.startswith('c_')].copy()

# Strip the prefix only: the regex is anchored so a 'c_' occurring later in an
# item name is left untouched.
df_range_filtered['Food_Items'] = df_range_filtered['Food_Items'].str.replace(
    r'^c_', '', regex=True
)

# Sort by region, then date
dfml = df_range_filtered.sort_values(by=['Region', 'Date'])

In [4]:
# Suppress warnings
warnings.filterwarnings("ignore")

def preprocess_data(df, region, item):
    """Build a gap-free monthly price series for one region/item pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table with 'Region', 'Food_Items', 'Date' and 'Price'
        columns. 'Date' may be datetime-typed or parseable strings.
    region, item :
        Values matched (by equality) against 'Region' and 'Food_Items'.

    Returns
    -------
    pandas.DataFrame
        A single 'Price' column indexed by month-start dates ('MS'
        frequency) spanning the first to the last observed month. Several
        observations in one month are averaged; months without an observation
        are filled by interpolation. Empty if nothing matches.
    """
    matches = (df['Region'] == region) & (df['Food_Items'] == item)
    selected = df.loc[matches, ['Date', 'Price']]

    # Snap each observation to the first day of its month. Without this,
    # asfreq('MS') below would discard any row not dated on the 1st and the
    # resulting series would be all-NaN for mid-month data.
    month_start = pd.to_datetime(selected['Date']).dt.to_period('M').dt.to_timestamp()

    monthly = (
        selected.assign(Date=month_start)
        .groupby('Date', as_index=False)['Price']
        .mean()  # Average duplicate observations within a month
    )
    monthly = monthly.set_index('Date').asfreq('MS')  # Insert rows for missing months
    monthly['Price'] = monthly['Price'].interpolate()  # Fill the inserted gaps
    return monthly

def generate_forecast_with_params(df, region, item, n_periods=36, min_observations=12):
    """Fit a seasonal ARIMA model to one region/item series and forecast ahead.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format price table, forwarded unchanged to ``preprocess_data``.
    region, item :
        Values selecting the series to model.
    n_periods : int, default 36
        Number of monthly steps to forecast past the last historical month.
    min_observations : int, default 12
        Series with fewer rows than this are skipped.

    Returns
    -------
    dict or None
        ``None`` when the series is too short or when fitting/forecasting
        raises (the error is printed, not re-raised). Otherwise a dict with
        the keys 'region', 'item', 'historical_dates', 'historical_values',
        'forecast_dates', 'forecast_values', 'p', 'd', 'q', 'P', 'D', 'Q'
        and 'aic'. Dates are 'YYYY-MM-DD' strings.
    """
    region_df = preprocess_data(df, region, item)

    if len(region_df) < min_observations:
        return None

    try:
        # Exhaustive (non-stepwise) search over seasonal ARIMA orders, m=12.
        # The former `start_d=0` argument was removed: auto_arima has no such
        # parameter, so it was forwarded as a fit keyword and never influenced
        # the search.
        # NOTE(review): `max_order` is left at its default, which per the
        # pmdarima docs caps p+q+P+Q in non-stepwise mode - the bounds of 10
        # on p and q may therefore never be reached. Confirm this is intended.
        model = auto_arima(
            region_df['Price'],
            start_p=0, max_p=10,
            max_d=10,
            start_q=0, max_q=10,
            seasonal=True,
            m=12,
            stepwise=False,  # Use complete grid search
            trace=True,
            n_jobs=-1,
            error_action='ignore',
            suppress_warnings=True,
        )

        forecast = model.predict(n_periods=n_periods)

        p, d, q = model.order[:3]
        seasonal_p, seasonal_d, seasonal_q = model.seasonal_order[:3]

        # Forecast months start one month after the last historical month.
        first_future = region_df.index[-1] + pd.DateOffset(months=1)
        future_dates = pd.date_range(start=first_future, periods=n_periods, freq='MS')

        return {
            'region': region,
            'item': item,
            'historical_dates': region_df.index.strftime('%Y-%m-%d').tolist(),
            'historical_values': region_df['Price'].tolist(),
            'forecast_dates': future_dates.strftime('%Y-%m-%d').tolist(),
            'forecast_values': forecast.tolist(),
            'p': p,
            'd': d,
            'q': q,
            'P': seasonal_p,
            'D': seasonal_d,
            'Q': seasonal_q,
            'aic': model.aic(),
        }

    except Exception as e:
        # Best-effort: one failing series must not abort a sweep over many.
        print(f"Error generating forecast for {region} - {item}: {str(e)}")
        return None

def save_forecasts(df, output_file='forecasts.csv'):
    """Forecast every region/item combination in ``df`` and write one CSV.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format price table with at least 'Region' and 'Food_Items'
        columns; passed unchanged to ``generate_forecast_with_params``.
    output_file : str, default 'forecasts.csv'
        Destination path of the CSV.

    Notes
    -----
    One row is written per combination that produced a forecast. The list
    fields (dates and values) are stored as JSON strings so each fits in a
    single CSV cell. The header row is always written, even when no forecast
    succeeded, so the file can still be read back.
    """
    list_fields = ('historical_dates', 'historical_values',
                   'forecast_dates', 'forecast_values')
    scalar_fields = ('p', 'd', 'q', 'P', 'D', 'Q', 'aic')
    columns = ['region', 'item', *list_fields, *scalar_fields]

    # Missing labels are dropped first: NaN cannot be ordered against strings.
    regions = sorted(df['Region'].dropna().unique())
    items = sorted(df['Food_Items'].dropna().unique())

    all_forecasts = []

    for region in regions:
        for item in items:
            print(f"Processing {region} - {item}")
            forecast = generate_forecast_with_params(df, region, item)

            if not forecast:
                continue

            # Flatten the forecast data for CSV storage
            forecast_row = {'region': region, 'item': item}
            for field in list_fields:
                forecast_row[field] = json.dumps(forecast[field])
            for field in scalar_fields:
                forecast_row[field] = forecast[field]
            all_forecasts.append(forecast_row)

    # Explicit columns keep the header (and its order) stable even when
    # all_forecasts is empty.
    forecasts_df = pd.DataFrame(all_forecasts, columns=columns)
    forecasts_df.to_csv(output_file, index=False)
    print(f"Forecasts saved to {output_file}")

# Entry point: run the forecasting sweep over the prepared long-format
# frame and write the results next to the notebook.
if __name__ == "__main__":
    save_forecasts(dfml, output_file='forecasts.csv')

Processing Autonomous region in Muslim Mindanao - beans

Best model:  ARIMA(3,1,2)(0,0,0)[12] intercept
Total fit time: 15.297 seconds
Processing Autonomous region in Muslim Mindanao - cabbage

Best model:  ARIMA(2,1,1)(1,0,1)[12] intercept
Total fit time: 7.564 seconds
Processing Autonomous region in Muslim Mindanao - carrots

Best model:  ARIMA(2,1,1)(0,0,0)[12] intercept
Total fit time: 9.199 seconds
Processing Autonomous region in Muslim Mindanao - eggs

Best model:  ARIMA(0,1,5)(0,0,0)[12] intercept
Total fit time: 11.228 seconds
Processing Autonomous region in Muslim Mindanao - meat_beef_chops

Best model:  ARIMA(1,2,2)(0,0,0)[12]          
Total fit time: 7.077 seconds
Processing Autonomous region in Muslim Mindanao - meat_chicken_whole

Best model:  ARIMA(3,1,0)(0,0,0)[12] intercept
Total fit time: 7.021 seconds
Processing Autonomous region in Muslim Mindanao - meat_pork

Best model:  ARIMA(2,1,2)(1,0,0)[12] intercept
Total fit time: 6.435 seconds
Processing Autonomous region i