# In [None]:
import pandas as pd
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Read the raw dataset, parse its day-first 'Date' column (format DD-MM-YYYY),
# make those dates the index, and order the rows from oldest to newest.
df = (
    pd.read_csv(r'C:\Users\HP\Documents\FMR-dataset.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'], format='%d-%m-%Y'))
    .set_index('Date')
    .sort_index()
)

# Split the data: training covers everything up to and including 31 October 2024.
# .copy() gives train_data its own storage, so the feature columns added below
# are not assigned onto a slice of df (avoids pandas' SettingWithCopyWarning).
train_data = df[:'2024-10-31'].copy()

# Every regressor for a row is built only from EARLIER observations.
# The rolling statistics are taken over the series shifted by one step, so the
# current 'Actual VPI' (the value being modelled) never appears in its own
# regressors; a rolling window over the unshifted series would include it.
previous_vpi = train_data['Actual VPI'].shift(1)
train_data['lag_1'] = previous_vpi
train_data['lag_2'] = train_data['Actual VPI'].shift(2)
train_data['rolling_mean_3'] = previous_vpi.rolling(window=3).mean()
train_data['rolling_std_3'] = previous_vpi.rolling(window=3).std()

# The lags and rolling windows leave NaNs in the first rows; drop those rows
# (dropna also removes rows with NaNs in any other column of the dataset).
train_data = train_data.dropna()

# Columns of train_data supplied to the model as exogenous regressors
exog_vars = ['lag_1', 'lag_2', 'rolling_mean_3', 'rolling_std_3']

# SARIMA(1, 1, 1) x (1, 1, 1, 12) specification; the stationarity and
# invertibility constraints on the estimated parameters are switched off.
sarima_settings = {
    'order': (1, 1, 1),
    'seasonal_order': (1, 1, 1, 12),
    'enforce_stationarity': False,
    'enforce_invertibility': False,
}
sarima_model = SARIMAX(
    train_data['Actual VPI'],
    exog=train_data[exog_vars],
    **sarima_settings,
)

# Estimate the parameters without printing optimizer convergence output
sarima_results = sarima_model.fit(disp=False)

# In-sample predictions over the whole training period.
# No exog argument is passed: the fitted model already holds the training
# regressors, and extra exog is only needed for out-of-sample steps.
predictions = sarima_results.predict(start=train_data.index[0], end=train_data.index[-1])

# Mean absolute error between the observed series and the in-sample predictions.
# NOTE(review): this is a training-set error, not an out-of-sample accuracy estimate.
actual_values = train_data['Actual VPI']
mae = (actual_values - predictions).abs().mean()

print(f"Mean Absolute Error (MAE): {mae}")

# Number of daily steps to forecast (also the number of rows in the output)
forecast_horizon = 396

# The fitted model forecasts from the end of its training sample, so both the
# forecast dates and the seed values below are anchored on train_data rather
# than on df (the two coincide when df ends on the training cut-off date).
future_dates = pd.date_range(start=train_data.index[-1] + pd.Timedelta(days=1),
                             periods=forecast_horizon, freq='D')

# Create a list to store the forecasted values
forecast_values = []

# The three most recent values of the series (oldest first). All four
# regressors for the next step are derived from this window.
endog_name = 'Actual VPI'
recent_values = train_data[endog_name].iloc[-3:].tolist()

# Results object that is advanced one observation per iteration; the original
# sarima_results is left untouched for any later use.
rolling_results = sarima_results

# Forecast iteratively, one step at a time, feeding each forecast back in
for _ in range(forecast_horizon):
    # Build the exogenous variables for the step being forecast
    window = pd.Series(recent_values)
    last_lag_1 = recent_values[-1]
    last_lag_2 = recent_values[-2]
    rolling_mean_3 = window.mean()
    rolling_std_3 = window.std()  # sample std (ddof=1), the same default as rolling(...).std()

    future_exog = pd.DataFrame({
        'lag_1': [last_lag_1],
        'lag_2': [last_lag_2],
        'rolling_mean_3': [rolling_mean_3],
        'rolling_std_3': [rolling_std_3]
    })

    # Forecast the next step. The result is a one-element Series whose index
    # label is the forecast period, so it is read by position, not by label 0.
    next_forecast = rolling_results.forecast(steps=1, exog=future_exog)
    forecast_step = next_forecast.iloc[0]
    forecast_values.append(forecast_step)

    # Advance the model state by treating the forecast as the next observation
    # (parameters are kept, refit=False). Without this, every iteration would
    # forecast the same first out-of-sample period again.
    # The appended data must carry the index of the forecast period and the
    # same series/column names as the data the model was built with.
    future_exog.index = next_forecast.index
    rolling_results = rolling_results.append(
        next_forecast.rename(endog_name),
        exog=future_exog,
        refit=False,
    )

    # Slide the 3-value window forward to include the new forecast
    recent_values = recent_values[1:] + [forecast_step]

# Assemble the forecast table: one row per future date with its forecast
forecast_df = pd.DataFrame({'Date': future_dates})
forecast_df['Forecasted Value'] = forecast_values

# Write the table to CSV in the working directory, without the row index
forecast_df.to_csv('Actual_VPI_forecast_396_days.csv', index=False)

# Show the first five rows as a quick check
print(forecast_df.iloc[:5])
