In [3]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import plotly.graph_objects as go

data = pd.read_csv('modified_spending_data.csv')
data['Timestamp'] = pd.to_datetime(data['Timestamp'], format='%Y-%m-%d %H:%M:%S')
data['Day'] = data['Timestamp'].dt.date
daily_data = data.groupby('Day')['Amount'].sum().reset_index()
daily_data['Day'] = pd.to_datetime(daily_data['Day'])
time_series_daily = daily_data.set_index('Day')['Amount']

sarima_model = sm.tsa.SARIMAX(
    time_series_daily,
    order=(1, 1, 1),
    seasonal_order=(1, 1, 1, 12),
    enforce_stationarity=False,
    enforce_invertibility=False
)
sarima_result = sarima_model.fit(disp=False)

forecast_steps = 1
forecast = sarima_result.get_forecast(steps=forecast_steps)
forecast_value = forecast.predicted_mean.iloc[0]

latest_date = data['Timestamp'].max().date()
cutoff_date = latest_date + pd.Timedelta(days=1)
end_of_month = cutoff_date + pd.offsets.MonthEnd(0)

forecast_index = pd.date_range(start=cutoff_date, end=end_of_month, freq='D')
historical_jan_data = data[data['Timestamp'].dt.month == cutoff_date.month]
historical_jan_data['Day'] = historical_jan_data['Timestamp'].dt.day
daily_avg = historical_jan_data.groupby('Day')['Amount'].sum() / historical_jan_data['Day'].nunique()
daily_forecast = daily_avg / daily_avg.sum() * forecast_value
remaining_forecast_index = pd.date_range(start=cutoff_date, end=end_of_month, freq='D')
remaining_forecast_values = daily_forecast.values[-len(remaining_forecast_index):]

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=time_series_daily.index,
    y=time_series_daily,
    mode='lines',
    name='Historical Data (Daily)',
    line=dict(color='blue')
))
fig.add_trace(go.Scatter(
    x=remaining_forecast_index,
    y=remaining_forecast_values,
    mode='lines',
    name=f'Forecast from {cutoff_date} to {end_of_month} (Daily)',
    line=dict(color='red', dash='dot')
))
fig.update_layout(
    title=f"Expenditure Forecast for {cutoff_date.strftime('%B %Y')} - Daily",
    xaxis_title="Date",
    yaxis_title="Expenditure Amount",
    template="plotly_white",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
)
fig.show()



A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


No supported index is available. Prediction results will be given with an integer index beginning at `start`.


No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

