In [8]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error
import plotly.express as px

# Generate data
N = 365
P = 20
start_date = pd.to_datetime('2024-01-01')
dates = pd.date_range(start=start_date, periods=N, freq='D')
values = [30 * np.sin(2 * np.pi * i / 100) + 10 * np.random.randn() for i, _ in enumerate(dates)]

# Create DataFrame
df = pd.DataFrame({'date': dates, 'value': values})

# Split data into training and testing sets
train_size = int(len(df) * 0.8)
train, test = df[:train_size], df[train_size:]

# Fit model
model = SARIMAX(train['value'], order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
results = model.fit()

# Predict next 20 time steps
future_dates = pd.date_range(start=test['date'].iloc[-1] + pd.Timedelta(days=1), periods=P, freq='D')
future_df = pd.DataFrame({'date': future_dates})
future_df.set_index('date', inplace=True)
future_predictions = results.forecast(20)
future_df['date'] = future_dates
future_df['value'] = future_predictions.values

# Combine original data with predictions
combined_df = pd.concat([df, future_df])

# Plot results
colors = ['step count'] * N + ['predicted'] * P
fig = px.scatter(combined_df, x='date', y='value', color=colors)
fig.update_layout(title='Time Series Prediction', xaxis_title='Date', yaxis_title='Value')
fig.show()



 This problem is unconstrained.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            5     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  3.82772D+00    |proj g|=  7.73535D-02

At iterate    5    f=  3.76988D+00    |proj g|=  2.51977D-02

At iterate   10    f=  3.70342D+00    |proj g|=  1.97794D-02

At iterate   15    f=  3.70286D+00    |proj g|=  7.12843D-06

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    5     15     18      1     0     0   7.128D-06   3.703D+00
  F =   3.7028568206889023     

CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL            


In [9]:
len(combined_df)

385

In [10]:
combined_df

Unnamed: 0,date,value
0,2024-01-01,-13.907522
1,2024-01-02,0.441287
2,2024-01-03,5.951320
3,2024-01-04,3.678472
4,2024-01-05,-7.954863
...,...,...
2025-01-15 00:00:00,2025-01-15,-27.328932
2025-01-16 00:00:00,2025-01-16,-22.967716
2025-01-17 00:00:00,2025-01-17,-25.830515
2025-01-18 00:00:00,2025-01-18,-25.111509
