In [6]:
import pandas as pd
from pycaret.time_series import *

# Load dataset (adjust path if needed)
df = pd.read_csv(
    "/Users/anuradhasrivastav/Documents/github/pycaret assignment/time_series_forecasting/delhiclimate.csv"
)

# Parse date and sort
df["date"] = pd.to_datetime(df["date"])
df = df.sort_values("date").set_index("date")
df.head()


Unnamed: 0_level_0,meantemp,humidity,wind_speed,meanpressure
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2013-01-01,10.0,84.5,0.0,1015.666667
2013-01-02,7.4,92.0,2.98,1017.8
2013-01-03,7.166667,87.0,4.633333,1018.666667
2013-01-04,8.666667,71.333333,1.233333,1017.166667
2013-01-05,6.0,86.833333,3.7,1016.5


In [7]:
# Target: temperature
y = df["meantemp"]

# Exogenous variables
X = df[["humidity", "wind_speed", "meanpressure"]]

print(y.shape, X.shape)


(1462,) (1462, 3)


In [10]:
df = pd.read_csv(
    "/Users/anuradhasrivastav/Documents/github/pycaret assignment/time_series_forecasting/delhiclimate.csv"
)

df["date"] = pd.to_datetime(df["date"])
df = df.sort_values("date").set_index("date")

# target + exogenous
y = df["meantemp"]
X = df[["humidity", "wind_speed", "meanpressure"]]


In [12]:
exp = setup(
    data=y,
    fh=30,
    fold=3,
    session_id=42,
    seasonal_period=365,
    use_gpu=False,
)


Unnamed: 0,Description,Value
0,session_id,42
1,Target,meantemp
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(1462, 1)"
5,Transformed data shape,"(1462, 1)"
6,Transformed train set shape,"(1432, 1)"
7,Transformed test set shape,"(30, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


In [13]:
best = compare_models(sort="MASE")
final_model = finalize_model(best)


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
arima,ARIMA,1.8847,1.6183,2.3587,2.7303,0.0875,0.0854,-4.4741,1.1467
naive,Naive Forecaster,1.9874,1.6839,2.4825,2.8374,0.0939,0.0867,-2.5564,2.6067
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,2.01,1.6783,2.51,2.8271,0.095,0.087,-2.5517,0.2233
ada_cds_dt,AdaBoost w/ Cond. Deseasonalize & Detrending,2.0109,1.6547,2.5108,2.7875,0.0941,0.0882,-2.4229,0.37
theta,Theta Forecaster,2.0218,1.7113,2.5255,2.8834,0.0957,0.0886,-2.6646,0.03
ets,ETS,2.0652,1.7503,2.5788,2.9485,0.0984,0.0904,-2.7596,1.0333
exp_smooth,Exponential Smoothing,2.0653,1.7504,2.5789,2.9486,0.0984,0.0904,-2.7597,1.04
xgboost_cds_dt,Extreme Gradient Boosting w/ Cond. Deseasonalize & Detrending,2.1152,1.7732,2.642,2.9878,0.0996,0.0912,-2.9805,0.2167
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,2.1636,1.8056,2.7049,3.0441,0.1011,0.093,-3.3924,0.8367
auto_arima,Auto ARIMA,2.1816,1.8255,2.7236,3.0749,0.105,0.0977,-3.152,2.23


In [15]:
last_date = df.index.max()   # last date in your original data
last_date


Timestamp('2017-01-01 00:00:00')

In [16]:
future_index = pd.date_range(start=last_date + pd.Timedelta(days=1),
                             periods=30,
                             freq="D")
future_index


DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04', '2017-01-05',
               '2017-01-06', '2017-01-07', '2017-01-08', '2017-01-09',
               '2017-01-10', '2017-01-11', '2017-01-12', '2017-01-13',
               '2017-01-14', '2017-01-15', '2017-01-16', '2017-01-17',
               '2017-01-18', '2017-01-19', '2017-01-20', '2017-01-21',
               '2017-01-22', '2017-01-23', '2017-01-24', '2017-01-25',
               '2017-01-26', '2017-01-27', '2017-01-28', '2017-01-29',
               '2017-01-30', '2017-01-31'],
              dtype='datetime64[ns]', freq='D')

In [17]:
# take the last row of exogenous vars
last_exog = X.iloc[[-1]].copy()  # shape (1, 3)

# repeat it 30 times to match future_index
future_exog = pd.concat([last_exog] * 30, ignore_index=True)
future_exog.index = future_index

future_exog.head()


Unnamed: 0,humidity,wind_speed,meanpressure
2017-01-02,100.0,0.0,1016.0
2017-01-03,100.0,0.0,1016.0
2017-01-04,100.0,0.0,1016.0
2017-01-05,100.0,0.0,1016.0
2017-01-06,100.0,0.0,1016.0


In [18]:
forecast = predict_model(
    final_model,
    fh=30,
    X=future_exog   # ðŸ‘ˆ this is the key change
)
forecast.head(10)


TypeError: Invalid comparison between dtype=datetime64[ns] and Period

In [19]:
from pycaret.time_series import get_config

y_train = get_config("y_train")
print(type(y_train.index))
print(y_train.index[:5])


<class 'pandas.core.indexes.period.PeriodIndex'>
PeriodIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
             '2013-01-05'],
            dtype='period[D]', name='date')


In [21]:
# 1. last date we have
last_date = df.index.max()               # e.g. 2017-01-01

# 2. make 30 future DAILY dates
future_index = pd.date_range(
    start=last_date + pd.Timedelta(days=1),
    periods=30,
    freq="D"
)

# 3. take last known exog row
last_exog = X.iloc[[-1]].copy()          # shape (1, 3)

# 4. repeat it 30 times
future_exog = pd.concat([last_exog] * 30, ignore_index=True)

# 5. set the datetime index first
future_exog.index = future_index

# 6. convert to PeriodIndex (D) to match PyCaret
future_exog.index = future_exog.index.to_period("D")

future_exog.head()


Unnamed: 0,humidity,wind_speed,meanpressure
2017-01-02,100.0,0.0,1016.0
2017-01-03,100.0,0.0,1016.0
2017-01-04,100.0,0.0,1016.0
2017-01-05,100.0,0.0,1016.0
2017-01-06,100.0,0.0,1016.0


In [22]:
forecast = predict_model(
    final_model,
    fh=30,
    X=future_exog
)

forecast.head(10)


Unnamed: 0,y_pred
2017-01-02,10.3473
2017-01-03,10.6865
2017-01-04,11.0175
2017-01-05,11.3408
2017-01-06,11.6564
2017-01-07,11.9645
2017-01-08,12.2653
2017-01-09,12.559
2017-01-10,12.8457
2017-01-11,13.1257
