# PyCaret


In [27]:
import pandas as pd
import numpy as nump
# Load the 'Para 2024' sheet of the PVC workbook and convert the 'Date'
# column to datetime (assign keeps the column in its original position).
data = (
    pd.read_excel('PVC.xls', sheet_name='Para 2024')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)
data.head()

Unnamed: 0,Date,PVC BRL/tonne
0,2023-01-06,5512.549
1,2023-01-13,5349.321
2,2023-01-20,5463.852
3,2023-01-27,5346.214
4,2023-02-03,5372.849


In [28]:
import plotly.express as px

# Rolling mean over a window of 12 consecutive rows. The rows shown by
# data.head() are one week apart, so this is a 12-observation window and
# not a 12-calendar-month one; the first 11 rows are NaN.
data['MA12'] = data['PVC BRL/tonne'].rolling(window=12).mean()

# Plot the raw price together with its moving average.
fig = px.line(
    data,
    x="Date",
    y=["PVC BRL/tonne", "MA12"],
    template='plotly_dark',
)
fig.show()

In [29]:
# Build the modelling frame from the dated price series:
#   - Month / Year : calendar parts of 'Date', taken with the vectorized
#                    .dt accessor rather than a Python-level loop
#   - Series       : running index 1..N in row order
# Only the four columns used for modelling are kept, so 'Date' and 'MA12'
# fall away through the column selection itself; no separate in-place drop
# is needed.
data = data.assign(
    Month=data['Date'].dt.month,
    Year=data['Date'].dt.year,
    Series=nump.arange(1, len(data) + 1),
)[['Series', 'Year', 'Month', 'PVC BRL/tonne']]

# check the head of the dataset
data.head()

Unnamed: 0,Series,Year,Month,PVC BRL/tonne
0,1,2023,1,5512.549
1,2,2023,1,5349.321
2,3,2023,1,5463.852
3,4,2023,1,5346.214
4,5,2023,2,5372.849


In [30]:
# Hold-out split by month number: months 1-9 go to train, months 10-12 to test.
# NOTE(review): the rule looks at 'Month' only, not 'Year'. If the sheet ever
# spans more than one year, later-year rows with Month < 10 would land in the
# training set - confirm the data covers a single year.
train = data.loc[data['Month'].lt(10)]
test = data.loc[data['Month'].ge(10)]

In [31]:
# import the regression module
from pycaret.regression import *

# Configure the PyCaret experiment: explicit train/test frames, 3 time-ordered
# folds with no shuffling, target transformation enabled, fixed session seed.
s = setup(
    data=train,
    test_data=test,
    target='PVC BRL/tonne',
    numeric_features=['Month', 'Series'],
    transform_target=True,
    fold_strategy='timeseries',
    fold=3,
    fold_shuffle=False,
    data_split_shuffle=False,
    session_id=123,
)

In [32]:
# Compare the candidate regressors, ranked by MAE, and keep the top-ranked one.
best = compare_models(sort='MAE')

Processing:   0%|          | 0/85 [00:00<?, ?it/s]

In [40]:
# Score the selected model on the hold-out (test) set passed to setup().
# NOTE(review): the saved output of this cell is an AttributeError raised from
# CatBoostRegressor ('_init_params'); this looks like a library-version
# mismatch rather than a problem with this call - confirm on a fresh kernel.
prediction_holdout = predict_model(best)

AttributeError: 'CatBoostRegressor' object has no attribute '_init_params'

In [41]:
# Run the selected model over the full feature frame (train and test rows
# together) so actual and predicted prices can be compared side by side.
predictions = predict_model(best, data=data)


AttributeError: 'CatBoostRegressor' object has no attribute '_init_params'

In [38]:
# Re-read the source sheet to recover the dates: 'Date' was removed from
# `data` before modelling, so it has to be fetched again for plotting.
aux = pd.read_excel('PVC.xls', sheet_name='Para 2024')
aux['Date'] = pd.to_datetime(aux['Date'])

# Add date column to predictions (the assignment aligns on the row index).
predictions['Date'] = aux['Date']

# Check the result. Only the last expression of a cell is displayed, so the
# earlier bare `data.head()` had no visible effect and has been removed.
predictions.head()


Unnamed: 0,Series,Year,Month,PVC BRL/tonne,prediction_label,Date
0,1,2023,1,5512.548828,5512.543457,2023-01-06
1,2,2023,1,5349.320801,5349.325195,2023-01-13
2,3,2023,1,5463.852051,5463.845703,2023-01-20
3,4,2023,1,5346.213867,5346.219238,2023-01-27
4,5,2023,2,5372.849121,5372.848633,2023-02-03


In [39]:
# Line plot of actual vs. predicted prices over time.
fig = px.line(
    predictions,
    x='Date',
    y=["PVC BRL/tonne", "prediction_label"],
    template='plotly_dark',
)

# Add a vertical rectangle marking the test-set period. The bounds come from
# the rows that meet the same rule as the train/test split (Month >= 10),
# instead of hard-coded dates that covered almost the whole series.
test_dates = predictions.loc[predictions['Month'] >= 10, 'Date'].dropna()
if not test_dates.empty:  # nothing to shade when there are no test rows
    fig.add_vrect(
        x0=test_dates.min().strftime("%Y-%m-%d"),
        x1=test_dates.max().strftime("%Y-%m-%d"),
        fillcolor="grey",
        opacity=0.25,
        line_width=0,
    )

fig.show()