In [1]:
import pandas as pd
from prophet import Prophet
import matplotlib.pyplot as plt

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
df = pd.read_csv('/content/drive/MyDrive/Time_series_analysis_and_forecast_DATASET.csv')

In [4]:
print(df.dtypes)

FullDate      object
Tmax         float64
SysLoad      float64
GasPrice     float64
ElecPrice    float64
dtype: object


In [5]:
df.head()

Unnamed: 0,FullDate,Tmax,SysLoad,GasPrice,ElecPrice
0,2010-09-01 00:00:00,15.8,1688.215,3.69,23.71
1,2010-09-01 01:00:00,15.8,1669.195,3.69,22.635
2,2010-09-01 02:00:00,15.8,1491.98,3.69,22.565
3,2010-09-01 03:00:00,15.8,1330.76,3.69,18.91
4,2010-09-01 04:00:00,15.8,1247.94,3.69,18.03


In [6]:
df.tail()

Unnamed: 0,FullDate,Tmax,SysLoad,GasPrice,ElecPrice
92011,2021-02-28 19:00:00,24.3,449.63625,5.4848,117.925
92012,2021-02-28 20:00:00,24.3,815.59975,5.4848,87.88
92013,2021-02-28 21:00:00,24.3,745.12325,5.4848,62.06
92014,2021-02-28 22:00:00,24.3,694.9515,5.4848,62.0
92015,2021-02-28 23:00:00,24.3,714.6963,5.4848,62.955


In [7]:
print(df.columns)

Index(['FullDate', 'Tmax', 'SysLoad', 'GasPrice', 'ElecPrice'], dtype='object')


In [8]:
# Check for missing values
print(df.isnull().sum())

FullDate     0
Tmax         0
SysLoad      0
GasPrice     0
ElecPrice    0
dtype: int64


In [9]:
df.describe()

Unnamed: 0,Tmax,SysLoad,GasPrice,ElecPrice
count,92016.0,92016.0,92016.0,92016.0
mean,231.696688,797.623357,6.016163,65.798356
std,4562.128748,613.520156,2.608116,213.144084
min,9.9,-1596.8101,0.0,-924.82
25%,17.3,417.1198,3.9369,31.54375
50%,21.9,856.467925,5.1656,48.715
75%,27.6,1221.293675,8.0233,76.495
max,99999.9,3264.1663,29.99,14700.0


In [10]:
df.info

<bound method DataFrame.info of                   FullDate  Tmax     SysLoad  GasPrice  ElecPrice
0      2010-09-01 00:00:00  15.8  1688.21500    3.6900     23.710
1      2010-09-01 01:00:00  15.8  1669.19500    3.6900     22.635
2      2010-09-01 02:00:00  15.8  1491.98000    3.6900     22.565
3      2010-09-01 03:00:00  15.8  1330.76000    3.6900     18.910
4      2010-09-01 04:00:00  15.8  1247.94000    3.6900     18.030
...                    ...   ...         ...       ...        ...
92011  2021-02-28 19:00:00  24.3   449.63625    5.4848    117.925
92012  2021-02-28 20:00:00  24.3   815.59975    5.4848     87.880
92013  2021-02-28 21:00:00  24.3   745.12325    5.4848     62.060
92014  2021-02-28 22:00:00  24.3   694.95150    5.4848     62.000
92015  2021-02-28 23:00:00  24.3   714.69630    5.4848     62.955

[92016 rows x 5 columns]>

In [11]:
# Rename columns
df = df.rename(columns={'FullDate': 'ds', 'ElecPrice': 'y'})

In [12]:
print(df.columns)

Index(['ds', 'Tmax', 'SysLoad', 'GasPrice', 'y'], dtype='object')


In [13]:
train = df.iloc[:2757]
test = df.iloc[2757:]

print(len(df))

92016


In [14]:
# Create model
model = Prophet(daily_seasonality=True)

In [15]:
# Fit model on training data
model.fit(train)

INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmp545tfls5/_11p48h1.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmp545tfls5/eh_sbq0t.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=89655', 'data', 'file=/tmp/tmp545tfls5/_11p48h1.json', 'init=/tmp/tmp545tfls5/eh_sbq0t.json', 'output', 'file=/tmp/tmp545tfls5/prophet_modellmax645d/prophet_model-20230801222757.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
22:27:57 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
22:27:57 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


<prophet.forecaster.Prophet at 0x7aa3a2c48820>

In [16]:
# Make predictions on test data
future = model.make_future_dataframe(periods=len(test))
forecast = model.predict(future)

In [17]:
# Print forecast
print(forecast.tail())

                       ds         trend    yhat_lower    yhat_upper  \
92011 2255-05-09 20:00:00 -24052.558912 -3.678069e+06  3.432682e+06   
92012 2255-05-10 20:00:00 -24052.828545 -3.677822e+06  3.432850e+06   
92013 2255-05-11 20:00:00 -24053.098178 -3.677971e+06  3.432720e+06   
92014 2255-05-12 20:00:00 -24053.367811 -3.677819e+06  3.432549e+06   
92015 2255-05-13 20:00:00 -24053.637444 -3.677799e+06  3.432568e+06   

        trend_lower   trend_upper  additive_terms  additive_terms_lower  \
92011 -3.678000e+06  3.432689e+06       16.523652             16.523652   
92012 -3.677987e+06  3.432684e+06       -4.215353             -4.215353   
92013 -3.677973e+06  3.432679e+06        3.385095              3.385095   
92014 -3.677960e+06  3.432674e+06      -11.565838            -11.565838   
92015 -3.677946e+06  3.432669e+06       -3.984759             -3.984759   

       additive_terms_upper     daily  daily_lower  daily_upper     weekly  \
92011             16.523652  1.624289     1.

In [18]:
#Rename all columns
df = df.rename(columns={'SysLoad': 'add1'})
df = df.rename(columns={'Tmax': 'add2'})
df = df.rename(columns={'GasPrice': 'add3'})
df = df.rename(columns={'FullDate': 'ds'})
df = df.rename(columns={'ElecPrice': 'y'})


print(df.head())

                    ds  add2      add1  add3       y
0  2010-09-01 00:00:00  15.8  1688.215  3.69  23.710
1  2010-09-01 01:00:00  15.8  1669.195  3.69  22.635
2  2010-09-01 02:00:00  15.8  1491.980  3.69  22.565
3  2010-09-01 03:00:00  15.8  1330.760  3.69  18.910
4  2010-09-01 04:00:00  15.8  1247.940  3.69  18.030


In [19]:
# Create train and test sets
train = df.iloc[:2757]
test = df.iloc[2757:]

In [20]:
# Create model with regressors
model = Prophet(daily_seasonality=True)
model.add_regressor('add1')
model.add_regressor('add2')
model.add_regressor('add3')


<prophet.forecaster.Prophet at 0x7aa3a3bb0220>

In [21]:
# Fit model on training data
model.fit(train)


INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmp545tfls5/teklkc4w.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmp545tfls5/sw5y58fn.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=7534', 'data', 'file=/tmp/tmp545tfls5/teklkc4w.json', 'init=/tmp/tmp545tfls5/sw5y58fn.json', 'output', 'file=/tmp/tmp545tfls5/prophet_modeld6udoqcl/prophet_model-20230801222935.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
22:29:35 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
22:29:36 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


<prophet.forecaster.Prophet at 0x7aa3a3bb0220>

In [22]:
# Make predictions on test data
future = model.make_future_dataframe(periods=len(test))
future['add1'] = df['add1']
future['add2'] = df['add2']
future['add3'] = df['add3']
forecast = model.predict(future)

In [23]:
# Print forecast
print(forecast.tail())

                       ds         trend    yhat_lower    yhat_upper  \
92011 2255-05-09 20:00:00  57232.659746 -3.489662e+06  3.341177e+06   
92012 2255-05-10 20:00:00  57233.300572 -3.489755e+06  3.341312e+06   
92013 2255-05-11 20:00:00  57233.941397 -3.489923e+06  3.341424e+06   
92014 2255-05-12 20:00:00  57234.582223 -3.489955e+06  3.341534e+06   
92015 2255-05-13 20:00:00  57235.223049 -3.489748e+06  3.341508e+06   

        trend_lower   trend_upper       add1  add1_lower  add1_upper  \
92011 -3.489655e+06  3.341276e+06 -33.824475  -33.824475  -33.824475   
92012 -3.489714e+06  3.341347e+06 -20.880980  -20.880980  -20.880980   
92013 -3.489773e+06  3.341418e+06 -23.373612  -23.373612  -23.373612   
92014 -3.489831e+06  3.341490e+06 -25.148100  -25.148100  -25.148100   
92015 -3.489890e+06  3.341561e+06 -24.449761  -24.449761  -24.449761   

           add2  ...  extra_regressors_additive  \
92011 -0.138152  ...                 -32.225199   
92012 -0.138152  ...                 -