In [None]:
import numpy as np
import itertools
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import statsmodels.api as sm

from sklearn.preprocessing import MinMaxScaler

In [None]:
df = pd.read_csv("dataset_train.csv")

In [None]:
df = df.drop(columns=['product'])

In [None]:
df.head()

Unnamed: 0,negotiation_date,sold_price
0,07/01/2014,308.33
1,08/01/2014,300.67
2,09/01/2014,295.0
3,10/01/2014,324.0
4,11/01/2014,250.71


In [None]:
scaler = MinMaxScaler(feature_range=(0, 1))
df['sold_price'] = scaler.fit_transform(df['sold_price'].values.reshape(-1, 1))

In [None]:
df['negotiation_date'] = pd.to_datetime(df['negotiation_date'], dayfirst=True)
df['negotiation_date'] = pd.to_datetime(df['negotiation_date'],yearfirst=True, format='%Y-%m-%d')

In [None]:
y = df.set_index(['negotiation_date'])
y.head()

Unnamed: 0_level_0,sold_price
negotiation_date,Unnamed: 1_level_1
2014-01-07,0.233329
2014-01-08,0.222386
2014-01-09,0.214286
2014-01-10,0.255714
2014-01-11,0.151014


In [None]:
# decomposition = sm.tsa.seasonal_decompose(df['sold_price'], model='additive')

In [None]:
p = d = q = range(0, 2)
pdq = list(itertools.product(p, d, q))
seasonal_pdq = [(x[0], x[1], x[2], 12) for x in list(itertools.product(p, d, q))]
print('Examples of parameter for SARIMA...')
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[1]))
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[2]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[3]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[4]))

Examples of parameter for SARIMA...
SARIMAX: (0, 0, 1) x (0, 0, 1, 12)
SARIMAX: (0, 0, 1) x (0, 1, 0, 12)
SARIMAX: (0, 1, 0) x (0, 1, 1, 12)
SARIMAX: (0, 1, 0) x (1, 0, 0, 12)


In [None]:
for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            mod = sm.tsa.statespace.SARIMAX(y,order=param,seasonal_order=param_seasonal,enforce_stationarity=False,enforce_invertibility=False)
            results = mod.fit()
            print('ARIMA{}x{}12 - AIC:{}'.format(param,param_seasonal,results.aic))
        except: 
            continue



ARIMA(0, 0, 0)x(0, 0, 0, 12)12 - AIC:1509.9519998312749
ARIMA(0, 0, 0)x(0, 0, 1, 12)12 - AIC:711.4991596905292
ARIMA(0, 0, 0)x(0, 1, 0, 12)12 - AIC:-473.96904100130433




ARIMA(0, 0, 0)x(0, 1, 1, 12)12 - AIC:-1385.1414209974632




ARIMA(0, 0, 0)x(1, 0, 0, 12)12 - AIC:-597.5865771716374




ARIMA(0, 0, 0)x(1, 0, 1, 12)12 - AIC:-1403.3495686418098




ARIMA(0, 0, 0)x(1, 1, 0, 12)12 - AIC:-1392.6799539449712




ARIMA(0, 0, 0)x(1, 1, 1, 12)12 - AIC:-1495.81256804041
ARIMA(0, 0, 1)x(0, 0, 0, 12)12 - AIC:-56.953742817098856




ARIMA(0, 0, 1)x(0, 0, 1, 12)12 - AIC:-569.2762822642248




ARIMA(0, 0, 1)x(0, 1, 0, 12)12 - AIC:-1189.0348000349832




ARIMA(0, 0, 1)x(0, 1, 1, 12)12 - AIC:-2097.6886274622198




ARIMA(0, 0, 1)x(1, 0, 0, 12)12 - AIC:-1373.519207445294




ARIMA(0, 0, 1)x(1, 0, 1, 12)12 - AIC:-2119.5494932969023




ARIMA(0, 0, 1)x(1, 1, 0, 12)12 - AIC:-1966.5093670377369




ARIMA(0, 0, 1)x(1, 1, 1, 12)12 - AIC:-2144.692636399099
ARIMA(0, 1, 0)x(0, 0, 0, 12)12 - AIC:-2501.335224451647




ARIMA(0, 1, 0)x(0, 0, 1, 12)12 - AIC:-2471.1946837473583
ARIMA(0, 1, 0)x(0, 1, 0, 12)12 - AIC:-1326.698061040548




ARIMA(0, 1, 0)x(0, 1, 1, 12)12 - AIC:-2405.655497614652
ARIMA(0, 1, 0)x(1, 0, 0, 12)12 - AIC:-2473.66112108113




ARIMA(0, 1, 0)x(1, 0, 1, 12)12 - AIC:-2471.664559460012




ARIMA(0, 1, 0)x(1, 1, 0, 12)12 - AIC:-1855.4641739951387




ARIMA(0, 1, 0)x(1, 1, 1, 12)12 - AIC:-2394.481342832046
ARIMA(0, 1, 1)x(0, 0, 0, 12)12 - AIC:-2554.187573399644




ARIMA(0, 1, 1)x(0, 0, 1, 12)12 - AIC:-2524.036065446825
ARIMA(0, 1, 1)x(0, 1, 0, 12)12 - AIC:-1368.7311188053934




ARIMA(0, 1, 1)x(0, 1, 1, 12)12 - AIC:-2458.5029651935974




ARIMA(0, 1, 1)x(1, 0, 0, 12)12 - AIC:-2529.011134815938




ARIMA(0, 1, 1)x(1, 0, 1, 12)12 - AIC:-2522.887686380365




ARIMA(0, 1, 1)x(1, 1, 0, 12)12 - AIC:-1898.0191850809285




ARIMA(0, 1, 1)x(1, 1, 1, 12)12 - AIC:-2448.7394481854335
ARIMA(1, 0, 0)x(0, 0, 0, 12)12 - AIC:-2537.3989776562767




ARIMA(1, 0, 0)x(0, 0, 1, 12)12 - AIC:-2506.928009382517
ARIMA(1, 0, 0)x(0, 1, 0, 12)12 - AIC:-1586.1028156521675




ARIMA(1, 0, 0)x(0, 1, 1, 12)12 - AIC:-2571.569545704152




ARIMA(1, 0, 0)x(1, 0, 0, 12)12 - AIC:-2506.929785056208




ARIMA(1, 0, 0)x(1, 0, 1, 12)12 - AIC:-2591.340926775517




ARIMA(1, 0, 0)x(1, 1, 0, 12)12 - AIC:-2180.521870121872




ARIMA(1, 0, 0)x(1, 1, 1, 12)12 - AIC:-2561.9273686327524
ARIMA(1, 0, 1)x(0, 0, 0, 12)12 - AIC:-2576.850905807506




ARIMA(1, 0, 1)x(0, 0, 1, 12)12 - AIC:-2546.092670993734
ARIMA(1, 0, 1)x(0, 1, 0, 12)12 - AIC:-1582.164450278673




ARIMA(1, 0, 1)x(0, 1, 1, 12)12 - AIC:-2574.5113568886027




ARIMA(1, 0, 1)x(1, 0, 0, 12)12 - AIC:-2548.593978244613




ARIMA(1, 0, 1)x(1, 0, 1, 12)12 - AIC:-2593.0503468648812




ARIMA(1, 0, 1)x(1, 1, 0, 12)12 - AIC:-2181.595548243299
ARIMA(1, 0, 1)x(1, 1, 1, 12)12 - AIC:-2564.3820982041416
ARIMA(1, 1, 0)x(0, 0, 0, 12)12 - AIC:-2545.460206803701




ARIMA(1, 1, 0)x(0, 0, 1, 12)12 - AIC:-2515.2236191247575
ARIMA(1, 1, 0)x(0, 1, 0, 12)12 - AIC:-1362.7304090124398




ARIMA(1, 1, 0)x(0, 1, 1, 12)12 - AIC:-2449.367310660872




ARIMA(1, 1, 0)x(1, 0, 0, 12)12 - AIC:-2515.300779368028




ARIMA(1, 1, 0)x(1, 0, 1, 12)12 - AIC:-2513.871054505968




ARIMA(1, 1, 0)x(1, 1, 0, 12)12 - AIC:-1885.6912647722656




ARIMA(1, 1, 0)x(1, 1, 1, 12)12 - AIC:-2439.074399956432




ARIMA(1, 1, 1)x(0, 0, 0, 12)12 - AIC:-2738.7056153254325




ARIMA(1, 1, 1)x(0, 0, 1, 12)12 - AIC:-2708.3446273406234




ARIMA(1, 1, 1)x(0, 1, 0, 12)12 - AIC:-1573.4469604811095




ARIMA(1, 1, 1)x(0, 1, 1, 12)12 - AIC:-2638.434232622767




ARIMA(1, 1, 1)x(1, 0, 0, 12)12 - AIC:-2710.8279579673654




ARIMA(1, 1, 1)x(1, 0, 1, 12)12 - AIC:-2713.953991489172




ARIMA(1, 1, 1)x(1, 1, 0, 12)12 - AIC:-2168.798208566368




ARIMA(1, 1, 1)x(1, 1, 1, 12)12 - AIC:-2629.3511784772772


In [None]:
mod = sm.tsa.statespace.SARIMAX(y,
                                order=(0, 0, 1),
                                seasonal_order=(1, 1, 1, 12),
                                enforce_stationarity=False,
                                enforce_invertibility=False)
results = mod.fit()
print(results.summary().tables[1])



                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ma.L1          0.5344      0.015     36.022      0.000       0.505       0.563
ar.S.L12      -0.2512      0.024    -10.631      0.000      -0.298      -0.205
ma.S.L12      -0.6376      0.020    -32.628      0.000      -0.676      -0.599
sigma2         0.0153      0.000     47.109      0.000       0.015       0.016


In [None]:
pred = results.get_prediction(start=pd.to_datetime('2019-07-01'), dynamic=False)
pred_ci = pred.conf_int()

In [None]:
y_forecasted = pred.predicted_mean
y_truth = y['2019-07-01':]
mse = ((y_forecasted - y_truth) ** 2).mean()
print('The Mean Squared Error is {}'.format(round(mse, 2)))
print('The Root Mean Squared Error is {}'.format(round(np.sqrt(mse), 2)))

The Mean Squared Error is 2019-07-01 00:00:00   NaN
2019-07-02 00:00:00   NaN
2019-07-03 00:00:00   NaN
2019-07-05 00:00:00   NaN
2019-07-06 00:00:00   NaN
2019-07-07 00:00:00   NaN
2019-07-08 00:00:00   NaN
2019-07-09 00:00:00   NaN
2019-07-13 00:00:00   NaN
2019-07-14 00:00:00   NaN
2019-07-15 00:00:00   NaN
2019-07-16 00:00:00   NaN
2019-07-17 00:00:00   NaN
2019-07-18 00:00:00   NaN
2019-07-19 00:00:00   NaN
2019-07-20 00:00:00   NaN
2019-07-21 00:00:00   NaN
2019-07-22 00:00:00   NaN
2019-07-23 00:00:00   NaN
2019-07-24 00:00:00   NaN
2019-07-25 00:00:00   NaN
2019-07-26 00:00:00   NaN
2019-07-27 00:00:00   NaN
2019-07-29 00:00:00   NaN
2019-07-30 00:00:00   NaN
2019-07-31 00:00:00   NaN
sold_price            NaN
dtype: float64
The Root Mean Squared Error is 2019-07-01 00:00:00   NaN
2019-07-02 00:00:00   NaN
2019-07-03 00:00:00   NaN
2019-07-05 00:00:00   NaN
2019-07-06 00:00:00   NaN
2019-07-07 00:00:00   NaN
2019-07-08 00:00:00   NaN
2019-07-09 00:00:00   NaN
2019-07-13 00:00:0

In [None]:
y_forecasted

negotiation_date
2019-07-01    0.437610
2019-07-02    0.460310
2019-07-03    0.390274
2019-07-05    0.496544
2019-07-06    0.362436
2019-07-07    0.278358
2019-07-08    0.337227
2019-07-09    0.297662
2019-07-13    0.310849
2019-07-14    0.283368
2019-07-15    0.319258
2019-07-16    0.355195
2019-07-17    0.397910
2019-07-18    0.320018
2019-07-19    0.322669
2019-07-20    0.298750
2019-07-21    0.292090
2019-07-22    0.272834
2019-07-23    0.302597
2019-07-24    0.285086
2019-07-25    0.279156
2019-07-26    0.276835
2019-07-27    0.311405
2019-07-29    0.311881
2019-07-30    0.396230
2019-07-31    0.356520
dtype: float64

In [None]:
y[-30:]

Unnamed: 0_level_0,sold_price
negotiation_date,Unnamed: 1_level_1
2019-06-27,0.245543
2019-06-28,0.257143
2019-06-29,0.278571
2019-06-30,0.245243
2019-07-01,0.471429
2019-07-02,0.519286
2019-07-03,0.667857
2019-07-05,0.492857
2019-07-06,0.237057
2019-07-07,0.241429


In [None]:
p = pd.DataFrame(y_forecasted, columns=['pred_price'])

In [None]:
result = pd.DataFrame(y[-26:])
result['pred_price'] = p['pred_price']
result.dtypes

sold_price    float64
pred_price    float64
dtype: object

In [None]:
result

Unnamed: 0_level_0,sold_price,pred_price
negotiation_date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-07-01,0.471429,0.43761
2019-07-02,0.519286,0.46031
2019-07-03,0.667857,0.390274
2019-07-05,0.492857,0.496544
2019-07-06,0.237057,0.362436
2019-07-07,0.241429,0.278358
2019-07-08,0.257143,0.337227
2019-07-09,0.258929,0.297662
2019-07-13,0.246157,0.310849
2019-07-14,0.214286,0.283368


In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
mse = mean_squared_error(result['sold_price'], result['pred_price'], squared=False)
print(mse)

0.008506053808813764
