In [1]:
import numpy as np
import pandas as pd 
from scipy.optimize import least_squares
from sklearn.model_selection import TimeSeriesSplit
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
sns.set(rc={"figure.figsize":(25, 10)})

In [3]:
dQI = pd.read_csv("DataForImpact/BIGdQI.csv")

In [6]:
dQI = dQI.loc[dQI["Loses"] > 0.]

In [7]:
dQI

Unnamed: 0,Time,PriceChange,Loses,volume
0,471,0.000000,0.500000,6.888500
1,574,0.000000,0.500000,7.029081
2,603,0.000000,0.500000,7.029081
3,681,0.000000,0.500000,7.029081
4,695,0.000000,0.500000,7.029081
...,...,...,...,...
46689,1498690,0.000000,3.836735,34.442499
46690,1499049,0.000000,0.500000,19.681428
46691,1499050,0.424610,3.500000,7.029081
46693,1499824,0.000000,10.454167,16.869795


In [13]:
dQI['Time'] = dQI['Time'].astype(float)
MI = np.array(dQI['Loses'])
dQ= np.array(dQI['volume'])
T = np.array(dQI['Time'], dtype=float)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dQI['Time'] = dQI['Time'].astype(float)


In [14]:
T

array([4.710000e+02, 5.740000e+02, 6.030000e+02, ..., 1.499050e+06,
       1.499824e+06, 1.499825e+06])

### Usual OWM: $I_{t+1} = \rho I_t + \lambda Q_{t+1}$

In [15]:
cvlen = 35000
start_point = 1000

def fun1(x, mi: np.array, mi_prev: np.array, dq: np.array):
            return x[0] * mi_prev + x[1] * dq - mi

errors01 = np.full((cvlen - start_point, ), 0.)

for i in range(start_point, cvlen):
    res_lsq01 = least_squares(fun1, np.array([0.1, 0.1]), args=(MI[1:i], MI[:i - 1], dQ[1:i]))
    errors01[i - start_point] = fun1(res_lsq01.x, MI[i], MI[i - 1], dQ[i])


print("MAE: ", np.mean(abs(errors01)))

MAE:  1.5680194241734309


In [16]:
cvlen = 35000
learning_window = 100
start_point = 1000

errors02 = np.full((cvlen - start_point, ), 0.)

for i in range(start_point, cvlen):
    res_lsq02 = least_squares(fun1, np.array([0.1, 0.1]), args=(MI[i - learning_window:i], MI[i - 1 - learning_window:i - 1], dQ[i - learning_window:i]))
    errors02[i - start_point] = fun1(res_lsq02.x, MI[i], MI[i - 1], dQ[i])

print("MAE: ", np.mean(abs(errors02)))

MAE:  1.3953206206925255


### SR model: $I_t = C \sqrt{Q_t}$

In [17]:
cvlen = 35000
start_point = 1000

def fun2(x, mi: np.array, dq: np.array):
            return x[0] * np.sqrt(dq) - mi

errors03 = np.full((cvlen - start_point, ), 0.)

for i in range(start_point, cvlen):
    res_lsq11 = least_squares(fun2, np.array([0.1]), args=(MI[1:i], dQ[1:i]))
    errors03[i - start_point] = fun2(res_lsq11.x, MI[i], dQ[i])


print("MAE: ", np.mean(abs(errors03)))

MAE:  2.1225440510941165


In [18]:
cvlen = 35000
learning_window = 100
start_point = 1000

errors04 = np.full((cvlen - start_point, ), 0.)

for i in range(start_point, cvlen):
    res_lsq12 = least_squares(fun2, np.array([0.1, 0.1]), args=(MI[i - learning_window:i], dQ[i - learning_window:i]))
    errors04[i - start_point] = fun2(res_lsq12.x, MI[i], dQ[i])

print("MAE: ", np.mean(abs(errors04)))

MAE:  1.5385424276542083


### Experimrntal Combo of SRM and OWM: $I_{t+1} = \rho I_t + \lambda \sqrt{Q_{t+1}}$

In [19]:
cvlen = 35000
start_point = 1000

def fun3(x, mi: np.array, mi_prev: np.array, dq: np.array):
            return x[0] * mi_prev + x[1] * np.sqrt(dq) - mi

errors01 = np.full((cvlen - start_point, ), 0.)

for i in range(start_point, cvlen):
    res_lsq01 = least_squares(fun3, np.array([0.1, 0.1]), args=(MI[1:i], MI[:i - 1], dQ[1:i]))
    errors01[i - start_point] = fun3(res_lsq01.x, MI[i], MI[i - 1], dQ[i])


print("MAE: ", np.mean(abs(errors01)))

MAE:  1.9932173237366233


In [20]:
cvlen = 35000
learning_window = 100
start_point = 1000

errors02 = np.full((cvlen - start_point, ), 0.)

for i in range(start_point, cvlen):
    res_lsq02 = least_squares(fun3, np.array([0.1, 0.1]), args=(MI[i - learning_window:i], MI[i - 1 - learning_window:i - 1], dQ[i - learning_window:i]))
    errors02[i - start_point] = fun3(res_lsq02.x, MI[i], MI[i - 1], dQ[i])

print("MAE: ", np.mean(abs(errors02)))

MAE:  1.5543257789006262


### Experimental model from OWM: $\frac{I_{i+1} - I_i}{\Delta t _{i+1}} = \rho I_i + \lambda \frac{Q_{i+1}}{\Delta t _{i+1}}$

In [21]:
dt = (T[1:] - T[:-1])
dIdt = (MI[1:] - MI[:-1]) / dt
Qdt = dQ[1:] / dt
Ii = MI[:-1]

In [22]:
cvlen = 35000
start_point = 1000

def fun3(x, dIdt: np.array, mi_prev: np.array, Qdt: np.array):
            return x[0] * mi_prev + x[1] * Qdt - dIdt

errors01 = np.full((cvlen - start_point, ), 0.)

for i in range(start_point, cvlen):
    res_lsq01 = least_squares(fun3, np.array([0.1, 0.1]), args=(dIdt[:i], Ii[:i], Qdt[:i]))
    errors01[i - start_point] = Ii[i] * (1 + dt[i] * res_lsq01.x[0]) + res_lsq01.x[1] * Qdt[i] - Ii[i + 1]


print("MAE: ", np.mean(abs(errors01)))

MAE:  10.919683027663334


In [23]:
cvlen = 35000
learning_window = 100
start_point = 1000
def fun3(x, dIdt: np.array, mi_prev: np.array, Qdt: np.array):
            return x[0] * mi_prev + x[1] * Qdt - dIdt

errors01 = np.full((cvlen - start_point, ), 0.)

for i in range(start_point, cvlen):
    res_lsq01 = least_squares(fun3, np.array([0.1, 0.1]), args=(dIdt[i - learning_window:i], Ii[i - learning_window:i], Qdt[i - learning_window:i]))
    errors01[i - start_point] = Ii[i] * (1 + dt[i] * res_lsq01.x[0]) + res_lsq01.x[1] * Qdt[i] - Ii[i + 1]


print("MAE: ", np.mean(abs(errors01)))

MAE:  12.603488763904165


### Experimental model from our intuition: $\frac{y_{i + 1} - y_{i}}{\Delta t_{i+1}} = \rho y_i + \lambda$

In [24]:
y = MI / dQ

In [27]:
cvlen = 35000
start_point = 1000

def fun1(x, y: np.array, y_prev: np.array, dt: np.array):
            return y_prev * (1 + x[0] * dt) + x[1] * dt - y

errors01 = np.full((cvlen - start_point, ), 0.)

for i in range(start_point, cvlen):
    res_lsq01 = least_squares(fun1, np.array([0.1, 0.1]), args=(y[1:i], y[:i - 1], dt[1:i]))
    errors01[i - start_point] = fun1(res_lsq01.x, y[i], y[i - 1], dt[i]) * dQ[i]


print("MAE: ", np.mean(abs(errors01)))

MAE:  4.623520484527892


In [30]:
cvlen = 35000
learning_window = 500
start_point = 2000

def fun1(x, y: np.array, y_prev: np.array, dt: np.array):
            return y_prev * (1 + x[0] * dt) + x[1] * dt - y

errors01 = np.full((cvlen - start_point, ), 0.)

for i in range(start_point, cvlen):
    res_lsq01 = least_squares(fun1, np.array([0.1, 0.1]), args=(y[i - learning_window:i], y[i - 1 - learning_window:i - 1], dt[i - learning_window:i]))
    errors01[i - start_point] = fun1(res_lsq01.x, y[i], y[i - 1], dt[i]) * dQ[i]


print("MAE: ", np.mean(abs(errors01)))

MAE:  4.0392173162843426


### GOW model

In [32]:
cvlen = 35000
start_point = 1000

def fun1(x, mi: np.array, mi_prev: np.array, dq: np.array, dt: np.array):
            return np.power(x[0], dt) * mi_prev + x[1] * dq - mi

errors01 = np.full((cvlen - start_point, ), 0.)

for i in range(start_point, cvlen):
    res_lsq01 = least_squares(fun1, np.array([0.1, 0.1]), args=(MI[1:i], MI[:i - 1], dQ[1:i], dt[1:i]))
    errors01[i - start_point] = fun1(res_lsq01.x, MI[i], MI[i - 1], dQ[i], dt[i])


print("MAE: ", np.mean(abs(errors01)))

MAE:  1.5712823852127695
