In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import statsmodels.api as sm

from statsmodels.regression.linear_model import OLS

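Функция `df_OLS_anal` объединяет подряд идущие строки с совпадающим временем, оценивает МНК-регрессию и вычисляет по её коэффициентам параметры $\rho$, $\lambda$ и $\kappa$.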

In [2]:
def _merge_simultaneous_rows(df: pd.DataFrame, time_tol: float) -> pd.DataFrame:
    """Collapse runs of consecutive rows whose ``Time`` values (almost) coincide.

    Two neighbouring rows belong to the same run when the absolute difference
    of their ``Time`` values is below ``time_tol``. Each run is replaced by its
    last row, with ``Volume`` set to the sum over the run and ``AskBefore``
    taken from the first row of the run. The input frame is not modified and
    rows are addressed by position, so any index is accepted.
    """
    n_rows = len(df)
    if n_rows == 0:
        return df.copy()

    times = df["Time"].to_numpy(dtype=float)
    # joins_next[i] is True when row i must be merged into row i + 1.
    joins_next = np.abs(np.diff(times)) < time_tol
    # A run starts at row 0 and after every row that does NOT join its successor.
    run_starts = np.flatnonzero(np.concatenate(([True], ~joins_next)))
    run_ends = np.append(run_starts[1:] - 1, n_rows - 1)

    merged = df.iloc[run_ends].copy()
    merged["Volume"] = np.add.reduceat(df["Volume"].to_numpy(), run_starts)
    merged["AskBefore"] = df["AskBefore"].to_numpy()[run_starts]
    return merged


def df_OLS_anal(df: pd.DataFrame, time_tol: float = 1e-6) -> None:
    """Fit the OLS regression on ``df`` and print the derived parameters.

    Steps:

    1. Consecutive rows whose ``Time`` differs by less than ``time_tol`` are
       merged (volumes added, ``AskBefore`` of the first row kept).
    2. For every remaining row except the first, compute
       ``deltaA = AskAfter - AskBefore``, ``deltat`` (time since the previous
       row) and ``x = Volume``.
    3. Regress ``diff(deltaA / deltat)`` on ``deltaA``, ``x`` and
       ``diff(x / deltat)`` plus a constant (appended as the last column).
    4. Print the statsmodels summary and the values
       ``rho = -b0``, ``lambda = b1 / rho``, ``kappa = b2 - lambda``, where
       ``b0, b1, b2`` are the first three fitted coefficients.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the columns ``Time``, ``AskBefore``, ``AskAfter`` and
        ``Volume``. It is left untouched; any index is accepted.
    time_tol : float, default 1e-6
        Rows closer in ``Time`` than this (same units as ``Time``) are merged.

    Returns
    -------
    None
        Results are printed only.

    Raises
    ------
    ValueError
        If fewer than three rows remain after merging, i.e. not a single
        regression observation can be formed.
    """
    trades = _merge_simultaneous_rows(df, time_tol)
    if len(trades) < 3:
        raise ValueError(
            "need at least 3 rows with distinct timestamps to form one "
            f"regression observation, got {len(trades)}"
        )

    # Per-row quantities; the first row only serves as the time reference.
    ask_jump = (trades["AskAfter"] - trades["AskBefore"]).to_numpy(dtype=float)[1:]
    elapsed = np.diff(trades["Time"].to_numpy(dtype=float))
    volume = trades["Volume"].to_numpy(dtype=float)[1:]

    jump_rate = ask_jump / elapsed
    volume_rate = volume / elapsed

    # Observation i pairs the change of the rates between rows i and i + 1
    # with the levels at row i, hence the [:-1] on the level columns.
    OLSdf = pd.DataFrame({
        "SUM": np.diff(jump_rate),
        "-rho": ask_jump[:-1],
        "rho lambda": volume[:-1],
        "kappa + lambda": np.diff(volume_rate),
    })

    Regressand = OLSdf["SUM"]
    Regressors = sm.add_constant(
        OLSdf[["-rho", "rho lambda", "kappa + lambda"]], prepend=False
    )

    res = OLS(Regressand, Regressors).fit()

    print(res.summary())

    # Positional access: the coefficients follow the regressor column order.
    rho = -res.params.iloc[0]
    lamb = res.params.iloc[1] / rho
    kappa = res.params.iloc[2] - lamb

    print("rho = ", rho, "\nlambda = ", lamb, "\nkappa = ", kappa, "\n \n", end="")

In [3]:
# Baseline run: fit the regression on the complete, unfiltered file.
df = pd.read_csv("Data.csv")
df_OLS_anal(df=df)

                            OLS Regression Results                            
Dep. Variable:                    SUM   R-squared:                       0.043
Model:                            OLS   Adj. R-squared:                  0.042
Method:                 Least Squares   F-statistic:                     144.3
Date:                Thu, 18 Jan 2024   Prob (F-statistic):           1.74e-91
Time:                        10:55:51   Log-Likelihood:                -90059.
No. Observations:                9743   AIC:                         1.801e+05
Df Residuals:                    9739   BIC:                         1.802e+05
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
-rho           -1.503e+04    981.693    -15.

In [4]:
# Restrict the sample to rows with 40 < Time < 3600, then renumber the rows
# 0..n-1 (reset_index keeps the old labels in an "index" column).
df = (
    pd.read_csv("Data.csv")
    .loc[lambda frame: (frame["Time"] < 3600) & (frame["Time"] > 40)]
    .reset_index()
)
df_OLS_anal(df)
df.head(3)

                            OLS Regression Results                            
Dep. Variable:                    SUM   R-squared:                       0.074
Model:                            OLS   Adj. R-squared:                  0.073
Method:                 Least Squares   F-statistic:                     50.83
Date:                Thu, 18 Jan 2024   Prob (F-statistic):           1.35e-31
Time:                        10:55:54   Log-Likelihood:                -16746.
No. Observations:                1905   AIC:                         3.350e+04
Df Residuals:                    1901   BIC:                         3.352e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
-rho           -4505.8608   1273.953     -3.

Unnamed: 0.1,index,Unnamed: 0,Time,AskBefore,AskAfter,Volume
0,274,274,41.000686,61.857,61.857,1000.0
1,275,275,44.00095,61.85,61.857,1000.0
2,276,276,45.000454,61.857,61.86,19000.0


In [64]:
# Re-run on the full file with Time multiplied by 1000.
# NOTE(review): df_OLS_anal merges rows whose Time values differ by less than
# 1e-6 in the units of the Time column, so this rescaling also makes that
# merge threshold 1000 times tighter relative to the original units.
df = pd.read_csv("Data.csv")
df["Time"] = df["Time"] * 1000
df_OLS_anal(df)

                            OLS Regression Results                            
Dep. Variable:                    SUM   R-squared:                       0.106
Model:                            OLS   Adj. R-squared:                  0.106
Method:                 Least Squares   F-statistic:                     473.5
Date:                Wed, 17 Jan 2024   Prob (F-statistic):          9.67e-291
Time:                        12:14:46   Log-Likelihood:                -69052.
No. Observations:               11966   AIC:                         1.381e+05
Df Residuals:                   11962   BIC:                         1.381e+05
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
-rho            -104.6168     30.089     -3.

  rho = - res.params[0]
  lamb = res.params[1] / rho
  kappa = res.params[2] - lamb


In [78]:
# Keep only rows where the ask moved up by more than 0.01
# (AskAfter - AskBefore > 0.01), renumber them 0..n-1 and discard the
# "index" column that reset_index adds.
df = (
    pd.read_csv("Data.csv")
    .loc[lambda frame: frame["AskAfter"] - frame["AskBefore"] > 0.01]
    .reset_index()
    .drop(columns=["index"])
)
df_OLS_anal(df)

                            OLS Regression Results                            
Dep. Variable:                    SUM   R-squared:                       0.439
Model:                            OLS   Adj. R-squared:                  0.436
Method:                 Least Squares   F-statistic:                     159.5
Date:                Wed, 17 Jan 2024   Prob (F-statistic):           2.18e-76
Time:                        14:58:43   Log-Likelihood:                -6133.8
No. Observations:                 616   AIC:                         1.228e+04
Df Residuals:                     612   BIC:                         1.229e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
-rho           -4099.9288   2230.549     -1.

In [80]:
# Keep only rows with Volume above 100000 and renumber them 0..n-1
# (the old labels stay in an "index" column).
df = (
    pd.read_csv("Data.csv")
    .loc[lambda frame: frame["Volume"] > 100_000]
    .reset_index()
)
df_OLS_anal(df)

                            OLS Regression Results                            
Dep. Variable:                    SUM   R-squared:                       0.202
Model:                            OLS   Adj. R-squared:                  0.199
Method:                 Least Squares   F-statistic:                     66.72
Date:                Wed, 17 Jan 2024   Prob (F-statistic):           1.82e-38
Time:                        15:00:02   Log-Likelihood:                -6411.5
No. Observations:                 795   AIC:                         1.283e+04
Df Residuals:                     791   BIC:                         1.285e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
-rho           -2.229e+04   7254.106     -3.

In [8]:
# Two-stage filter: first Volume above 10000, then ask change below 0.1.
# Rows are renumbered after each stage; the two reset_index calls leave the
# earlier labels behind in "index" and "level_0" columns.
df = (
    pd.read_csv("Data.csv")
    .loc[lambda frame: frame["Volume"] > 10_000]
    .reset_index()
    .loc[lambda frame: frame["AskAfter"] - frame["AskBefore"] < 0.1]
    .reset_index()
)
df_OLS_anal(df)

                            OLS Regression Results                            
Dep. Variable:                    SUM   R-squared:                       0.074
Model:                            OLS   Adj. R-squared:                  0.073
Method:                 Least Squares   F-statistic:                     104.9
Date:                Thu, 18 Jan 2024   Prob (F-statistic):           2.68e-65
Time:                        11:00:10   Log-Likelihood:                -37522.
No. Observations:                3931   AIC:                         7.505e+04
Df Residuals:                    3927   BIC:                         7.508e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
-rho            -8.36e+04   5486.128    -15.