In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import statsmodels.api as sm

from statsmodels.regression.linear_model import OLS

In [3]:
# Load the data set into `df`. Later cells in this notebook read its
# "Time", "AskBefore", "AskAfter" and "Volume" columns.
df = pd.read_csv("DataWithoutPairs_USDRUB.csv")

In [17]:
def df_OLS_anal(df: pd.DataFrame):
    """Fit an OLS regression on row-to-row ask-jump and volume changes and print the estimates.

    The first row of ``df`` is dropped because it has no preceding row to form
    a time step with. For every remaining row the ask jump
    ``AskAfter - AskBefore`` and ``Volume`` are divided by the time elapsed
    since the previous row. The first differences of these two rates give the
    regressand (``"SUM"``) and the last regressor; the other regressors are the
    raw ask jump and volume of the earlier row of each consecutive pair and the
    raw volume of the later row. A constant is appended as the final column.
    A frame with ``n`` rows therefore yields ``n - 2`` observations.

    Each regressor column is named after the parameter combination its fitted
    coefficient is read as; ``rho``, ``kappa`` and two separate estimates of
    ``lambda`` are then solved from those coefficients.

    Parameters
    ----------
    df : pd.DataFrame
        Must provide the columns ``"Time"``, ``"AskBefore"``, ``"AskAfter"``
        and ``"Volume"``. Rows are taken in their existing order and sliced by
        position, so the index labels do not need to be contiguous.

    Returns
    -------
    None
        The statsmodels summary and the derived parameters are printed.
    """
    # Everything is sliced from the second row on so it lines up with np.diff(Time).
    ask_jump = (df["AskAfter"] - df["AskBefore"])[1:]
    volume = df["Volume"][1:]
    time_step = np.diff(df["Time"])

    # Rates per unit of time, then the change of each rate between consecutive rows.
    ask_rate_change = np.diff(ask_jump / time_step)
    volume_rate_change = np.diff(volume / time_step)

    target = pd.Series(ask_rate_change, name="SUM")
    design = pd.DataFrame({
        "-rho": ask_jump[:-1].values,                # ask jump, earlier row of each pair
        "-rho kappa": volume[1:].values,             # volume, later row of each pair
        "rho (lambda + kappa)": volume[:-1].values,  # volume, earlier row of each pair
        "kappa + lambda": volume_rate_change,
    })

    # prepend=False keeps the intercept as the last column, after the four named regressors.
    design = sm.add_constant(design, prepend=False)

    res = OLS(target, design).fit()
    print(res.summary())

    # Undo the parameter combinations encoded in the column names.
    coef = res.params
    rho = -coef["-rho"]
    kappa = -coef["-rho kappa"] / rho
    lamb = coef["rho (lambda + kappa)"] / rho - kappa
    lamb2 = coef["kappa + lambda"] - kappa

    print("rho = ", rho, "\nlambda = ", lamb,  " lambda2 = ", lamb2, "\nkappa = ", kappa, "\n \n", end="")

In [18]:
# Baseline run: reload the CSV and fit the regression on every row, with no filtering.
df = pd.read_csv("DataWithoutPairs_USDRUB.csv")
df_OLS_anal(df)

                            OLS Regression Results                            
Dep. Variable:                    SUM   R-squared:                       0.044
Model:                            OLS   Adj. R-squared:                  0.044
Method:                 Least Squares   F-statistic:                     112.3
Date:                Sat, 20 Jan 2024   Prob (F-statistic):           9.67e-94
Time:                        19:47:29   Log-Likelihood:                -90051.
No. Observations:                9743   AIC:                         1.801e+05
Df Residuals:                    9738   BIC:                         1.801e+05
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                           coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------
-rho                 -1.505e+04 

In [19]:
# Refit on the rows whose Time lies strictly between 40 and 3600.
df = (
    pd.read_csv("DataWithoutPairs_USDRUB.csv")
    .loc[lambda trades: (trades["Time"] > 40) & (trades["Time"] < 3600)]
    # drop=True discards the old row labels instead of inserting them into
    # the frame as an extra "index" column.
    .reset_index(drop=True)
)
df_OLS_anal(df)

                            OLS Regression Results                            
Dep. Variable:                    SUM   R-squared:                       0.075
Model:                            OLS   Adj. R-squared:                  0.073
Method:                 Least Squares   F-statistic:                     38.27
Date:                Sat, 20 Jan 2024   Prob (F-statistic):           7.77e-31
Time:                        19:47:33   Log-Likelihood:                -16745.
No. Observations:                1905   AIC:                         3.350e+04
Df Residuals:                    1900   BIC:                         3.353e+04
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                           coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------
-rho                 -4529.6384 

In [34]:
# Refit on the rows whose ask jump (AskAfter - AskBefore) is positive and
# below 0.03, printing how many rows survive the filter first.
df = pd.read_csv("DataWithoutPairs_USDRUB.csv")
df = df[(df["AskAfter"] - df["AskBefore"] > 0) & (df["AskAfter"] - df["AskBefore"] < 0.03)]
print(len(df))
df_OLS_anal(df)

3330
                            OLS Regression Results                            
Dep. Variable:                    SUM   R-squared:                       0.162
Model:                            OLS   Adj. R-squared:                  0.161
Method:                 Least Squares   F-statistic:                     160.6
Date:                Sat, 20 Jan 2024   Prob (F-statistic):          8.01e-126
Time:                        19:53:49   Log-Likelihood:                -27431.
No. Observations:                3328   AIC:                         5.487e+04
Df Residuals:                    3323   BIC:                         5.490e+04
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                           coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------
-rho                 -8259.