In [21]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

import statsmodels.api as sm

from statsmodels.regression.linear_model import OLS

In [80]:
# Build one summary row per CSV quote file found in `directory`.
directory = "data"

files = os.listdir(directory)

# Each entry is a NumPy string array with the fields
#   [name, total - lil, lil, 10 * mean volume, lil / total, spikes / (total - lil)]
listQuadras = []

for file in files:
    if not file.endswith(".csv"):
        continue

    # Read from the directory that was actually listed; the prefix used to be
    # hard-coded as "data/" and would drift if `directory` were changed.
    path = directory + "/" + file
    print(path)
    df = pd.read_csv(path)

    ask_change = df["AskAfter"] - df["AskBefore"]
    total = len(df)
    lil = int((ask_change <= 0).sum())  # rows where the ask did not rise
    me = np.mean(df["Volume"])
    # Rows where the ask rose although the volume was below a tenth of the mean.
    spikes = int(((ask_change > 0) & (df["Volume"] < me * 1e-1)).sum())

    rises = total - lil
    listQuadras.append(np.array(
        [
            file[:-6],  # drops the last six characters ("WP.csv" for the files listed below)
            rises,
            lil,
            me * 1e1,
            # Guard both ratios: an empty file, or one without any ask rise,
            # used to raise ZeroDivisionError and abort the whole loop.
            lil / total if total else float("nan"),
            spikes / rises if rises else float("nan"),
        ]
    ))

# np.array() turned every field into a string, so convert back to int to sort.
listQuadras.sort(key=lambda x: int(x[1]))
print(listQuadras)

data/GBPRUB_TOMWP.csv
data/USD000000TODWP.csv
data/USD000UTSTOMWP.csv
data/USD000TODTOMWP.csv
data/EURUSD000TOMWP.csv
data/EUR000TODTOMWP.csv
data/EUR_RUB__TOMWP.csv
data/EUR_RUB__TODWP.csv
data/CNYRUB_TOMWP.csv
[array(['EUR000TODTOM', '73', '192', '68505660.37735848',
       '0.7245283018867924', '0.8904109589041096'], dtype='<U32'), array(['CNYRUB_TOM', '81', '86', '3938323.353293413',
       '0.5149700598802395', '0.5061728395061729'], dtype='<U32'), array(['EURUSD000TOM', '82', '833', '1288622.950819672',
       '0.9103825136612022', '0.25609756097560976'], dtype='<U32'), array(['GBPRUB_TOM', '85', '149', '74871.79487179487',
       '0.6367521367521367', '0.0'], dtype='<U32'), array(['USD000TODTOM', '369', '974', '72096798.21295607',
       '0.7252419955323902', '0.8943089430894309'], dtype='<U32'), array(['EUR_RUB__TOM', '912', '12471', '734976.4626765299',
       '0.9318538444295001', '0.32346491228070173'], dtype='<U32'), array(['EUR_RUB__TOD', '1753', '2381', '687010.1596516691

In [81]:
# Print every summary row as a LaTeX table line, walking the list backwards.
for q in listQuadras[::-1]:
    # "\\hline" spells the backslash explicitly: the former "\h" is not a valid
    # escape sequence and triggers a warning on current Python versions.
    # The printed text is unchanged.
    print(" & ".join(q) + " \\\\ \\hline")

USD000UTSTOM & 2250 & 39713 & 1395885.899482878 & 0.9463813359388032 & 0.208 \\ \hline
USD000000TOD & 1834 & 11557 & 969572.8474348444 & 0.8630423418714062 & 0.31025081788440567 \\ \hline
EUR_RUB__TOD & 1753 & 2381 & 687010.1596516691 & 0.5759554910498307 & 0.41585852823730746 \\ \hline
EUR_RUB__TOM & 912 & 12471 & 734976.4626765299 & 0.9318538444295001 & 0.32346491228070173 \\ \hline
USD000TODTOM & 369 & 974 & 72096798.21295607 & 0.7252419955323902 & 0.8943089430894309 \\ \hline
GBPRUB_TOM & 85 & 149 & 74871.79487179487 & 0.6367521367521367 & 0.0 \\ \hline
EURUSD000TOM & 82 & 833 & 1288622.950819672 & 0.9103825136612022 & 0.25609756097560976 \\ \hline
CNYRUB_TOM & 81 & 86 & 3938323.353293413 & 0.5149700598802395 & 0.5061728395061729 \\ \hline
EUR000TODTOM & 73 & 192 & 68505660.37735848 & 0.7245283018867924 & 0.8904109589041096 \\ \hline


In [None]:
# Load the USD000UTSTOM quote file into the notebook-wide `df` used by the cells below.
df = pd.read_csv("data/USD000UTSTOMWP.csv")

In [19]:
# Number of rows in which the ask price went up.
int((df["AskAfter"] - df["AskBefore"] > 0).sum())

2250

In [20]:
# Total number of rows in the loaded frame.
df.shape[0]

41963

In [15]:
def df_OLS_anal(df: pd.DataFrame):
    """Fit an OLS model to differenced ask/volume rates and print the result.

    Reads the "AskAfter", "AskBefore", "Time" and "Volume" columns of ``df``.
    The first and last rows are dropped, ask changes and volumes are divided
    by the time gap to the following row, and the first difference of the
    ask-change rate is regressed on four columns. The statsmodels summary and
    the derived ``rho``, ``lambda`` and ``kappa`` values are printed; nothing
    is returned.
    """
    # Interior rows only: the first and the last row are discarded.
    ask_step = np.array(df["AskAfter"] - df["AskBefore"])[1:-1]
    volume = np.array(df["Volume"][1:-1])
    # Time gap between each interior row and the row after it.
    time_step = np.diff(df["Time"])[1:]

    ask_rate = ask_step / time_step
    volume_rate = volume / time_step

    # Column labels name the coefficient combination each regressor stands for;
    # rho, kappa and lambda are recovered from them after the fit.
    regressors = pd.DataFrame({
        "-rho": ask_step[:-1],
        "-rho kappa": volume[1:],
        "rho (lambda + kappa)": volume[:-1],
        "kappa + lambda": np.diff(volume_rate),
    })
    regressand = pd.Series(np.diff(ask_rate), name="SUM")

    # No constant column is added to the regressors.
    res = OLS(regressand, regressors).fit()
    print(res.summary())

    coef = res.params
    rho = -coef.iloc[0]
    kappa = -coef.iloc[1] / rho
    lamb = coef.iloc[2] / rho - kappa

    print("rho = ", rho, "\nlambda = ", lamb, "\nkappa = ", kappa, "\n \n", end="")

In [None]:
def df_OLS_anal_AO(df: pd.DataFrame):
    """Run the same OLS analysis as ``df_OLS_anal`` on ``df``.

    This function used to be a line-for-line copy of ``df_OLS_anal``. It now
    forwards to it so the regression set-up lives in one place. The
    statsmodels summary and the ``rho``/``lambda``/``kappa`` values are
    printed and nothing is returned.
    """
    # TODO(review): if an "AO"-specific variant was intended, implement the
    # difference here rather than re-copying the shared body.
    df_OLS_anal(df)

In [18]:
# Fit the model on a file named "... copy.csv", read from the working directory.
# NOTE(review): presumably a duplicate of USD000UTSTOMWP.csv — confirm which file is canonical.
df = pd.read_csv("USD000UTSTOMWP copy.csv")
df_OLS_anal(df)

                            OLS Regression Results                            
Dep. Variable:                    SUM   R-squared:                       0.031
Model:                            OLS   Adj. R-squared:                  0.031
Method:                 Least Squares   F-statistic:                     448.6
Date:                Mon, 22 Jan 2024   Prob (F-statistic):          6.81e-287
Time:                        16:57:43   Log-Likelihood:            -3.6245e+05
No. Observations:               41960   AIC:                         7.249e+05
Df Residuals:                   41956   BIC:                         7.249e+05
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                           coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------
-rho                 -1.835e+04 

In [17]:
# Fit the model on the EUR_RUB__TOD quotes (path is relative to the working directory, not data/).
df = pd.read_csv("EUR_RUB__TODWP.csv")
df_OLS_anal(df)

                                 OLS Regression Results                                
Dep. Variable:                    SUM   R-squared (uncentered):                   0.020
Model:                            OLS   Adj. R-squared (uncentered):              0.019
Method:                 Least Squares   F-statistic:                              21.45
Date:                Mon, 22 Jan 2024   Prob (F-statistic):                    1.55e-17
Time:                        16:57:17   Log-Likelihood:                         -36952.
No. Observations:                4131   AIC:                                  7.391e+04
Df Residuals:                    4127   BIC:                                  7.394e+04
Df Model:                           4                                                  
Covariance Type:            nonrobust                                                  
                           coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------

In [6]:
# Fit the model on the EUR_RUB__TOM quotes (path is relative to the working directory, not data/).
df = pd.read_csv("EUR_RUB__TOMWP.csv")
df_OLS_anal(df)

                                 OLS Regression Results                                
Dep. Variable:                    SUM   R-squared (uncentered):                   0.004
Model:                            OLS   Adj. R-squared (uncentered):              0.003
Method:                 Least Squares   F-statistic:                              12.10
Date:                Mon, 22 Jan 2024   Prob (F-statistic):                    8.16e-10
Time:                        09:26:13   Log-Likelihood:                     -1.0975e+05
No. Observations:               13380   AIC:                                  2.195e+05
Df Residuals:                   13376   BIC:                                  2.195e+05
Df Model:                           4                                                  
Covariance Type:            nonrobust                                                  
                           coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------

In [7]:
# Fit the model on the EUR000TODTOM quotes (path is relative to the working directory, not data/).
df = pd.read_csv("EUR000TODTOMWP.csv")
df_OLS_anal(df)

                                 OLS Regression Results                                
Dep. Variable:                    SUM   R-squared (uncentered):                   0.682
Model:                            OLS   Adj. R-squared (uncentered):              0.677
Method:                 Least Squares   F-statistic:                              138.6
Date:                Mon, 22 Jan 2024   Prob (F-statistic):                    4.95e-63
Time:                        09:26:32   Log-Likelihood:                         -1927.7
No. Observations:                 262   AIC:                                      3863.
Df Residuals:                     258   BIC:                                      3878.
Df Model:                           4                                                  
Covariance Type:            nonrobust                                                  
                           coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------

In [8]:
# Fit the model on the GBPRUB_TOM quotes (path is relative to the working directory, not data/).
df = pd.read_csv("GBPRUB_TOMWP.csv")
df_OLS_anal(df)

                                 OLS Regression Results                                
Dep. Variable:                    SUM   R-squared (uncentered):                   0.435
Model:                            OLS   Adj. R-squared (uncentered):              0.425
Method:                 Least Squares   F-statistic:                              43.74
Date:                Mon, 22 Jan 2024   Prob (F-statistic):                    3.43e-27
Time:                        09:27:15   Log-Likelihood:                         -1462.7
No. Observations:                 231   AIC:                                      2933.
Df Residuals:                     227   BIC:                                      2947.
Df Model:                           4                                                  
Covariance Type:            nonrobust                                                  
                           coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------

In [9]:
# Fit the model on the USD000TODTOM quotes (path is relative to the working directory, not data/).
df = pd.read_csv("USD000TODTOMWP.csv")
df_OLS_anal(df)

                                 OLS Regression Results                                
Dep. Variable:                    SUM   R-squared (uncentered):                   0.266
Model:                            OLS   Adj. R-squared (uncentered):              0.263
Method:                 Least Squares   F-statistic:                              120.8
Date:                Mon, 22 Jan 2024   Prob (F-statistic):                    4.68e-88
Time:                        09:28:11   Log-Likelihood:                         -10566.
No. Observations:                1340   AIC:                                  2.114e+04
Df Residuals:                    1336   BIC:                                  2.116e+04
Df Model:                           4                                                  
Covariance Type:            nonrobust                                                  
                           coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------

In [10]:
# Fit the model on the USD000000TOD quotes (path is relative to the working directory, not data/).
df = pd.read_csv("USD000000TODWP.csv")
df_OLS_anal(df)

                                 OLS Regression Results                                
Dep. Variable:                    SUM   R-squared (uncentered):                   0.039
Model:                            OLS   Adj. R-squared (uncentered):              0.038
Method:                 Least Squares   F-statistic:                              134.1
Date:                Mon, 22 Jan 2024   Prob (F-statistic):                   1.65e-112
Time:                        09:28:40   Log-Likelihood:                     -1.1489e+05
No. Observations:               13388   AIC:                                  2.298e+05
Df Residuals:                   13384   BIC:                                  2.298e+05
Df Model:                           4                                                  
Covariance Type:            nonrobust                                                  
                           coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------

In [11]:
# Fit the model on the USD000UTSTOM quotes (path is relative to the working directory, not data/).
df = pd.read_csv("USD000UTSTOMWP.csv")
df_OLS_anal(df) 

                            OLS Regression Results                            
Dep. Variable:                    SUM   R-squared:                       0.031
Model:                            OLS   Adj. R-squared:                  0.031
Method:                 Least Squares   F-statistic:                     448.6
Date:                Mon, 22 Jan 2024   Prob (F-statistic):          6.81e-287
Time:                        09:28:57   Log-Likelihood:            -3.6245e+05
No. Observations:               41960   AIC:                         7.249e+05
Df Residuals:                   41956   BIC:                         7.249e+05
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                           coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------
-rho                 -1.835e+04 

In [12]:
# Fit the model on the EURUSD000TOM quotes (path is relative to the working directory, not data/).
df = pd.read_csv("EURUSD000TOMWP.csv")
df_OLS_anal(df)

                                 OLS Regression Results                                
Dep. Variable:                    SUM   R-squared (uncentered):                   0.127
Model:                            OLS   Adj. R-squared (uncentered):              0.124
Method:                 Least Squares   F-statistic:                              33.13
Date:                Mon, 22 Jan 2024   Prob (F-statistic):                    8.13e-26
Time:                        09:38:10   Log-Likelihood:                         -4369.6
No. Observations:                 912   AIC:                                      8747.
Df Residuals:                     908   BIC:                                      8766.
Df Model:                           4                                                  
Covariance Type:            nonrobust                                                  
                           coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------

In [46]:
# Keep only rows with 3600 < Time < 30000, renumber them, then fit the model.
df = pd.read_csv("DataWithoutPairs_USDRUB.csv")
df = df[(df["Time"] > 3600) & (df["Time"] < 30000)].reset_index()
df_OLS_anal(df)

                                 OLS Regression Results                                
Dep. Variable:                    SUM   R-squared (uncentered):                   0.030
Model:                            OLS   Adj. R-squared (uncentered):              0.029
Method:                 Least Squares   F-statistic:                              47.30
Date:                Sun, 21 Jan 2024   Prob (F-statistic):                    3.21e-39
Time:                        12:20:39   Log-Likelihood:                         -56672.
No. Observations:                6109   AIC:                                  1.134e+05
Df Residuals:                    6105   BIC:                                  1.134e+05
Df Model:                           4                                                  
Covariance Type:            nonrobust                                                  
                           coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------

In [47]:
# Keep only rows whose ask rose by less than 0.03, report how many remain, then fit.
df = pd.read_csv("DataWithoutPairs_USDRUB.csv")
df = df[
    (df["AskAfter"] - df["AskBefore"] > 0)
    & (df["AskAfter"] - df["AskBefore"] < 0.03)
]
print(len(df))
df_OLS_anal(df)

3330
                                 OLS Regression Results                                
Dep. Variable:                    SUM   R-squared (uncentered):                   0.194
Model:                            OLS   Adj. R-squared (uncentered):              0.193
Method:                 Least Squares   F-statistic:                              200.4
Date:                Sun, 21 Jan 2024   Prob (F-statistic):                   3.70e-154
Time:                        12:20:50   Log-Likelihood:                         -27271.
No. Observations:                3327   AIC:                                  5.455e+04
Df Residuals:                    3323   BIC:                                  5.457e+04
Df Model:                           4                                                  
Covariance Type:            nonrobust                                                  
                           coef    std err          t      P>|t|      [0.025      0.975]
--------------------------