In [2]:
import numpy as np
import pandas as pd
import cvxpy as cp
import matplotlib.pyplot as plt
import tqdm
from tqdm import tqdm
import multiprocessing as mp
import seaborn as sns
from tqdm import trange

from cvx.stat_arb.ccp import *
from cvx.stat_arb.utils import *
from cvx.stat_arb.backtest import *
from cvx.stat_arb.metrics import *
from cvx.stat_arb.portfolio import *

sns.set()

%load_ext autoreload
%autoreload 2

In [3]:
# Load the raw asset table and reshape it into a (date x PERMNO) table of the PRC column.
# NOTE(review): absolute, machine-specific path. It is kept verbatim so the notebook
# still runs where it was written; consider a configurable data directory instead.
DATA_PATH = "/Users/kasper/Documents/Stanford/Research/Data/SP500/asset_data_2000_to_2023.csv"

data = pd.read_csv(DATA_PATH).drop(columns=["TICKER", "PERMCO"])

# pivot_table collapses duplicate (date, PERMNO) rows with its default aggfunc (mean).
prices = data.pivot_table(values="PRC", index="date", columns="PERMNO")
prices.index = pd.to_datetime(prices.index, format="%Y-%m-%d")

# Chronological windows. Label slices on a DatetimeIndex include both endpoints.
prices_train = prices.loc["2022-01-01":"2022-07-31"]
prices_val = prices.loc["2022-08-01":"2022-10-31"]
# Fix: the test window used to start on 2022-10-01, so October 2022 belonged to both
# the validation and the test set (and therefore to prices_train_val as well).
# It now starts right after the validation window ends.
prices_test = prices.loc["2022-11-01":"2022-12-31"]

prices_train_val = pd.concat([prices_train, prices_val])  # For later use

# Guard against reintroducing leakage when the date ranges above are edited.
assert prices_train.index.intersection(prices_val.index).empty, \
    "train and validation windows overlap"
assert prices_train_val.index.intersection(prices_test.index).empty, \
    "test window overlaps train/validation"

In [43]:
# Build a State_vectorized on the training prices, reset it, then call iterate() once.
# The keyword settings are gathered in one place so they are easy to spot and tweak.
state_kwargs = {"K": 100, "P_max": 10, "spread_max": 1}
state = State_vectorized(prices_train, **state_kwargs)
state.reset()
# Last expression of the cell: whatever iterate() returns is echoed as the cell output
# (the recorded output shows a State_vectorized repr after the solver log).
state.iterate()


1
                                     CVXPY                                     
                                     v1.3.1                                    
(CVXPY) Apr 20 01:10:50 PM: Your problem has 34500 variables, 3 constraints, and 14400 parameters.
(CVXPY) Apr 20 01:10:50 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Apr 20 01:10:50 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Apr 20 01:10:50 PM: Compiling problem (target solver=MOSEK).
(CVXPY) Apr 20 01:10:50 PM: Reduction chain: FlipObjective -> Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffing -> MOSEK
(CVXPY) Apr 20 01:10:50 PM: Applying reduction FlipObjective
(CVXPY) Apr 20 01:10:50 PM: Applying 



(CVXPY) Apr 20 01:11:04 PM: Applying reduction MOSEK
(CVXPY) Apr 20 01:12:26 PM: Finished problem compilation (took 9.610e+01 seconds).
(CVXPY) Apr 20 01:12:26 PM: (Subsequent compilations of this problem, using the same arguments, should take less time.)
-------------------------------------------------------------------------------
                                Numerical solver                               
-------------------------------------------------------------------------------
(CVXPY) Apr 20 01:12:26 PM: Invoking solver MOSEK  to obtain a solution.


(CVXPY) Apr 20 01:12:27 PM: Problem
(CVXPY) Apr 20 01:12:27 PM:   Name                   :                 
(CVXPY) Apr 20 01:12:27 PM:   Objective sense        : maximize        
(CVXPY) Apr 20 01:12:27 PM:   Type                   : LO (linear optimization problem)
(CVXPY) Apr 20 01:12:27 PM:   Constraints            : 68900           
(CVXPY) Apr 20 01:12:27 PM:   Affine conic cons.     : 0               
(CVXPY) Apr 20 01

<cvx.stat_arb.ccp.State_vectorized at 0x7fd6aae197c0>

In [42]:
# Display `state.s.value` (rendered as a NumPy array in the recorded output).
# NOTE(review): this cell's recorded execution count (42) is lower than that of the
# cell above (43), so the array shown may come from a run that preceded the latest
# reset()/iterate() call. Re-run top to bottom before relying on these numbers.
state.s.value

array([[-1.29492045e-04,  1.73467562e-05, -7.24936725e-05, ...,
        -1.35122531e-04,  4.40454507e-05, -7.12568634e-05],
       [ 1.22103227e-05,  8.00768069e-05, -6.54639620e-05, ...,
        -1.48811662e-07,  2.08377077e-05,  1.46980396e-04],
       [ 1.76111029e-04, -2.10206907e-04,  4.67434278e-05, ...,
         1.13253603e-04, -9.48055559e-05,  2.38868932e-07],
       ...,
       [-7.58726385e-05,  1.08489260e-04, -9.83591772e-05, ...,
         6.37361109e-05, -4.94541466e-05,  1.30838301e-05],
       [ 1.79013575e-04,  2.57388953e-05, -1.25080585e-04, ...,
         7.15694050e-05,  8.49050512e-05,  2.87998200e-05],
       [ 8.71380063e-05, -7.22394349e-06,  5.47041771e-05, ...,
         8.67622211e-05,  2.16954055e-06, -1.97274945e-04]])

# Experiment with solver speed

### ECOS

In [6]:
# ECOS benchmark with M=None: average prob.solver_stats.solve_time over repeated
# evaluate_solver calls.
np.random.seed(1)  # reseed the global NumPy RNG so the cell is repeatable

n_runs = 100
solve_times = []
# NOTE(review): `seeds` is never read below. The draw is kept because it still
# advances the global RNG state that was seeded above.
seeds = np.random.randint(0, 100000, n_runs)

solver_kwargs = dict(P_max=10, spread_max=1, solver="ECOS", M=None)
for run in trange(n_runs):
    prob = evaluate_solver(prices_train, **solver_kwargs)
    solve_times.append(prob.solver_stats.solve_time)

print(np.mean(solve_times))

100%|██████████| 100/100 [00:22<00:00,  4.47it/s]

0.09173866602999999





In [7]:
# ECOS benchmark with M=100: average prob.solver_stats.solve_time over repeated
# evaluate_solver calls.
np.random.seed(1)  # reseed the global NumPy RNG so the cell is repeatable

n_runs = 100
solve_times = []
# NOTE(review): `seeds` is never read below. The draw is kept because it still
# advances the global RNG state that was seeded above.
seeds = np.random.randint(0, 100000, n_runs)

solver_kwargs = dict(P_max=10, spread_max=1, solver="ECOS", M=100)
for run in trange(n_runs):
    prob = evaluate_solver(prices_train, **solver_kwargs)
    solve_times.append(prob.solver_stats.solve_time)

print(np.mean(solve_times))

100%|██████████| 100/100 [00:13<00:00,  7.30it/s]

0.041661339959999995





### MOSEK

In [8]:
# MOSEK benchmark with M=None: average prob.solver_stats.solve_time over repeated
# evaluate_solver calls.
np.random.seed(1)  # reseed the global NumPy RNG so the cell is repeatable

n_runs = 100
solve_times = []
# NOTE(review): `seeds` is never read below. The draw is kept because it still
# advances the global RNG state that was seeded above.
seeds = np.random.randint(0, 100000, n_runs)

solver_kwargs = dict(P_max=10, spread_max=1, solver="MOSEK", M=None)
for run in trange(n_runs):
    prob = evaluate_solver(prices_train, **solver_kwargs)
    solve_times.append(prob.solver_stats.solve_time)

print(np.mean(solve_times))

100%|██████████| 100/100 [00:17<00:00,  5.74it/s]

0.051698272228240964





In [9]:
# MOSEK benchmark with M=100: average prob.solver_stats.solve_time over repeated
# evaluate_solver calls.
np.random.seed(1)  # reseed the global NumPy RNG so the cell is repeatable

n_runs = 100
solve_times = []
# NOTE(review): `seeds` is never read below. The draw is kept because it still
# advances the global RNG state that was seeded above.
seeds = np.random.randint(0, 100000, n_runs)

solver_kwargs = dict(P_max=10, spread_max=1, solver="MOSEK", M=100)
for run in trange(n_runs):
    prob = evaluate_solver(prices_train, **solver_kwargs)
    solve_times.append(prob.solver_stats.solve_time)

print(np.mean(solve_times))

100%|██████████| 100/100 [00:11<00:00,  8.53it/s]

0.030637469291687012





### GLPK

In [12]:
# GLPK benchmark: average wall-clock time of a full evaluate_solver call.
#
# Unlike the other benchmark cells, this one does not read prob.solver_stats.solve_time.
# The TypeError in this cell's recorded output ('NoneType' + 'NoneType') is consistent
# with that field being None for GLPK and np.mean failing on it — presumably why
# wall-clock timing was substituted. The figure therefore covers everything
# evaluate_solver does, not only the solver run, and is NOT directly comparable
# with the solver-reported times of the other cells.
#
# Imported here rather than in the top cell: the wildcard imports there could rebind
# the name `time`, and this cell needs the standard-library module.
import time

np.random.seed(1)  # reseed the global NumPy RNG so the cell is repeatable

solve_times = []
# NOTE(review): `seeds` is never read below. The draw is kept because it still
# advances the global RNG state that was seeded above.
seeds = np.random.randint(0, 100000, 100)
for i in trange(100):
    # perf_counter() is monotonic and high-resolution. time.time() can jump when the
    # system clock is adjusted, which makes it unsuitable for measuring durations.
    start = time.perf_counter()
    prob = evaluate_solver(prices_train, P_max=10, spread_max=1, solver="GLPK")
    solve_times.append(time.perf_counter() - start)
print(np.mean(solve_times))

100%|██████████| 100/100 [00:47<00:00,  2.10it/s]


TypeError: unsupported operand type(s) for +: 'NoneType' and 'NoneType'

### CLARABEL

In [10]:
# CLARABEL benchmark with M=None: average prob.solver_stats.solve_time over repeated
# evaluate_solver calls.
np.random.seed(1)  # reseed the global NumPy RNG so the cell is repeatable

n_runs = 100
solve_times = []
# NOTE(review): `seeds` is never read below. The draw is kept because it still
# advances the global RNG state that was seeded above.
seeds = np.random.randint(0, 100000, n_runs)

solver_kwargs = dict(P_max=10, spread_max=1, solver="CLARABEL", M=None)
for run in trange(n_runs):
    prob = evaluate_solver(prices_train, **solver_kwargs)
    solve_times.append(prob.solver_stats.solve_time)

print(np.mean(solve_times))

100%|██████████| 100/100 [00:20<00:00,  4.91it/s]

0.07484680669999999





In [11]:
# CLARABEL benchmark with M=100: average prob.solver_stats.solve_time over repeated
# evaluate_solver calls.
np.random.seed(1)  # reseed the global NumPy RNG so the cell is repeatable

n_runs = 100
solve_times = []
# NOTE(review): `seeds` is never read below. The draw is kept because it still
# advances the global RNG state that was seeded above.
seeds = np.random.randint(0, 100000, n_runs)

solver_kwargs = dict(P_max=10, spread_max=1, solver="CLARABEL", M=100)
for run in trange(n_runs):
    prob = evaluate_solver(prices_train, **solver_kwargs)
    solve_times.append(prob.solver_stats.solve_time)

print(np.mean(solve_times))

100%|██████████| 100/100 [00:13<00:00,  7.54it/s]

0.04160965142999999



