In [5]:
import pandas as pd
import numpy as np
import random
from numpy.random import RandomState
from scipy import stats
from scipy.optimize import minimize
import scipy
import os
os.environ['R_HOME'] = '/Library/Frameworks/R.framework/Versions/4.1/Resources/'
import rpy2
from rpy2.robjects.packages import importr
from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
import datetime
from rpy2.robjects.vectors import FloatVector
import multiprocessing

# Seed NumPy's global RNG so the seeds drawn from it later in this script
# are reproducible from run to run.
np.random.seed(441)

# Define the model that generates pair simulations.
# Load the R package "yuima" through rpy2. The R code below calls setModel,
# setSampling and simulate (presumably provided by yuima -- confirm).
yuima = importr("yuima")
# R source, kept as a Python string, defining n_sim_ou(...). As written it:
#   * seeds R's RNG with random_seed;
#   * declares a two-variable model (X1, X2) with drift
#     (mu11 - mu12*X1, mu21 - mu22*X2) and a 2x2 diffusion matrix whose
#     entries are exp(sigma_ij), i.e. the sigma arguments are on a log scale;
#   * builds a sampling scheme from T0 to T with n=length;
#   * simulates the model num_sim times and stores simulation i in columns
#     2*i-1 and 2*i of a data.frame with length+1 rows and 2*num_sim columns;
#   * returns that data.frame.
# NOTE(review): xinit is passed as xinit_vec[i] (single-bracket indexing);
# confirm this selects the intended initial-value pair for simulation i.
n_ou_sim_string = """
n_sim_ou = function(random_seed, num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length){

  set.seed(random_seed)

  drift = c("mu11-mu12*X1", "mu21-mu22*X2")
  diffusion = matrix(c("exp(sigma11)", "exp(sigma12)", "exp(sigma21)", "exp(sigma22)"), 2, 2, byrow=TRUE)
  ou_model = setModel(drift=drift, diffusion=diffusion, 
                        time.variable = "t",
                        state.var=c("X1","X2"), solve.variable=c("X1","X2"))

  newsamp = setSampling(Initial=T0, Terminal=T, n=length)

  n_sim_ou_data = data.frame(matrix(nrow=length+1, ncol=2*num_sim))
  for (i in 1:num_sim){
    ou_sim = simulate(ou_model, 
                      true.par=list(
                        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22, 
                        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22), 
                      xinit=xinit_vec[i], sampling=newsamp)
    original_data = ou_sim@data@original.data
    one_sim_ou = data.frame(original_data[,1], original_data[,2])
    colnames(one_sim_ou) = c('series1', 'series2')
    n_sim_ou_data[, (2*i-1):(2*i)] = one_sim_ou
  }
  return(n_sim_ou_data)
}
"""

# Wrap the R source in an rpy2 anonymous package so that the R function is
# reachable from Python as n_ou_sim.n_sim_ou.
n_ou_sim = SignatureTranslatedAnonymousPackage(n_ou_sim_string, "n_ou_sim")
def n_ou_simulation(random_seed, num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length):
    """Run the R routine ``n_sim_ou`` and return its output as a DataFrame.

    Every argument is forwarded by keyword, unchanged, to
    ``n_ou_sim.n_sim_ou`` (``num_sim`` simulations of the bivariate
    Ornstein-Uhlenbeck model; ``length`` is the sampling size of one series).
    The object returned by R is wrapped in ``pd.DataFrame`` and transposed.
    """
    r_frame = n_ou_sim.n_sim_ou(
        random_seed=random_seed,
        num_sim=num_sim,
        mu11=mu11,
        mu12=mu12,
        mu21=mu21,
        mu22=mu22,
        sigma11=sigma11,
        sigma12=sigma12,
        sigma21=sigma21,
        sigma22=sigma22,
        xinit_vec=xinit_vec,
        T0=T0,
        T=T,
        length=length,
    )
    return pd.DataFrame(r_frame).transpose()

def price_to_log_price(n_price):
    """Return the element-wise natural logarithm of ``n_price``."""
    log_price = np.log(n_price)
    return log_price
def log_price_to_price(n_log_price):
    """Return the element-wise exponential of ``n_log_price``."""
    price = np.exp(n_log_price)
    return price
def price_to_return(n_price):
    """Convert a frame of prices into percentage log returns, column by column.

    For every column the result holds ``100 * (log(p[t+1]) - log(p[t]))``.

    Args:
        n_price: pandas DataFrame of prices, one series per column.

    Returns:
        DataFrame with the same column labels and one row fewer than
        ``n_price``. Each return is labelled with the index of the *earlier*
        of the two prices it is computed from (the last row label is dropped).
    """
    log_price = np.log(n_price)
    # One vectorised subtraction replaces a per-column pd.concat loop, which
    # re-copied the growing result for every column. Using .values on the
    # shifted block makes the subtraction positional rather than label-aligned,
    # so the result keeps the row labels of the first n-1 rows.
    return 100 * (log_price.iloc[1:].values - log_price.iloc[:-1])
def log_price_to_return(n_log_price):
    """Convert a frame of log prices into percentage log returns.

    For every column the result holds ``100 * (x[t+1] - x[t])``.

    Args:
        n_log_price: pandas DataFrame of log prices, one series per column.

    Returns:
        DataFrame with the same column labels and one row fewer than
        ``n_log_price``. Each return is labelled with the index of the
        *earlier* of the two observations (the last row label is dropped).
    """
    # One vectorised subtraction replaces a per-column pd.concat loop, which
    # re-copied the growing result for every column. Using .values on the
    # shifted block makes the subtraction positional rather than label-aligned.
    return 100 * (n_log_price.iloc[1:].values - n_log_price.iloc[:-1])
def cal_stats(n_return, n_price=None):
    """Summarise each pair of return series by four moments.

    Columns of ``n_return`` are read in interleaved order: even positions are
    the first series of each pair, odd positions the second. ``n_price`` is
    accepted but not used.

    Returns a DataFrame with one row per pair and the columns
    ``return_mean1``, ``return_mean2``, ``return_sd1``, ``return_sd2``.
    (Different expressions of calculation from intro to stat finance.)
    """
    first_of_pair = n_return.iloc[:, 0::2]
    second_of_pair = n_return.iloc[:, 1::2]

    # One row per statistic; transposed below so that pairs become the rows.
    moment_rows = [
        first_of_pair.mean(axis=0).values,
        second_of_pair.mean(axis=0).values,
        first_of_pair.std(axis=0).values,
        second_of_pair.std(axis=0).values,
    ]
    summary = pd.DataFrame(moment_rows).transpose()
    summary.columns = ['return_mean1', 'return_mean2', 'return_sd1', 'return_sd2']
    return summary

def loss_function(params):
    """Return the total absolute gap between reference and candidate moments.

    Two batches of paths are simulated on every call, each with a fresh seed
    drawn from NumPy's global RNG (so the objective is stochastic):

    * a reference batch using the module-level parameters ``mu12``, ``mu22``,
      ``sigma11`` and ``sigma22``;
    * a candidate batch where those four parameters are replaced by
      ``params[0]``, ``params[1]``, ``params[2]`` and ``params[3]``.

    Both batches are turned into returns and summarised with ``cal_stats``;
    the loss is the sum over all pairs and all four statistics of the
    absolute difference between the two summaries.

    Reads the module-level names ``num_sim``, ``mu11``, ``mu12``, ``mu21``,
    ``mu22``, ``sigma11``, ``sigma12``, ``sigma21``, ``sigma22``,
    ``xinit_vec``, ``T0``, ``T`` and ``length``.

    Args:
        params: sequence of four floats (mu12, mu22, sigma11, sigma22).

    Returns:
        The scalar loss. Progress information is printed as a side effect.
    """
    params = FloatVector(params)
    print(params)

    # Index the size-1 draw explicitly: int() on a 1-element ndarray is
    # deprecated in recent NumPy. The RNG stream is unchanged.
    real_seed = int(np.random.randint(low=0, high=980608, size=(1,))[0])
    n_real_log_price = n_ou_simulation(
        real_seed, num_sim,
        mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
        xinit_vec, T0, T, length)
    print(n_real_log_price.iloc[5, 5])
    n_real_price = log_price_to_price(n_log_price=n_real_log_price)
    n_real_return = price_to_return(n_price=n_real_price)
    n_real_stats = cal_stats(n_return=n_real_return, n_price=None)

    sim_seed = int(np.random.randint(low=0, high=980608, size=(1,))[0])
    n_sim_log_price = n_ou_simulation(
        random_seed=sim_seed, num_sim=num_sim,
        mu11=mu11, mu12=params[0], mu21=mu21, mu22=params[1],
        sigma11=params[2], sigma12=sigma12, sigma21=sigma21, sigma22=params[3],
        xinit_vec=xinit_vec, T0=T0, T=T, length=length)
    print(n_sim_log_price.iloc[5, 5])
    n_sim_price = log_price_to_price(n_sim_log_price)
    n_sim_return = price_to_return(n_sim_price)
    n_sim_stats = cal_stats(n_sim_return)

    # sqrt(x**2) is |x|; compute it for the whole table at once instead of
    # writing one cell at a time through .iloc.
    moment_loss = (n_real_stats - n_sim_stats).abs()
    # Per-statistic totals. The axis is explicit because reducing a DataFrame
    # with np.sum relies on axis=None behaviour that pandas has deprecated.
    sum_all = moment_loss.sum(axis=0)
    total_loss = sum_all.sum()
    print(sum_all)
    print(total_loss)
    print('---')

    return total_loss



# Observed data: pair prices (converted to log prices) and pair returns,
# with the returns summarised into the target statistics.
real_price = pd.read_csv("sp500_20180101_20181231_pair_prices.csv", index_col=[0])
real_log_price = price_to_log_price(n_price=real_price)
real_return = pd.read_csv("sp500_20180101_20181231_pair_returns.csv", index_col=[0])
real_stats = cal_stats(n_return=real_return, n_price=None)

# Parameters held fixed in every simulation.
mu11 = 0
mu21 = 0
sigma12 = -1000
sigma21 = -1000

# Initial state of each pair: the first-row log prices of its two columns.
xinit_vec = [
    FloatVector([real_log_price.iloc[0, 2 * pair], real_log_price.iloc[0, 2 * pair + 1]])
    for pair in range(int(real_log_price.shape[1] / 2))
]

# Simulation dimensions: one simulated pair per row of real_stats, on a grid
# from T0 to T sized by the number of price rows.
num_sim = real_stats.shape[0]
T0 = 0
T = 1
length = real_price.shape[0]

# Parameter values used for the reference simulation inside loss_function.
mu12 = 0.0369
mu22 = 0.0405
sigma11 = -1.4118
sigma22 = -1.3574


# Number of independent optimiser runs and their common starting point.
num_iter = 1
initial0 = [1, 1, -1, -1]


# One NumPy seed per optimiser run.
iter_seed = np.random.randint(low=0, high=980608, size=(num_iter,))
def multi_process(iter):
    """Run one Powell minimisation of ``loss_function`` and report on it.

    ``iter`` selects the entry of ``iter_seed`` used to reseed NumPy's global
    RNG before optimising from ``initial0``. After the optimiser finishes,
    the loss is evaluated once more at the returned point.

    Returns a tuple ``(optimum, elapsed, loss)`` where ``elapsed`` is the
    wall-clock duration of the ``minimize`` call.
    """
    print(iter)
    np.random.seed(int(iter_seed[iter]))

    started_at = datetime.datetime.now()
    fit = minimize(
        loss_function,
        initial0,
        method='Powell',
        tol=1e-6,
        options={'disp': True},
    )
    print(fit.x)

    elapsed = datetime.datetime.now() - started_at
    print(elapsed)

    final_loss = loss_function(fit.x)
    print(final_loss)
    return (fit.x, elapsed, final_loss)


# Launch the optimiser runs in worker processes. The guard keeps workers
# started with the "spawn" method from re-executing this launch when they
# re-import the main module, and the context manager shuts the pool down
# once the (blocking) map call has returned.
if __name__ == "__main__":
    iterations = list(range(num_iter))
    with multiprocessing.Pool() as pool:
        result = pool.map(multi_process, iterations)
    print(result)

0
[1]  1  1 -1 -1

5.043932089409035
4.848796057384303
return_mean1    257.313378
return_mean2    251.792801
return_sd1      200.487853
return_sd2      178.956875
dtype: float64
888.550906163143
---
[1]  1  1 -1 -1

5.064931605699332
4.983529890659132
return_mean1    254.207319
return_mean2    248.394881
return_sd1      199.448290
return_sd2      181.083751
dtype: float64
883.1342419634568
---
[1]  2  1 -1 -1

5.023942365564527
5.000547726120866
return_mean1    356.434064
return_mean2    247.950593
return_sd1      235.424358
return_sd2      173.147819
dtype: float64
1012.9568337539648
---
[1] -0.618034  1.000000 -1.000000 -1.000000

5.063860422575563
4.8977924216023005
return_mean1    379.869747
return_mean2    251.211521
return_sd1      198.540187
return_sd2      177.149370
dtype: float64
1006.7708250147936
---
[1]  1  1 -1 -1

5.083635029227609
4.932762784685886
return_mean1    259.888573
return_mean2    249.592561
return_sd1      199.502427
return_sd2      178.989649
dtype: float64


dtype: float64
437.1269980764399
---
[1]  0.06860734  0.02238247 -1.00000000 -1.00000000

5.065030923057463
4.99091111318031
return_mean1     33.592092
return_mean2     34.372928
return_sd1      194.295178
return_sd2      172.553827
dtype: float64
434.81402606303567
---
[1]  0.06860734  0.02248022 -1.00000000 -1.00000000

5.017271177690762
4.931232010300562
return_mean1     34.893059
return_mean2     33.988801
return_sd1      194.534667
return_sd2      172.832877
dtype: float64
436.2494040332532
---
[1]  0.06860734  0.02248022  0.00000000 -1.00000000

5.046526224477517
5.127380512073765
return_mean1      77.530255
return_mean2      35.432144
return_sd1      1185.351096
return_sd2       171.800945
dtype: float64
1470.1144405601656
---
[1]  0.06860734  0.02248022 -2.61803400 -1.00000000

5.029849614130829
5.011408443674001
return_mean1     23.017164
return_mean2     34.359030
return_sd1      267.385467
return_sd2      171.799907
dtype: float64
496.5615690149033
---
[1]  0.06860734  0.022

dtype: float64
126.77432142319284
---
[1]  0.06860734  0.02248022 -1.42315568 -1.36325136

5.013996915529532
5.041267559772866
return_mean1    30.627088
return_mean2    31.660504
return_sd1      19.706276
return_sd2      20.911568
dtype: float64
102.90543619005025
---
[1]  0.06860734  0.02248022 -1.42315568 -1.41262715

5.0320215816989275
5.032496023021286
return_mean1    29.504575
return_mean2    28.311401
return_sd1      20.151733
return_sd2      27.570702
dtype: float64
105.53841152265548
---
[1]  0.06860734  0.02248022 -1.42315568 -1.38869331

5.026780202155203
5.040164414380262
return_mean1    27.461909
return_mean2    27.680701
return_sd1      21.280038
return_sd2      23.597313
dtype: float64
100.01996031798234
---
[1]  0.06860734  0.02248022 -1.42315568 -1.37787130

5.004780317640271
4.974017444646171
return_mean1    27.475338
return_mean2    29.033732
return_sd1      20.346042
return_sd2      21.163691
dtype: float64
98.01880263713447
---
[1]  0.06860734  0.02248022 -1.4231556

5.043065675612404
return_mean1    29.181029
return_mean2    30.892597
return_sd1      19.115315
return_sd2      21.083693
dtype: float64
100.27263331125127
---
[1]  0.06942324  0.02248022 -1.42315568 -1.36987041

5.0781974567799635
5.0179648555635294
return_mean1    29.457731
return_mean2    29.416881
return_sd1      18.428945
return_sd2      20.042379
dtype: float64
97.34593519890912
---
[1]  0.06937929  0.02248022 -1.42315568 -1.36987041

4.950140274593623
5.049700434809436
return_mean1    28.460543
return_mean2    29.335925
return_sd1      20.029256
return_sd2      19.701004
dtype: float64
97.52672840812993
---
[1]  0.06940646  0.02248022 -1.42315568 -1.36987041

5.002519160019739
4.977078359006279
return_mean1    27.590167
return_mean2    27.677543
return_sd1      20.192911
return_sd2      20.156223
dtype: float64
95.61684397946603
---
[1]  0.06938967  0.02248022 -1.42315568 -1.36987041

4.973189215547922
5.005399763244582
return_mean1    30.452066
return_mean2    30.100151
return_

dtype: float64
99.2504513484144
---
[1]  0.06939608  0.03004303 -1.42315568 -1.36987041

4.965929503878852
4.972300097985354
return_mean1    28.404651
return_mean2    28.688368
return_sd1      19.551730
return_sd2      21.010931
dtype: float64
97.65568017717523
---
[1]  0.06939608  0.03004303 -0.42315568 -1.36987041

4.973508381567346
5.068660219073018
return_mean1     53.294172
return_mean2     26.584167
return_sd1      644.205109
return_sd2       20.944605
dtype: float64
745.0280532644331
---
[1]  0.06939608  0.03004303 -3.04118968 -1.36987041

5.0436705475020185
4.973744330527269
return_mean1     20.756739
return_mean2     29.517278
return_sd1      306.827990
return_sd2       19.909468
dtype: float64
377.0114745532508
---
[1]  0.06939608  0.03004303 -1.42315568 -1.36987041

5.028346320492397
4.995677725170369
return_mean1    26.793903
return_mean2    27.616683
return_sd1      19.320759
return_sd2      20.629963
dtype: float64
94.36130767918263
---
[1]  0.06939608  0.03004303 -2.0411

dtype: float64
100.56278882364298
---
[1]  0.06939608  0.03004303 -1.42315594 -1.36987041

4.978022112860168
4.97153249731018
return_mean1    28.763366
return_mean2    28.987153
return_sd1      17.318675
return_sd2      21.841596
dtype: float64
96.91079083246049
---
[1]  0.06939608  0.03004303 -1.42315594 -1.36987041

5.059021119136233
4.981967662573735
return_mean1    28.534703
return_mean2    26.399098
return_sd1      18.368461
return_sd2      20.274237
dtype: float64
93.57649957201798
---
[1]  0.06939608  0.03004303 -1.42315594 -1.36987041

5.051843146396627
4.9798629657604785
return_mean1    30.859124
return_mean2    28.439020
return_sd1      19.633952
return_sd2      21.336172
dtype: float64
100.26826711032183
---
[1]  0.06939608  0.03004303 -1.42315594 -1.36987041

5.0407010172525135
5.007698733304744
return_mean1    26.952981
return_mean2    28.415125
return_sd1      19.315070
return_sd2      21.787769
dtype: float64
96.47094471366013
---
[1]  0.06939608  0.03004303 -1.42315594 

4.998517419190814
5.077418712104621
return_mean1    26.861038
return_mean2    28.601335
return_sd1      20.750709
return_sd2      27.158217
dtype: float64
103.3712987869296
---
[1]  0.06939608  0.03004303 -1.42315594 -1.31195317

5.088504639940567
5.001546117631991
return_mean1    28.922794
return_mean2    28.704275
return_sd1      20.545541
return_sd2      24.397874
dtype: float64
102.57048396068573
---
[1]  0.06939608  0.03004303 -1.42315594 -1.31190624

5.007217554133525
5.075479720648881
return_mean1    27.682119
return_mean2    26.878172
return_sd1      19.726649
return_sd2      25.597367
dtype: float64
99.88430690719876
---
[1]  0.06939608  0.03004303 -1.42315594 -1.31193357

4.994996155386956
4.98864230334755
return_mean1    27.043238
return_mean2    30.899114
return_sd1      18.932788
return_sd2      27.262026
dtype: float64
104.13716497887548
---
[1]  0.06939608  0.03004303 -1.42315594 -1.31191565

5.078246875181645
5.004761164146836
return_mean1    28.823112
return_mean2    2

dtype: float64
104.13858656108935
---
[1]  0.07542793  1.03004303 -1.42315594 -1.31192146

4.992722063926402
4.866121615668144
return_mean1     31.068136
return_mean2    258.515777
return_sd1       18.622413
return_sd2       32.643437
dtype: float64
340.84976313923926
---
[1]  0.07542793 -1.58799097 -1.42315594 -1.31192146

5.029822252389404
5.170481891297585
return_mean1      30.131395
return_mean2    1653.917163
return_sd1        18.321441
return_sd2       450.550101
dtype: float64
2152.92009954531
---
[1]  0.07542793  0.03004303 -1.42315594 -1.31192146

5.030217584124392
4.991309260386722
return_mean1    29.805158
return_mean2    32.330506
return_sd1      19.960779
return_sd2      26.263784
dtype: float64
108.36022664200141
---
[1]  0.07542793 -0.58799095 -1.42315594 -1.31192146

5.013500685700763
5.08595727102201
return_mean1     29.631761
return_mean2    350.778445
return_sd1       20.884604
return_sd2       26.748800
dtype: float64
428.0436097906171
---
[1]  0.07542793  0.4120090


5.050949262646782
5.037604897716858
return_mean1    32.162959
return_mean2    26.954885
return_sd1      53.850934
return_sd2      26.737458
dtype: float64
139.70623556087523
---
[1]  0.07542793  0.01132853 -1.41713942 -1.31192146

4.996191035275464
5.0485146373788625
return_mean1    29.242537
return_mean2    30.706433
return_sd1      19.028760
return_sd2      27.632291
dtype: float64
106.6100210082025
---
[1]  0.07542793  0.01132853 -1.36370944 -1.31192146

4.988831332923793
5.053185564846189
return_mean1    31.025002
return_mean2    30.106688
return_sd1      23.008132
return_sd2      25.270471
dtype: float64
109.41029259017236
---
[1]  0.07542793  0.01132853 -1.39628765 -1.31192146

4.950747815966103
4.981002461546946
return_mean1    30.205364
return_mean2    30.236693
return_sd1      20.196244
return_sd2      26.862460
dtype: float64
107.5007613224474
---
[1]  0.07542793  0.01132853 -1.40895526 -1.31192146

5.011347540739124
5.03849454800125
return_mean1    31.242404
return_mean2   

dtype: float64
103.34881370700168
---
[1]  0.07542793  0.01132853 -1.40749735 -1.31565748

5.011869523613602
4.951031172315016
return_mean1    29.134215
return_mean2    32.433505
return_sd1      20.558656
return_sd2      25.272168
dtype: float64
107.39854390690329
---
[1]  0.07542793  0.01132853 -1.40749735 -1.31669014

5.062580936701368
5.027820916970252
return_mean1    29.742278
return_mean2    29.456177
return_sd1      17.517882
return_sd2      25.262040
dtype: float64
101.97837598742032
---
[1]  0.07542793  0.01132853 -1.40749735 -1.31654777

4.994426736219841
5.010740903485009
return_mean1    32.228066
return_mean2    30.711335
return_sd1      18.264083
return_sd2      25.665513
dtype: float64
106.8689978611059
---
[1]  0.07542793  0.01132853 -1.40749735 -1.31669062

4.9903215966979015
4.9855732702118285
return_mean1    28.712826
return_mean2    29.182906
return_sd1      19.535703
return_sd2      25.434520
dtype: float64
102.86595440868632
---
[1]  0.07542793  0.01132853 -1.407497