# ou44 Powell

In [5]:
import pandas as pd
import numpy as np
import random
from numpy.random import RandomState
from scipy import stats
from scipy.optimize import minimize
import scipy
import os
os.environ['R_HOME'] = '/Library/Frameworks/R.framework/Versions/4.1/Resources/'
import rpy2
from rpy2.robjects.packages import importr
from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
import datetime
from rpy2.robjects.vectors import FloatVector
import multiprocessing

np.random.seed(441)

# Define the model that generates pair simulations.
yuima = importr("yuima")
n_ou_sim_string = """
n_sim_ou = function(random_seed, num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length){

  set.seed(random_seed)

  drift = c("mu11-mu12*X1", "mu21-mu22*X2")
  diffusion = matrix(c("exp(sigma11)", "exp(sigma12)", "exp(sigma21)", "exp(sigma22)"), 2, 2, byrow=TRUE)
  ou_model = setModel(drift=drift, diffusion=diffusion, 
                        time.variable = "t",
                        state.var=c("X1","X2"), solve.variable=c("X1","X2"))

  newsamp = setSampling(Initial=T0, Terminal=T, n=length)

  n_sim_ou_data = data.frame(matrix(nrow=length+1, ncol=2*num_sim))
  for (i in 1:num_sim){
    ou_sim = simulate(ou_model, 
                      true.par=list(
                        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22, 
                        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22), 
                      xinit=xinit_vec[i], sampling=newsamp)
    original_data = ou_sim@data@original.data
    one_sim_ou = data.frame(original_data[,1], original_data[,2])
    colnames(one_sim_ou) = c('series1', 'series2')
    n_sim_ou_data[, (2*i-1):(2*i)] = one_sim_ou
  }
  return(n_sim_ou_data)
}
"""

n_ou_sim = SignatureTranslatedAnonymousPackage(n_ou_sim_string, "n_ou_sim")
def n_ou_simulation(random_seed, num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length):
    """num_sim simulations of bivariate Ornstein-Uhlenbeck process,
    length = length of one series
    """
    n_ou_sim_data = pd.DataFrame(
        n_ou_sim.n_sim_ou(random_seed=random_seed, num_sim=num_sim,
                              mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22,
                              sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22,
                              xinit_vec=xinit_vec, T0=T0, T=T, length=length)).transpose()
    return n_ou_sim_data

def price_to_log_price(n_price):
    return(np.log(n_price))
def log_price_to_price(n_log_price):
    return(np.exp(n_log_price))
def price_to_return(n_price):
    n_return = pd.DataFrame()
    for i in range(n_price.shape[1]):
        ith_column_price_series = n_price.iloc[:, i]
        n_return = pd.concat([n_return, 100 * (np.log(ith_column_price_series[1:].values) - np.log(ith_column_price_series[:-1]))], axis=1)
    return n_return
def log_price_to_return(n_log_price):
    n_real_return = pd.DataFrame()
    for i in range(n_log_price.shape[1]):
        ith_column_price_series = n_log_price.iloc[:, i]
        n_real_return = pd.concat([n_real_return, 100 * (ith_column_price_series[1:].values - ith_column_price_series[:-1])], axis=1)
    return n_real_return
def cal_stats(n_return, n_price=None):
    # (different expressions of calculation from intro to stat finance)
    # 4 statistics
    return_series1 = n_return.iloc[:, ::2]
    return_series2 = n_return.iloc[:, 1::2]
    mean1 = return_series1.mean(axis=0).values
    sd1 = return_series1.std(axis=0).values
    mean2 = return_series2.mean(axis=0).values
    sd2 = return_series2.std(axis=0).values
    stats_data = pd.DataFrame([mean1, mean2, sd1, sd2])
    stats_data = stats_data.transpose()
    stats_data.columns = [
        'return_mean1', 'return_mean2',
        'return_sd1', 'return_sd2']
    return stats_data

def loss_function(params):
    """n_real_stats is a global amount calculated outside the function"""
    params = FloatVector(params)
    print(params)
    moment_loss = pd.DataFrame().reindex_like(real_stats)


    n_real_log_price = n_ou_simulation(
        int(np.random.randint(low=0, high=980608, size=(1,))), num_sim,
        mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
        xinit_vec, T0, T, length)
    print(n_real_log_price.iloc[5, 5])
    n_real_price = log_price_to_price(n_log_price=n_real_log_price)
    n_real_return = price_to_return(n_price=n_real_price)
    n_real_stats = cal_stats(n_return=n_real_return, n_price=None)


    n_sim_log_price = n_ou_simulation(
        random_seed=int(np.random.randint(low=0, high=980608, size=(1,))), num_sim=num_sim,
        mu11=mu11, mu12=params[0], mu21=mu21, mu22=params[1],
        sigma11=params[2], sigma12=sigma12, sigma21=sigma21, sigma22=params[3],
        xinit_vec=xinit_vec, T0=T0, T=T, length=length)
    print(n_sim_log_price.iloc[5, 5])
    n_sim_price = log_price_to_price(n_sim_log_price)
    n_sim_return = price_to_return(n_sim_price)
    n_sim_stats = cal_stats(n_sim_return)


    for i in range(n_real_stats.shape[0]):
        for j in range(n_real_stats.shape[1]):
            moment_loss.iloc[i, j] = np.sqrt((n_real_stats.iloc[i, j] - n_sim_stats.iloc[i, j])**2)
    sum_all = np.sum(moment_loss)
    print(sum_all)
    print(np.sum(sum_all))
    print('---')

    return np.sum(sum_all)



real_price = pd.read_csv("sp500_20180101_20181231_pair_prices.csv", index_col=[0])
real_log_price = price_to_log_price(n_price=real_price)
real_return = pd.read_csv("sp500_20180101_20181231_pair_returns.csv", index_col=[0])
real_stats = cal_stats(n_return=real_return, n_price=None)
mu11, mu21, sigma12, sigma21 = 0, 0, -1000, -1000
xinit_vec = []
for i in range(int(real_log_price.shape[1]/2)):
    init_pair_log_price = [real_log_price.iloc[0, 2*i], real_log_price.iloc[0, 2*i+1]]
    init_pair_log_price = FloatVector(init_pair_log_price)
    xinit_vec.append(init_pair_log_price)
num_sim, T0, T, length = real_stats.shape[0], 0, 1, real_price.shape[0]
mu12, mu22, sigma11, sigma22 = 0.0369, 0.0405, -1.4118, -1.3574


num_iter = 1
initial0 = [1, 1, -1, -1]


iter_seed = np.random.randint(low=0, high=980608, size=(num_iter,))
def multi_process(iter):

    print(iter)
    np.random.seed(int(iter_seed[iter]))

    begin_time = datetime.datetime.now()
    res = minimize(loss_function, initial0, method='Powell',
                   tol=1e-6, options={'disp': True})
    print(res.x)

    time = datetime.datetime.now() - begin_time
    print(time)

    params = (res.x)
    loss = loss_function((params))
    print(loss)
    return (res.x, time, loss)


iterations = [i for i in range(num_iter)]
pool = multiprocessing.Pool()
result = pool.map(multi_process, iterations)
print(result)

0
[1]  1  1 -1 -1

5.043932089409035
4.848796057384303
return_mean1    257.313378
return_mean2    251.792801
return_sd1      200.487853
return_sd2      178.956875
dtype: float64
888.550906163143
---
[1]  1  1 -1 -1

5.064931605699332
4.983529890659132
return_mean1    254.207319
return_mean2    248.394881
return_sd1      199.448290
return_sd2      181.083751
dtype: float64
883.1342419634568
---
[1]  2  1 -1 -1

5.023942365564527
5.000547726120866
return_mean1    356.434064
return_mean2    247.950593
return_sd1      235.424358
return_sd2      173.147819
dtype: float64
1012.9568337539648
---
[1] -0.618034  1.000000 -1.000000 -1.000000

5.063860422575563
4.8977924216023005
return_mean1    379.869747
return_mean2    251.211521
return_sd1      198.540187
return_sd2      177.149370
dtype: float64
1006.7708250147936
---
[1]  1  1 -1 -1

5.083635029227609
4.932762784685886
return_mean1    259.888573
return_mean2    249.592561
return_sd1      199.502427
return_sd2      178.989649
dtype: float64


dtype: float64
437.1269980764399
---
[1]  0.06860734  0.02238247 -1.00000000 -1.00000000

5.065030923057463
4.99091111318031
return_mean1     33.592092
return_mean2     34.372928
return_sd1      194.295178
return_sd2      172.553827
dtype: float64
434.81402606303567
---
[1]  0.06860734  0.02248022 -1.00000000 -1.00000000

5.017271177690762
4.931232010300562
return_mean1     34.893059
return_mean2     33.988801
return_sd1      194.534667
return_sd2      172.832877
dtype: float64
436.2494040332532
---
[1]  0.06860734  0.02248022  0.00000000 -1.00000000

5.046526224477517
5.127380512073765
return_mean1      77.530255
return_mean2      35.432144
return_sd1      1185.351096
return_sd2       171.800945
dtype: float64
1470.1144405601656
---
[1]  0.06860734  0.02248022 -2.61803400 -1.00000000

5.029849614130829
5.011408443674001
return_mean1     23.017164
return_mean2     34.359030
return_sd1      267.385467
return_sd2      171.799907
dtype: float64
496.5615690149033
---
[1]  0.06860734  0.022

dtype: float64
126.77432142319284
---
[1]  0.06860734  0.02248022 -1.42315568 -1.36325136

5.013996915529532
5.041267559772866
return_mean1    30.627088
return_mean2    31.660504
return_sd1      19.706276
return_sd2      20.911568
dtype: float64
102.90543619005025
---
[1]  0.06860734  0.02248022 -1.42315568 -1.41262715

5.0320215816989275
5.032496023021286
return_mean1    29.504575
return_mean2    28.311401
return_sd1      20.151733
return_sd2      27.570702
dtype: float64
105.53841152265548
---
[1]  0.06860734  0.02248022 -1.42315568 -1.38869331

5.026780202155203
5.040164414380262
return_mean1    27.461909
return_mean2    27.680701
return_sd1      21.280038
return_sd2      23.597313
dtype: float64
100.01996031798234
---
[1]  0.06860734  0.02248022 -1.42315568 -1.37787130

5.004780317640271
4.974017444646171
return_mean1    27.475338
return_mean2    29.033732
return_sd1      20.346042
return_sd2      21.163691
dtype: float64
98.01880263713447
---
[1]  0.06860734  0.02248022 -1.4231556

dtype: float64
99.2504513484144
---
[1]  0.06939608  0.03004303 -1.42315568 -1.36987041

4.965929503878852
4.972300097985354
return_mean1    28.404651
return_mean2    28.688368
return_sd1      19.551730
return_sd2      21.010931
dtype: float64
97.65568017717523
---
[1]  0.06939608  0.03004303 -0.42315568 -1.36987041

4.973508381567346
5.068660219073018
return_mean1     53.294172
return_mean2     26.584167
return_sd1      644.205109
return_sd2       20.944605
dtype: float64
745.0280532644331
---
[1]  0.06939608  0.03004303 -3.04118968 -1.36987041

5.0436705475020185
4.973744330527269
return_mean1     20.756739
return_mean2     29.517278
return_sd1      306.827990
return_sd2       19.909468
dtype: float64
377.0114745532508
---
[1]  0.06939608  0.03004303 -1.42315568 -1.36987041

5.028346320492397
4.995677725170369
return_mean1    26.793903
return_mean2    27.616683
return_sd1      19.320759
return_sd2      20.629963
dtype: float64
94.36130767918263
---
[1]  0.06939608  0.03004303 -2.0411

dtype: float64
100.56278882364298
---
[1]  0.06939608  0.03004303 -1.42315594 -1.36987041

4.978022112860168
4.97153249731018
return_mean1    28.763366
return_mean2    28.987153
return_sd1      17.318675
return_sd2      21.841596
dtype: float64
96.91079083246049
---
[1]  0.06939608  0.03004303 -1.42315594 -1.36987041

5.059021119136233
4.981967662573735
return_mean1    28.534703
return_mean2    26.399098
return_sd1      18.368461
return_sd2      20.274237
dtype: float64
93.57649957201798
---
[1]  0.06939608  0.03004303 -1.42315594 -1.36987041

5.051843146396627
4.9798629657604785
return_mean1    30.859124
return_mean2    28.439020
return_sd1      19.633952
return_sd2      21.336172
dtype: float64
100.26826711032183
---
[1]  0.06939608  0.03004303 -1.42315594 -1.36987041

5.0407010172525135
5.007698733304744
return_mean1    26.952981
return_mean2    28.415125
return_sd1      19.315070
return_sd2      21.787769
dtype: float64
96.47094471366013
---
[1]  0.06939608  0.03004303 -1.42315594 

dtype: float64
104.13858656108935
---
[1]  0.07542793  1.03004303 -1.42315594 -1.31192146

4.992722063926402
4.866121615668144
return_mean1     31.068136
return_mean2    258.515777
return_sd1       18.622413
return_sd2       32.643437
dtype: float64
340.84976313923926
---
[1]  0.07542793 -1.58799097 -1.42315594 -1.31192146

5.029822252389404
5.170481891297585
return_mean1      30.131395
return_mean2    1653.917163
return_sd1        18.321441
return_sd2       450.550101
dtype: float64
2152.92009954531
---
[1]  0.07542793  0.03004303 -1.42315594 -1.31192146

5.030217584124392
4.991309260386722
return_mean1    29.805158
return_mean2    32.330506
return_sd1      19.960779
return_sd2      26.263784
dtype: float64
108.36022664200141
---
[1]  0.07542793 -0.58799095 -1.42315594 -1.31192146

5.013500685700763
5.08595727102201
return_mean1     29.631761
return_mean2    350.778445
return_sd1       20.884604
return_sd2       26.748800
dtype: float64
428.0436097906171
---
[1]  0.07542793  0.4120090


5.050949262646782
5.037604897716858
return_mean1    32.162959
return_mean2    26.954885
return_sd1      53.850934
return_sd2      26.737458
dtype: float64
139.70623556087523
---
[1]  0.07542793  0.01132853 -1.41713942 -1.31192146

4.996191035275464
5.0485146373788625
return_mean1    29.242537
return_mean2    30.706433
return_sd1      19.028760
return_sd2      27.632291
dtype: float64
106.6100210082025
---
[1]  0.07542793  0.01132853 -1.36370944 -1.31192146

4.988831332923793
5.053185564846189
return_mean1    31.025002
return_mean2    30.106688
return_sd1      23.008132
return_sd2      25.270471
dtype: float64
109.41029259017236
---
[1]  0.07542793  0.01132853 -1.39628765 -1.31192146

4.950747815966103
4.981002461546946
return_mean1    30.205364
return_mean2    30.236693
return_sd1      20.196244
return_sd2      26.862460
dtype: float64
107.5007613224474
---
[1]  0.07542793  0.01132853 -1.40895526 -1.31192146

5.011347540739124
5.03849454800125
return_mean1    31.242404
return_mean2   

dtype: float64
103.34881370700168
---
[1]  0.07542793  0.01132853 -1.40749735 -1.31565748

5.011869523613602
4.951031172315016
return_mean1    29.134215
return_mean2    32.433505
return_sd1      20.558656
return_sd2      25.272168
dtype: float64
107.39854390690329
---
[1]  0.07542793  0.01132853 -1.40749735 -1.31669014

5.062580936701368
5.027820916970252
return_mean1    29.742278
return_mean2    29.456177
return_sd1      17.517882
return_sd2      25.262040
dtype: float64
101.97837598742032
---
[1]  0.07542793  0.01132853 -1.40749735 -1.31654777

4.994426736219841
5.010740903485009
return_mean1    32.228066
return_mean2    30.711335
return_sd1      18.264083
return_sd2      25.665513
dtype: float64
106.8689978611059
---
[1]  0.07542793  0.01132853 -1.40749735 -1.31669062

4.9903215966979015
4.9855732702118285
return_mean1    28.712826
return_mean2    29.182906
return_sd1      19.535703
return_sd2      25.434520
dtype: float64
102.86595440868632
---
[1]  0.07542793  0.01132853 -1.407497

# ou44 powell with bound

In [14]:
import pandas as pd
import numpy as np
import random
from numpy.random import RandomState
from scipy import stats
from scipy.optimize import minimize
import scipy
import os
os.environ['R_HOME'] = '/Library/Frameworks/R.framework/Versions/4.1/Resources/'
import rpy2
from rpy2.robjects.packages import importr
from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
import datetime
from rpy2.robjects.vectors import FloatVector
import multiprocessing

np.random.seed(441)

# Define the model that generates pair simulations.
yuima = importr("yuima")
n_ou_sim_string = """
n_sim_ou = function(random_seed, num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length){

  set.seed(random_seed)

  drift = c("mu11-mu12*X1", "mu21-mu22*X2")
  diffusion = matrix(c("sigma11", "sigma12", "sigma21", "sigma22"), 2, 2, byrow=TRUE)
  ou_model = setModel(drift=drift, diffusion=diffusion, 
                        time.variable = "t",
                        state.var=c("X1","X2"), solve.variable=c("X1","X2"))

  newsamp = setSampling(Initial=T0, Terminal=T, n=length)

  n_sim_ou_data = data.frame(matrix(nrow=length+1, ncol=2*num_sim))
  for (i in 1:num_sim){
    ou_sim = simulate(ou_model, 
                      true.par=list(
                        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22, 
                        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22), 
                      xinit=xinit_vec[i], sampling=newsamp)
    original_data = ou_sim@data@original.data
    one_sim_ou = data.frame(original_data[,1], original_data[,2])
    colnames(one_sim_ou) = c('series1', 'series2')
    n_sim_ou_data[, (2*i-1):(2*i)] = one_sim_ou
  }
  return(n_sim_ou_data)
}
"""

n_ou_sim = SignatureTranslatedAnonymousPackage(n_ou_sim_string, "n_ou_sim")
def n_ou_simulation(random_seed, num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length):
    """num_sim simulations of bivariate Ornstein-Uhlenbeck process,
    length = length of one series
    """
    n_ou_sim_data = pd.DataFrame(
        n_ou_sim.n_sim_ou(random_seed=random_seed, num_sim=num_sim,
                              mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22,
                              sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22,
                              xinit_vec=xinit_vec, T0=T0, T=T, length=length)).transpose()
    return n_ou_sim_data

def price_to_log_price(n_price):
    return(np.log(n_price))
def log_price_to_price(n_log_price):
    return(np.exp(n_log_price))
def price_to_return(n_price):
    n_return = pd.DataFrame()
    for i in range(n_price.shape[1]):
        ith_column_price_series = n_price.iloc[:, i]
        n_return = pd.concat([n_return, 100 * (np.log(ith_column_price_series[1:].values) - np.log(ith_column_price_series[:-1]))], axis=1)
    return n_return
def log_price_to_return(n_log_price):
    n_real_return = pd.DataFrame()
    for i in range(n_log_price.shape[1]):
        ith_column_price_series = n_log_price.iloc[:, i]
        n_real_return = pd.concat([n_real_return, 100 * (ith_column_price_series[1:].values - ith_column_price_series[:-1])], axis=1)
    return n_real_return
def cal_stats(n_return, n_price=None):
    # (different expressions of calculation from intro to stat finance)
    # 4 statistics
    return_series1 = n_return.iloc[:, ::2]
    return_series2 = n_return.iloc[:, 1::2]
    mean1 = return_series1.mean(axis=0).values
    sd1 = return_series1.std(axis=0).values
    mean2 = return_series2.mean(axis=0).values
    sd2 = return_series2.std(axis=0).values
    stats_data = pd.DataFrame([mean1, mean2, sd1, sd2])
    stats_data = stats_data.transpose()
    stats_data.columns = [
        'return_mean1', 'return_mean2',
        'return_sd1', 'return_sd2']
    return stats_data

def loss_function(params):
    """n_real_stats is a global amount calculated outside the function"""
    params = FloatVector(params)
    print(params)
    moment_loss = pd.DataFrame().reindex_like(real_stats)
    print(int(np.random.randint(low=0, high=980608, size=(1,))))

    n_real_log_price = n_ou_simulation(
        int(np.random.randint(low=0, high=980608, size=(1,))), num_sim,
        mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
        xinit_vec, T0, T, length)
    print(n_real_log_price.iloc[5, 5])
    n_real_price = log_price_to_price(n_log_price=n_real_log_price)
    n_real_return = price_to_return(n_price=n_real_price)
    n_real_stats = cal_stats(n_return=n_real_return, n_price=None)

    
    print(int(np.random.randint(low=0, high=980608, size=(1,))))
    n_sim_log_price = n_ou_simulation(
        random_seed=int(np.random.randint(low=0, high=980608, size=(1,))), num_sim=num_sim,
        mu11=mu11, mu12=params[0], mu21=mu21, mu22=params[1],
        sigma11=params[2], sigma12=sigma12, sigma21=sigma21, sigma22=params[3],
        xinit_vec=xinit_vec, T0=T0, T=T, length=length)
    print(n_sim_log_price.iloc[5, 5])
    n_sim_price = log_price_to_price(n_sim_log_price)
    n_sim_return = price_to_return(n_sim_price)
    n_sim_stats = cal_stats(n_sim_return)


    for i in range(n_real_stats.shape[0]):
        for j in range(n_real_stats.shape[1]):
            moment_loss.iloc[i, j] = np.sqrt((n_real_stats.iloc[i, j] - n_sim_stats.iloc[i, j])**2)
    sum_all = np.sum(moment_loss)
    print(sum_all)
    print(np.sum(sum_all))
    print('---')

    return np.sum(sum_all)



real_price = pd.read_csv("sp500_20180101_20181231_pair_prices.csv", index_col=[0])
real_log_price = price_to_log_price(n_price=real_price)
real_return = pd.read_csv("sp500_20180101_20181231_pair_returns.csv", index_col=[0])
real_stats = cal_stats(n_return=real_return, n_price=None)
mu11, mu21, sigma12, sigma21 = 0, 0, 0, 0
xinit_vec = []
for i in range(int(real_log_price.shape[1]/2)):
    init_pair_log_price = [real_log_price.iloc[0, 2*i], real_log_price.iloc[0, 2*i+1]]
    init_pair_log_price = FloatVector(init_pair_log_price)
    xinit_vec.append(init_pair_log_price)
num_sim, T0, T, length = real_stats.shape[0], 0, 1, real_price.shape[0]
mu12, mu22, sigma11, sigma22 = 0.0369, 0.0405, 0.2437, 0.2573


num_iter = 1
initial0 = [0.5, 0.5, 0.5, 0.5]


iter_seed = np.random.randint(low=0, high=980608, size=(num_iter,))
def multi_process(iter):

    print(iter)
    np.random.seed(int(iter_seed[iter]))

    begin_time = datetime.datetime.now()
    res = minimize(loss_function, initial0, method='Powell',
                   tol=1e-6, options={'disp': True},
                  bounds=[(0., 1), (0., 1), (0., 1), (0., 1)])
    print(res.x)

    time = datetime.datetime.now() - begin_time
    print(time)

    params = (res.x)
    loss = loss_function((params))
    print(loss)
    return (res.x, time, loss)


iterations = [i for i in range(num_iter)]
pool = multiprocessing.Pool()
result = pool.map(multi_process, iterations)
print(result)

0
[1] 0.5 0.5 0.5 0.5

139579
4.965377269571503
759528
5.056229292983301
return_mean1    152.729522
return_mean2    148.249634
return_sd1      401.295893
return_sd2      382.336648
dtype: float64
1084.6116960420547
---
[1] 0.381966 0.500000 0.500000 0.500000

446495
5.072343274454347
639526
4.93928787351248
return_mean1    127.459732
return_mean2    152.751458
return_sd1      404.409955
return_sd2      383.563380
dtype: float64
1068.184524893641
---
[1] 0.618034 0.500000 0.500000 0.500000

776673
5.024144339865143
461617
4.975628191500542
return_mean1    181.797022
return_mean2    151.063254
return_sd1      401.794446
return_sd2      382.612903
dtype: float64
1117.267625412759
---
[1] 0.236068 0.500000 0.500000 0.500000

295057
4.96765445391943
971113
4.95850409221641
return_mean1     82.745441
return_mean2    148.204443
return_sd1      400.922213
return_sd2      385.459701
dtype: float64
1017.3317981598467
---
[1] 0.145898 0.500000 0.500000 0.500000

802950
4.994798502862585
164012
5.


282682
5.012569901641081
918526
5.015733609625192
return_mean1     47.192519
return_mean2     43.913901
return_sd1      399.283190
return_sd2      377.271476
dtype: float64
867.6610857626022
---
[1] 0.07275400 0.05479339 0.50000000 0.50000000

288953
5.02266389944634
721877
5.06098183035231
return_mean1     43.167372
return_mean2     44.988838
return_sd1      399.828995
return_sd2      385.869943
dtype: float64
873.8551483819049
---
[1] 0.07275400 0.05626029 0.50000000 0.50000000

872234
5.0315525683315965
949041
5.059838436708283
return_mean1     44.310252
return_mean2     44.708500
return_sd1      400.534950
return_sd2      381.031203
dtype: float64
870.5849045260002
---
[1] 0.07275400 0.05527155 0.50000000 0.50000000

257065
4.971320088930708
395775
5.2083684871524865
return_mean1     46.571482
return_mean2     42.891805
return_sd1      400.799026
return_sd2      377.822614
dtype: float64
868.0849260751136
---
[1] 0.07275400 0.05583186 0.50000000 0.50000000

914170
4.96548817275969


541457
5.011775362290098
936449
4.999063433036039
return_mean1     28.788608
return_mean2     44.336662
return_sd1       20.639980
return_sd2      384.035845
dtype: float64
477.80109503942583
---
[1] 0.0727540 0.0554542 0.2352253 0.5000000

691537
5.038487055307911
200411
4.942550537104072
return_mean1     28.252083
return_mean2     44.495488
return_sd1       21.141297
return_sd2      379.778521
dtype: float64
473.6673893989735
---
[1] 0.0727540 0.0554542 0.2352235 0.5000000

694537
5.069433986606938
655266
5.1040991701640985
return_mean1     28.497171
return_mean2     42.410355
return_sd1       21.766890
return_sd2      385.437236
dtype: float64
478.11165235304634
---
[1] 0.0727540 0.0554542 0.2352228 0.5000000

231579
4.970701226736482
10010
5.1166027722851135
return_mean1     29.208872
return_mean2     42.811384
return_sd1       20.838645
return_sd2      377.798805
dtype: float64
470.6577067394529
---
[1] 0.0727540 0.0554542 0.2352224 0.3819660

787880
4.972167739720174
697290
4.98

dtype: float64
112.9493428486008
---
[1] 0.09016994 0.05545420 0.23522242 0.25376854

161016
5.03559458779934
808408
5.015614939837985
return_mean1    29.826362
return_mean2    28.077526
return_sd1      22.622919
return_sd2      21.156574
dtype: float64
101.68338073498184
---
[1] 0.04929807 0.05545420 0.23522242 0.25376854

417975
5.028271472010942
269647
4.9721773701895895
return_mean1    26.271557
return_mean2    27.066026
return_sd1      20.683953
return_sd2      20.620066
dtype: float64
94.64160152391071
---
[1] 0.03046788 0.05545420 0.23522242 0.25376854

592754
5.041888678865316
116065
5.043990102892494
return_mean1    27.819805
return_mean2    26.572675
return_sd1      20.920797
return_sd2      20.044415
dtype: float64
95.3576920324445
---
[1] 0.04528052 0.05545420 0.23522242 0.25376854

728276
4.986377514964262
58093
5.071347361781808
return_mean1    27.505673
return_mean2    27.196316
return_sd1      21.410372
return_sd2      21.374652
dtype: float64
97.48701320967338
---
[1] 


764218
5.0051411687925995
850279
5.069550821673397
return_mean1    28.251268
return_mean2    28.683607
return_sd1      21.478380
return_sd2      20.281922
dtype: float64
98.69517613907121
---
[1] 0.04930514 0.07109270 0.23522242 0.25376854

923949
5.004388526230572
12292
4.957125377361506
return_mean1    25.614791
return_mean2    29.044189
return_sd1      19.962316
return_sd2      20.263571
dtype: float64
94.88486858279414
---
[1] 0.04930514 0.07174704 0.23522242 0.25376854

396357
5.035111329761336
655636
5.0726867706047605
return_mean1    25.226413
return_mean2    28.512565
return_sd1      22.327206
return_sd2      19.607923
dtype: float64
95.67410538251997
---
[1] 0.04930514 0.07209892 0.23522242 0.25376854

489217
5.068787245486935
790199
4.980358221183625
return_mean1    26.692788
return_mean2    28.321804
return_sd1      21.581256
return_sd2      20.515992
dtype: float64
97.11184044180999
---
[1] 0.04930514 0.07252189 0.23522242 0.25376854

273755
5.049132476606703
370251
5.0215

5.043906931475704
411256
5.022315172078918
return_mean1    25.218373
return_mean2    30.996412
return_sd1      19.824982
return_sd2      20.489917
dtype: float64
96.52968378164573
---
[1] 0.04930514 0.07228466 0.24915578 0.25376854

580265
4.955223303847729
147038
4.990856340552692
return_mean1    26.463766
return_mean2    30.920896
return_sd1      19.884656
return_sd2      21.844091
dtype: float64
99.11340950454387
---
[1] 0.04930514 0.07228466 0.24915857 0.25376854

514736
5.0166412021661495
926022
5.012209880247959
return_mean1    26.991772
return_mean2    31.435425
return_sd1      18.491201
return_sd2      20.785486
dtype: float64
97.70388476876151
---
[1] 0.04930514 0.07228466 0.24915730 0.25376854

474015
5.023161192170198
546833
5.0029028812670955
return_mean1    25.905526
return_mean2    29.786550
return_sd1      20.790013
return_sd2      21.369066
dtype: float64
97.85115440880439
---
[1] 0.04930514 0.07228466 0.24915788 0.25376854

789415
5.010523874612971
648685
5.04510786264

4.995974918253694
958469
5.046943590654507
return_mean1    46.865197
return_mean2    29.422952
return_sd1      22.458429
return_sd2      18.475900
dtype: float64
117.22247800770299
---
[1] 0.09016994 0.07228466 0.24915824 0.25756878

425697
5.017441474257474
350681
4.97168164135015
return_mean1    32.841013
return_mean2    31.394134
return_sd1      20.040161
return_sd2      20.925877
dtype: float64
105.20118538309923
---
[1] 0.05572809 0.07228466 0.24915824 0.25756878

661898
5.016702800295643
444799
5.037764850970431
return_mean1    25.928455
return_mean2    29.252048
return_sd1      21.321501
return_sd2      20.219599
dtype: float64
96.72160251656474
---
[1] 0.03444185 0.07228466 0.24915824 0.25756878

134946
5.022433659230957
766773
5.001986830646071
return_mean1    28.347971
return_mean2    28.416691
return_sd1      20.703543
return_sd2      19.708080
dtype: float64
97.17628523126129
---
[1] 0.04730947 0.07228466 0.24915824 0.25756878

501763
5.004143002379756
615188
5.019660968649

519206
5.0352967172235035
return_mean1    26.157149
return_mean2    26.647478
return_sd1      21.660663
return_sd2      18.383322
dtype: float64
92.84861242708251
---
[1] 0.05572776 0.04670714 0.24915824 0.25756878

472037
5.043029351218355
550727
4.984803578493427
return_mean1    25.935238
return_mean2    28.970310
return_sd1      20.233614
return_sd2      19.494687
dtype: float64
94.63384923668197
---
[1] 0.05572776 0.04679832 0.24915824 0.25756878

40463
5.058742602810391
168412
5.008970254987959
return_mean1    27.673231
return_mean2    29.738320
return_sd1      21.802780
return_sd2      20.575802
dtype: float64
99.79013350860097
---
[1] 0.05572776 0.04674193 0.24915824 0.25756878

346633
5.019293107886413
851273
4.998015909999421
return_mean1    27.277831
return_mean2    27.361253
return_sd1      20.718762
return_sd2      19.744369
dtype: float64
95.10221527708285
---
[1] 0.05572776 0.04677676 0.24915824 0.25756878

277857
5.034809362495653
426742
5.0482673668108085
return_mean1  

4.974088668870109
return_mean1    26.193525
return_mean2    30.440638
return_sd1      19.903351
return_sd2      17.919844
dtype: float64
94.45735747046632
---
[1] 0.05572776 0.04676344 0.23806648 0.25756878

880125
5.0195596081429645
916612
5.039541623594431
return_mean1    28.043474
return_mean2    30.125700
return_sd1      21.871545
return_sd2      21.841090
dtype: float64
101.88180978308853
---
[1] 0.05572776 0.04676344 0.23806554 0.25756878

584287
5.001548263204198
651924
5.027442922232981
return_mean1    26.936224
return_mean2    25.964196
return_sd1      21.895887
return_sd2      20.783030
dtype: float64
95.57933659197133
---
[1] 0.05572776 0.04676344 0.23806592 0.38196601

638758
4.9666455090647
231352
5.020425059677112
return_mean1     25.548825
return_mean2     35.443027
return_sd1       22.394931
return_sd2      196.809232
dtype: float64
280.1960154664455
---
[1] 0.05572776 0.04676344 0.23806592 0.61803399

246549
4.958669044985628
154223
4.894482052018794
return_mean1     2

return_mean1    29.478834
return_mean2    28.574834
return_sd1      19.756864
return_sd2      19.136077
dtype: float64
96.94660873692936
---
[1] 0.07392602 0.04676344 0.23806592 0.25735112

962575
4.966447812297378
334569
5.002754979773827
return_mean1    28.005251
return_mean2    29.082253
return_sd1      20.139135
return_sd2      20.660309
dtype: float64
97.88694819500449
---
[1] 0.08125550 0.04676344 0.23806592 0.25735112

415852
5.039974855346039
436339
5.050031752274129
return_mean1    30.011879
return_mean2    28.331867
return_sd1      20.541942
return_sd2      20.350983
dtype: float64
99.23667107865855
---
[1] 0.07761951 0.04676344 0.23806592 0.25735112

355684
5.038612409152338
478742
5.0899027683085025
return_mean1    28.518103
return_mean2    29.552056
return_sd1      20.515775
return_sd2      20.684433
dtype: float64
99.27036810473254
---
[1] 0.07623069 0.04676344 0.23806592 0.25735112

947491
4.986704836021503
81740
5.051389181742909
return_mean1    29.908724
return_mean2  

dtype: float64
99.99639251862482
---
[1] 0.07536684 0.03568851 0.23806592 0.25735112

508519
5.015052767778878
246124
5.025087479193776
return_mean1    29.871846
return_mean2    29.335342
return_sd1      20.613965
return_sd2      21.852077
dtype: float64
101.67322939553544
---
[1] 0.07536684 0.03580755 0.23806592 0.25735112

227083
4.987198900181688
54295
5.020794195181692
return_mean1    29.364124
return_mean2    28.555452
return_sd1      21.078762
return_sd2      21.006373
dtype: float64
100.00471126562277
---
[1] 0.07536684 0.03573398 0.23806592 0.25735112

30485
5.038134226980142
656720
5.061396693834654
return_mean1    32.006593
return_mean2    29.283552
return_sd1      18.506756
return_sd2      19.747888
dtype: float64
99.54478829254282
---
[1] 0.07536684 0.03577945 0.23806592 0.25735112

342582
5.043840082906231
363852
5.010465266763759
return_mean1    28.924487
return_mean2    28.017139
return_sd1      20.070470
return_sd2      18.728456
dtype: float64
95.74055148976375
---
[1]

101.75895495115866
---
[1] 0.07536684 0.03576208 0.25270101 0.25735112

227332
4.978583015240498
814267
5.102694328153615
return_mean1    29.918489
return_mean2    29.098325
return_sd1      23.417514
return_sd2      20.778132
dtype: float64
103.2124593255627
---
[1] 0.07536684 0.03576208 0.25270963 0.25735112

233530
5.056217061351693
223355
5.039641960975845
return_mean1    30.544257
return_mean2    26.848949
return_sd1      23.074152
return_sd2      19.961926
dtype: float64
100.42928296306432
---
[1] 0.07536684 0.03576208 0.25270430 0.25735112

303255
5.048816206273894
320860
5.038326579897051
return_mean1    30.849205
return_mean2    27.575696
return_sd1      23.449857
return_sd2      22.026118
dtype: float64
103.90087734026233
---
[1] 0.07536684 0.03576208 0.25270759 0.25735112

840479
5.029840873706007
680143
5.027623232321079
return_mean1    28.971587
return_mean2    28.045026
return_sd1      22.770118
return_sd2      20.142953
dtype: float64
99.92968337035629
---
[1] 0.07536684 

In [19]:
import pandas as pd
import numpy as np
import random
from numpy.random import RandomState
from scipy import stats
from scipy.optimize import minimize
import scipy
import os
os.environ['R_HOME'] = '/Library/Frameworks/R.framework/Versions/4.1/Resources/'
import rpy2
from rpy2.robjects.packages import importr
from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
import datetime
from rpy2.robjects.vectors import FloatVector
import multiprocessing

np.random.seed(442)

# Define the model that generates pair simulations.
yuima = importr("yuima")
n_ou_sim_string = """
n_sim_ou = function(random_seed, num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length){

  set.seed(random_seed)

  drift = c("mu11-mu12*X1", "mu21-mu22*X2")
  diffusion = matrix(c("sigma11", "sigma12", "sigma21", "sigma22"), 2, 2, byrow=TRUE)
  ou_model = setModel(drift=drift, diffusion=diffusion, 
                        time.variable = "t",
                        state.var=c("X1","X2"), solve.variable=c("X1","X2"))

  newsamp = setSampling(Initial=T0, Terminal=T, n=length)

  n_sim_ou_data = data.frame(matrix(nrow=length+1, ncol=2*num_sim))
  for (i in 1:num_sim){
    ou_sim = simulate(ou_model, 
                      true.par=list(
                        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22, 
                        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22), 
                      xinit=xinit_vec[i], sampling=newsamp)
    original_data = ou_sim@data@original.data
    one_sim_ou = data.frame(original_data[,1], original_data[,2])
    colnames(one_sim_ou) = c('series1', 'series2')
    n_sim_ou_data[, (2*i-1):(2*i)] = one_sim_ou
  }
  return(n_sim_ou_data)
}
"""

n_ou_sim = SignatureTranslatedAnonymousPackage(n_ou_sim_string, "n_ou_sim")
def n_ou_simulation(random_seed, num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length):
    """num_sim simulations of bivariate Ornstein-Uhlenbeck process,
    length = length of one series
    """
    n_ou_sim_data = pd.DataFrame(
        n_ou_sim.n_sim_ou(random_seed=random_seed, num_sim=num_sim,
                              mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22,
                              sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22,
                              xinit_vec=xinit_vec, T0=T0, T=T, length=length)).transpose()
    return n_ou_sim_data

def price_to_log_price(n_price):
    return(np.log(n_price))
def log_price_to_price(n_log_price):
    return(np.exp(n_log_price))
def price_to_return(n_price):
    n_return = pd.DataFrame()
    for i in range(n_price.shape[1]):
        ith_column_price_series = n_price.iloc[:, i]
        n_return = pd.concat([n_return, 100 * (np.log(ith_column_price_series[1:].values) - np.log(ith_column_price_series[:-1]))], axis=1)
    return n_return
def log_price_to_return(n_log_price):
    n_real_return = pd.DataFrame()
    for i in range(n_log_price.shape[1]):
        ith_column_price_series = n_log_price.iloc[:, i]
        n_real_return = pd.concat([n_real_return, 100 * (ith_column_price_series[1:].values - ith_column_price_series[:-1])], axis=1)
    return n_real_return
def cal_stats(n_return, n_price=None):
    # (different expressions of calculation from intro to stat finance)
    # 4 statistics
    return_series1 = n_return.iloc[:, ::2]
    return_series2 = n_return.iloc[:, 1::2]
    mean1 = return_series1.mean(axis=0).values
    sd1 = return_series1.std(axis=0).values
    mean2 = return_series2.mean(axis=0).values
    sd2 = return_series2.std(axis=0).values
    stats_data = pd.DataFrame([mean1, mean2, sd1, sd2])
    stats_data = stats_data.transpose()
    stats_data.columns = [
        'return_mean1', 'return_mean2',
        'return_sd1', 'return_sd2']
    return stats_data

def loss_function(params):
    """n_real_stats is a global amount calculated outside the function"""
    params = FloatVector(params)
    print(params)
    moment_loss = pd.DataFrame().reindex_like(real_stats)
    print(int(np.random.randint(low=0, high=980608, size=(1,))))

    n_real_log_price = n_ou_simulation(
        int(np.random.randint(low=0, high=980608, size=(1,))), num_sim,
        mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
        xinit_vec, T0, T, length)
    print(n_real_log_price.iloc[5, 5])
    n_real_price = log_price_to_price(n_log_price=n_real_log_price)
    n_real_return = price_to_return(n_price=n_real_price)
    n_real_stats = cal_stats(n_return=n_real_return, n_price=None)

    
    print(int(np.random.randint(low=0, high=980608, size=(1,))))
    n_sim_log_price = n_ou_simulation(
        random_seed=int(np.random.randint(low=0, high=980608, size=(1,))), num_sim=num_sim,
        mu11=mu11, mu12=params[0], mu21=mu21, mu22=params[1],
        sigma11=params[2], sigma12=sigma12, sigma21=sigma21, sigma22=params[3],
        xinit_vec=xinit_vec, T0=T0, T=T, length=length)
    print(n_sim_log_price.iloc[5, 5])
    n_sim_price = log_price_to_price(n_sim_log_price)
    n_sim_return = price_to_return(n_sim_price)
    n_sim_stats = cal_stats(n_sim_return)


    for i in range(n_real_stats.shape[0]):
        for j in range(n_real_stats.shape[1]):
            moment_loss.iloc[i, j] = np.sqrt((n_real_stats.iloc[i, j] - n_sim_stats.iloc[i, j])**2)
    sum_all = np.sum(moment_loss)
    print(sum_all)
    print(np.sum(sum_all))
    print('---')

    return np.sum(sum_all)



real_price = pd.read_csv("sp500_20180101_20181231_pair_prices.csv", index_col=[0])
real_log_price = price_to_log_price(n_price=real_price)
real_return = pd.read_csv("sp500_20180101_20181231_pair_returns.csv", index_col=[0])
real_stats = cal_stats(n_return=real_return, n_price=None)
mu11, mu21, sigma12, sigma21 = 0, 0, 0, 0
xinit_vec = []
for i in range(int(real_log_price.shape[1]/2)):
    init_pair_log_price = [real_log_price.iloc[0, 2*i], real_log_price.iloc[0, 2*i+1]]
    init_pair_log_price = FloatVector(init_pair_log_price)
    xinit_vec.append(init_pair_log_price)
num_sim, T0, T, length = real_stats.shape[0], 0, 1, real_price.shape[0]
mu12, mu22, sigma11, sigma22 = 0.0369, 0.0405, 0.2437, 0.2573


num_iter = 1
initial0 = [0.5, 0.5, 0.5, 0.5]


iter_seed = np.random.randint(low=0, high=980608, size=(num_iter,))
def multi_process(iter):

    print(iter)
    np.random.seed(int(iter_seed[iter]))

    begin_time = datetime.datetime.now()
    res = minimize(loss_function, initial0, method='Powell',
                   tol=1e-6, options={'disp': True},
                  bounds=[(0., 1), (0., 1), (0., 1), (0., 1)])
    print(res.x)

    time = datetime.datetime.now() - begin_time
    print(time)

    params = (res.x)
    loss = loss_function((params))
    print(loss)
    return (res.x, time, loss)


iterations = [i for i in range(num_iter)]
pool = multiprocessing.Pool()
result = pool.map(multi_process, iterations)
print(result)

0
[1] 0.5 0.5 0.5 0.5

871160
5.107518288216562
492686
4.916719911785419
return_mean1    153.179575
return_mean2    146.352891
return_sd1      405.108759
return_sd2      377.381208
dtype: float64
1082.0224320369239
---
[1] 0.381966 0.500000 0.500000 0.500000

114799
5.006506507033449
85347
4.92598698051963
return_mean1    117.327421
return_mean2    149.270717
return_sd1      403.481289
return_sd2      384.168456
dtype: float64
1054.2478837092963
---
[1] 0.618034 0.500000 0.500000 0.500000

942697
5.015657325856084
30181
4.937777193930079
return_mean1    185.091842
return_mean2    149.128454
return_sd1      403.406280
return_sd2      383.245791
dtype: float64
1120.872367752793
---
[1] 0.236068 0.500000 0.500000 0.500000

658186
5.006213945158735
218942
4.973105543870786
return_mean1     77.426923
return_mean2    153.316669
return_sd1      404.197640
return_sd2      381.661796
dtype: float64
1016.6030270750377
---
[1] 0.145898 0.500000 0.500000 0.500000

664006
5.0645854607549925
295329


dtype: float64
876.1885124566611
---
[1] 0.05260318 0.09016994 0.50000000 0.50000000

433716
5.021248420373121
754304
4.9235026670030155
return_mean1     43.548091
return_mean2     46.950591
return_sd1      400.232165
return_sd2      379.211652
dtype: float64
869.9424991727506
---
[1] 0.05260318 0.07830093 0.50000000 0.50000000

238253
4.992375358516012
146610
5.0105776722704105
return_mean1     44.830519
return_mean2     49.649387
return_sd1      398.148274
return_sd2      380.364224
dtype: float64
872.9924043546648
---
[1] 0.05260318 0.10776923 0.50000000 0.50000000

341082
5.02646395397097
128772
4.985588781906789
return_mean1     44.787823
return_mean2     47.331261
return_sd1      401.354192
return_sd2      379.064136
dtype: float64
872.537412040208
---
[1] 0.05260318 0.09359762 0.50000000 0.50000000

488500
4.9701136188664075
652090
5.035157715754518
return_mean1     41.931246
return_mean2     47.071466
return_sd1      402.203469
return_sd2      377.739056
dtype: float64
868.9452

dtype: float64
473.3579475287954
---
[1] 0.05260318 0.09237315 0.23581578 0.50000000

72540
4.979167749944714
702876
5.012438749912424
return_mean1     25.508868
return_mean2     45.517691
return_sd1       19.423054
return_sd2      381.795594
dtype: float64
472.2452080004959
---
[1] 0.05260318 0.09237315 0.23571945 0.50000000

192161
5.053580636237985
94254
4.963839373896561
return_mean1     25.981195
return_mean2     44.499445
return_sd1       22.406346
return_sd2      377.602933
dtype: float64
470.4899191390092
---
[1] 0.05260318 0.09237315 0.23575625 0.50000000

386197
5.053177501359387
401130
5.046006871044684
return_mean1     24.424142
return_mean2     46.000683
return_sd1       21.260684
return_sd2      380.207400
dtype: float64
471.89290881846546
---
[1] 0.05260318 0.09237315 0.23569671 0.50000000

625577
4.98833212020273
47467
5.144456354038003
return_mean1     27.074965
return_mean2     43.309475
return_sd1       20.759460
return_sd2      375.842114
dtype: float64
466.98601543

dtype: float64
96.30464780943593
---
[1] 0.05260318 0.09237315 0.23569637 0.25210199

672239
4.912416807898759
774300
4.991128841163545
return_mean1    25.942367
return_mean2    33.580429
return_sd1      22.656890
return_sd2      19.721088
dtype: float64
101.90077363218018
---
[1] 0.05260318 0.09237315 0.23569637 0.25210548

586964
5.0081525413659085
452431
4.977679661921261
return_mean1    26.957334
return_mean2    31.096787
return_sd1      21.431114
return_sd2      21.616751
dtype: float64
101.1019847136458
---
[1] 0.05260318 0.09237315 0.23569637 0.25210323

86468
5.02869992015427
855593
5.087882409810463
return_mean1    25.287234
return_mean2    31.427928
return_sd1      21.025311
return_sd2      19.771340
dtype: float64
97.51181283472388
---
[1] 0.05260318 0.09237315 0.23569637 0.25210457

902796
4.930095969584758
285595
4.955760359236679
return_mean1    26.177289
return_mean2    32.613693
return_sd1      21.791412
return_sd2      21.136618
dtype: float64
101.71901123315672
---
[1

dtype: float64
99.35528568111697
---
[1] 0.03998709 0.09237315 0.23569637 0.25210400

914210
5.028903707920305
368761
4.976135994616607
return_mean1    27.743765
return_mean2    30.527861
return_sd1      20.831308
return_sd2      21.420327
dtype: float64
100.52326079357661
---
[1] 0.03998669 0.38196601 0.23569637 0.25210400

116149
5.07412895541533
59373
4.886722200084178
return_mean1     22.939168
return_mean2    121.557717
return_sd1       20.096948
return_sd2       21.092034
dtype: float64
185.68586639958517
---
[1] 0.03998669 0.61803399 0.23569637 0.25210400

359071
4.988902653353693
396962
4.944256857437463
return_mean1     29.498597
return_mean2    178.059469
return_sd1       22.408140
return_sd2       21.483414
dtype: float64
251.44962002279215
---
[1] 0.03998669 0.23606798 0.23569637 0.25210400

562387
5.013293948673776
212665
4.9973095590713505
return_mean1    24.590006
return_mean2    72.647776
return_sd1      22.574165
return_sd2      20.877807
dtype: float64
140.68975393096

dtype: float64
97.0843004287961
---
[1] 0.03998669 0.07089968 0.23603334 0.25210400

710652
5.0830851125913235
115847
5.028725188653545
return_mean1    26.702239
return_mean2    29.604950
return_sd1      19.822673
return_sd2      22.348675
dtype: float64
98.47853637673104
---
[1] 0.03998669 0.07089968 0.23608988 0.25210400

239157
4.986594161493618
774688
4.958846853555103
return_mean1    26.133975
return_mean2    28.080345
return_sd1      22.896085
return_sd2      21.033662
dtype: float64
98.14406742703946
---
[1] 0.03998669 0.07089968 0.23605475 0.25210400

508628
4.921884648023019
193458
5.064039289106277
return_mean1    27.337556
return_mean2    29.679009
return_sd1      20.486604
return_sd2      21.188576
dtype: float64
98.6917457591003
---
[1] 0.03998669 0.07089968 0.23607634 0.25210400

393812
4.992528287566569
774531
5.011243190450372
return_mean1    26.687864
return_mean2    29.524523
return_sd1      20.665170
return_sd2      21.713865
dtype: float64
98.59142170651135
---
[1] 

93.65659915611504
---
[1] 0.3620867 0.6191194 0.2265809 0.1812708

109761
5.057348899576967
291065
4.925834655069987
return_mean1    114.649959
return_mean2    174.101287
return_sd1       28.096044
return_sd2      117.666461
dtype: float64
434.51375102685313
---
[1] 0.2237819 0.3837222 0.2306545 0.2129254

348929
5.022478665115661
699405
4.97578505791699
return_mean1     73.571873
return_mean2    118.273775
return_sd1       26.764684
return_sd2       70.162562
dtype: float64
288.77289417121347
---
[1] 0.1383048 0.2382388 0.2331721 0.2324890

759623
5.002338885526067
509002
5.073327167529459
return_mean1    42.491286
return_mean2    71.912442
return_sd1      23.333473
return_sd2      38.666254
dtype: float64
176.40345469364212
---
[1] 0.08547708 0.14832509 0.23472811 0.24457997

816177
5.007291462515952
25078
4.962800345766681
return_mean1    30.455381
return_mean2    47.975766
return_sd1      21.887003
return_sd2      26.299168
dtype: float64
126.61731842867358
---
[1] 0.05282774 0.092


270916
5.016231112013587
798600
5.066693326209113
return_mean1    30.171028
return_mean2    29.800847
return_sd1      21.630588
return_sd2      19.562151
dtype: float64
101.16461474814963
---
[1] 0.02859637 0.05151315 0.23640347 0.25759857

118637
5.0048710563346095
697509
5.0517361503300435
return_mean1    27.770643
return_mean2    27.902504
return_sd1      21.102081
return_sd2      21.056546
dtype: float64
97.83177458935664
---
[1] 0.38196601 0.05151315 0.23640347 0.25759857

282689
4.995683412460419
93736
5.0167681798814145
return_mean1    121.873401
return_mean2     28.464951
return_sd1       22.495379
return_sd2       22.501802
dtype: float64
195.33553276317897
---
[1] 0.61803399 0.05151315 0.23640347 0.25759857

568160
4.9721706697352275
550508
5.01844747964647
return_mean1    185.349527
return_mean2     28.504262
return_sd1       21.727118
return_sd2       21.611528
dtype: float64
257.1924347937455
---
[1] 0.23606798 0.05151315 0.23640347 0.25759857

314611
5.050723889319886
96

331440
5.046308997966835
546434
5.065901849572089
return_mean1    26.651607
return_mean2    26.978378
return_sd1      18.858098
return_sd2      24.717732
dtype: float64
97.20581606234099
---
[1] 0.05387546 0.05151315 0.23640347 0.26263491

628367
4.999055466197229
641896
5.123765358172137
return_mean1    28.926404
return_mean2    28.530923
return_sd1      21.720985
return_sd2      20.513445
dtype: float64
99.69175754878125
---
[1] 0.05387546 0.05151315 0.23640347 0.25942503

485497
5.046984454283958
192561
4.980963494274562
return_mean1    26.731492
return_mean2    28.078003
return_sd1      20.969055
return_sd2      22.092010
dtype: float64
97.87055955984559
---
[1] 0.05387546 0.05151315 0.23640347 0.25819897

969432
5.013466663211604
946527
4.97767409964701
return_mean1    27.827873
return_mean2    27.604105
return_sd1      21.656071
return_sd2      22.358487
dtype: float64
99.4465344522587
---
[1] 0.05387546 0.05151315 0.23640347 0.25708145

801176
5.030760445000996
147258
5.05952204

5.060200634293074
return_mean1    25.024253
return_mean2    29.228804
return_sd1      19.113953
return_sd2      21.940625
dtype: float64
95.30763493879107
---
[1] 0.05387546 0.05151315 0.24177438 0.25708145

903194
5.013345639589575
795629
5.007934135153091
return_mean1    26.042009
return_mean2    27.641724
return_sd1      20.707325
return_sd2      19.832218
dtype: float64
94.22327588515134
---
[1] 0.05387546 0.05151315 0.24177246 0.25708145

738927
4.995431627928026
370374
5.0280619455366535
return_mean1    25.468174
return_mean2    27.287170
return_sd1      20.749545
return_sd2      20.327597
dtype: float64
93.8324865322935
---
[1] 0.05387546 0.05151315 0.24177365 0.25708145

877369
4.976925006994064
314112
5.037790288366066
return_mean1    28.975250
return_mean2    28.338609
return_sd1      19.831396
return_sd2      20.857091
dtype: float64
98.00234622900899
---
[1] 0.05387546 0.05151315 0.24177286 0.25708145

266056
4.978414003909665
66098
5.007876618542147
return_mean1    25.1175

dtype: float64
95.31057445069398
---
[1] 0.05650418 0.05598729 0.24169577 0.25647980

522252
4.972969065235863
276168
4.890021151733267
return_mean1    26.100605
return_mean2    29.262917
return_sd1      19.434904
return_sd2      18.620934
dtype: float64
93.41936038732248
---
[1] 0.05650415 0.05598723 0.24169577 0.25647981

415050
5.019768416349365
597652
4.973859376616469
return_mean1    27.380843
return_mean2    28.739847
return_sd1      19.131001
return_sd2      19.297843
dtype: float64
94.54953408343337
---
[1] 0.05650417 0.05598726 0.24169577 0.25647980

750009
5.03325138670467
374018
5.007546070311856
return_mean1    26.917418
return_mean2    31.117307
return_sd1      19.606033
return_sd2      22.899253
dtype: float64
100.54001214273039
---
[1] 0.05650416 0.05598724 0.24169577 0.25647981

180321
5.059439678467665
773223
5.034677890384985
return_mean1    27.976992
return_mean2    25.780428
return_sd1      19.273195
return_sd2      19.900495
dtype: float64
92.93110903987309
---
[1]

---
[1] 0.05543662 0.05598725 0.24169577 0.38196601

573994
4.966702704069976
189361
5.0133779967635865
return_mean1     26.374209
return_mean2     36.642759
return_sd1       19.819199
return_sd2      196.831717
dtype: float64
279.6678838079121
---
[1] 0.05543662 0.05598725 0.24169577 0.61803399

967722
5.036434400469979
377799
4.884754411471918
return_mean1     27.850223
return_mean2     45.263833
return_sd1       19.493206
return_sd2      567.770402
dtype: float64
660.3776637310966
---
[1] 0.05543662 0.05598725 0.24169577 0.23606798

958158
5.022968475766584
408885
5.000277442934099
return_mean1    27.861786
return_mean2    27.159470
return_sd1      18.560896
return_sd2      35.827317
dtype: float64
109.4094693580906
---
[1] 0.05543662 0.05598725 0.24169577 0.14589803

394168
5.0591389130931255
31883
4.998417609624576
return_mean1     31.046889
return_mean2     22.276845
return_sd1       19.453422
return_sd2      172.978064
dtype: float64
245.75522030817316
---
[1] 0.05543662 0.05598


825793
4.994635782410415
601220
4.964468776922647
return_mean1    28.576374
return_mean2    27.671134
return_sd1      19.416150
return_sd2      20.343418
dtype: float64
96.00707642326198
---
[1] 0.05543662 0.05598725 0.24152844 0.25966000

500458
5.002186969499792
575675
5.033185823340646
return_mean1    27.330717
return_mean2    30.519894
return_sd1      20.776893
return_sd2      21.072123
dtype: float64
99.69962625833773
---
[1] 0.05543662 0.05598725 0.24232427 0.25966000

764437
5.023941403543356
405920
4.999307930948729
return_mean1    27.242312
return_mean2    30.448274
return_sd1      19.626058
return_sd2      20.039444
dtype: float64
97.35608658445453
---
[1] 0.05543662 0.05598725 0.24262826 0.25966000

779633
5.026928499495524
128971
4.99322621930757
return_mean1    27.971840
return_mean2    28.824694
return_sd1      18.032070
return_sd2      19.798971
dtype: float64
94.62757554447592
---
[1] 0.05543662 0.05598725 0.24251215 0.25966000

746739
4.996206465245091
288389
5.008225

4.9860102761405605
return_mean1    31.259310
return_mean2    29.290928
return_sd1      18.877557
return_sd2      21.425213
dtype: float64
100.85300808400066
---
[1] 0.06551204 0.07313578 0.24232482 0.25735398

936471
5.061283397422926
817313
5.013483495020647
return_mean1    27.323993
return_mean2    31.820690
return_sd1      19.825951
return_sd2      20.330365
dtype: float64
99.30099880321019
---
[1] 0.06551214 0.07313596 0.24232482 0.25735396

532969
4.980925125103797
779794
5.028222111698865
return_mean1    30.407335
return_mean2    31.573499
return_sd1      19.708929
return_sd2      20.848690
dtype: float64
102.53845270629309
---
[1] 0.06551218 0.07313603 0.24232482 0.25735395

258262
5.0735395421454825
461575
5.056801829924424
return_mean1    29.742163
return_mean2    30.316920
return_sd1      20.114831
return_sd2      18.303167
dtype: float64
98.47707992147751
---
[1] 0.06551220 0.07313605 0.24232482 0.25735395

370296
4.98860777923955
229796
4.9929864090916105
return_mean1    30

# ou44 Powell with no seed

In [13]:
import pandas as pd
import numpy as np
from scipy import stats
from scipy.optimize import minimize
import scipy
import os
os.environ['R_HOME'] = '/Library/Frameworks/R.framework/Versions/4.1/Resources/'
import rpy2
from rpy2.robjects.packages import importr
from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
import datetime
from rpy2.robjects.vectors import FloatVector
import multiprocessing


# Define the model that generates pair simulations.
yuima = importr("yuima")
n_ou_sim_string = """
n_sim_ou = function(num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length){

  drift = c("mu11-mu12*X1", "mu21-mu22*X2")
  diffusion = matrix(c("sigma11", "sigma12", "sigma21", "sigma22"), 2, 2, byrow=TRUE)
  ou_model = setModel(drift=drift, diffusion=diffusion, 
                        time.variable = "t",
                        state.var=c("X1","X2"), solve.variable=c("X1","X2"))

  newsamp = setSampling(Initial=T0, Terminal=T, n=length)

  n_sim_ou_data = data.frame(matrix(nrow=length+1, ncol=2*num_sim))
  for (i in 1:num_sim){
    ou_sim = simulate(ou_model, 
                      true.par=list(
                        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22, 
                        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22), 
                      xinit=xinit_vec[i], sampling=newsamp)
    original_data = ou_sim@data@original.data
    one_sim_ou = data.frame(original_data[,1], original_data[,2])
    colnames(one_sim_ou) = c('series1', 'series2')
    n_sim_ou_data[, (2*i-1):(2*i)] = one_sim_ou
  }
  return(n_sim_ou_data)
}
"""

n_ou_sim = SignatureTranslatedAnonymousPackage(n_ou_sim_string, "n_ou_sim")
def n_ou_simulation(num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length):
    """num_sim simulations of bivariate Ornstein-Uhlenbeck process,
    length = length of one series
    """
    n_ou_sim_data = pd.DataFrame(
        n_ou_sim.n_sim_ou(num_sim=num_sim,
                          mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22,
                          sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22,
                          xinit_vec=xinit_vec, T0=T0, T=T, length=length)).transpose()
    return n_ou_sim_data

def price_to_log_price(n_price):
    return(np.log(n_price))
def log_price_to_price(n_log_price):
    return(np.exp(n_log_price))
def price_to_return(n_price):
    n_return = pd.DataFrame()
    for i in range(n_price.shape[1]):
        ith_column_price_series = n_price.iloc[:, i]
        n_return = pd.concat([n_return, 100 * (np.log(ith_column_price_series[1:].values) - np.log(ith_column_price_series[:-1]))], axis=1)
    return n_return
def log_price_to_return(n_log_price):
    n_real_return = pd.DataFrame()
    for i in range(n_log_price.shape[1]):
        ith_column_price_series = n_log_price.iloc[:, i]
        n_real_return = pd.concat([n_real_return, 100 * (ith_column_price_series[1:].values - ith_column_price_series[:-1])], axis=1)
    return n_real_return
def cal_stats(n_return, n_price=None):
    # (different expressions of calculation from intro to stat finance)
    # 4 statistics
    return_series1 = n_return.iloc[:, ::2]
    return_series2 = n_return.iloc[:, 1::2]
    mean1 = return_series1.mean(axis=0).values
    sd1 = return_series1.std(axis=0).values
    mean2 = return_series2.mean(axis=0).values
    sd2 = return_series2.std(axis=0).values
    stats_data = pd.DataFrame([mean1, mean2, sd1, sd2])
    stats_data = stats_data.transpose()
    stats_data.columns = [
        'return_mean1', 'return_mean2',
        'return_sd1', 'return_sd2']
    return stats_data

def loss_function(params):
    """n_real_stats is a global amount calculated outside the function"""
    params = FloatVector(params)
    print(params)
    moment_loss = pd.DataFrame().reindex_like(real_stats)


    n_real_log_price = n_ou_simulation(
        num_sim,
        mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
        xinit_vec, T0, T, length)
    print(n_real_log_price.iloc[5, 5])
    n_real_price = log_price_to_price(n_log_price=n_real_log_price)
    n_real_return = price_to_return(n_price=n_real_price)
    n_real_stats = cal_stats(n_return=n_real_return, n_price=None)


    n_sim_log_price = n_ou_simulation(
        num_sim=num_sim,
        mu11=mu11, mu12=params[0], mu21=mu21, mu22=params[1],
        sigma11=params[2], sigma12=sigma12, sigma21=sigma21, sigma22=params[3],
        xinit_vec=xinit_vec, T0=T0, T=T, length=length)
    print(n_sim_log_price.iloc[5, 5])
    n_sim_price = log_price_to_price(n_sim_log_price)
    n_sim_return = price_to_return(n_sim_price)
    n_sim_stats = cal_stats(n_sim_return)


    for i in range(n_real_stats.shape[0]):
        for j in range(n_real_stats.shape[1]):
            moment_loss.iloc[i, j] = np.sqrt((n_real_stats.iloc[i, j] - n_sim_stats.iloc[i, j])**2)
    sum_all = np.sum(moment_loss)
    print(sum_all)
    print(np.sum(sum_all))
    print('---')

    return np.sum(sum_all)



real_price = pd.read_csv("sp500_20180101_20181231_pair_prices.csv", index_col=[0])
real_log_price = price_to_log_price(n_price=real_price)
real_return = pd.read_csv("sp500_20180101_20181231_pair_returns.csv", index_col=[0])
real_stats = cal_stats(n_return=real_return, n_price=None)
mu11, mu21, sigma12, sigma21 = 0, 0, 0, 0
xinit_vec = []
for i in range(int(real_log_price.shape[1]/2)):
    init_pair_log_price = [real_log_price.iloc[0, 2*i], real_log_price.iloc[0, 2*i+1]]
    init_pair_log_price = FloatVector(init_pair_log_price)
    xinit_vec.append(init_pair_log_price)
num_sim, T0, T, length = real_stats.shape[0], 0, 1, real_price.shape[0]
mu12, mu22, sigma11, sigma22 = 0.0369, 0.0405, 0.2437, 0.2573


num_iter = 1
initial0 = [0.5, 0.5, 0.5, 0.5]


def multi_process(iter):

    print(iter)

    begin_time = datetime.datetime.now()
    res = minimize(loss_function, initial0, method='Powell',
                   tol=1e-6, options={'disp': True},
                  bounds=[(0., 1), (0., 1), (0., 1), (0., 1)])
    print(res.x)

    time = datetime.datetime.now() - begin_time
    print(time)

    params = (res.x) 
    loss = loss_function((params))
    print(loss)
    return (res.x, time, loss)


iterations = [i for i in range(num_iter)]
pool = multiprocessing.Pool()
result = pool.map(multi_process, iterations)
print(result)

0
[1] 0.5 0.5 0.5 0.5

5.023367493888124
4.9449481189151
return_mean1    153.131421
return_mean2    148.903577
return_sd1      401.941049
return_sd2      378.408244
dtype: float64
1082.3842907674127
---
[1] 0.381966 0.500000 0.500000 0.500000

5.031613968941823
4.924320399555874
return_mean1    122.421447
return_mean2    150.603917
return_sd1      398.867889
return_sd2      375.009550
dtype: float64
1046.9028030451377
---
[1] 0.618034 0.500000 0.500000 0.500000

4.92081531241981
4.986561676531084
return_mean1    186.382667
return_mean2    150.571216
return_sd1      403.268873
return_sd2      381.686178
dtype: float64
1121.908934033991
---
[1] 0.236068 0.500000 0.500000 0.500000

5.021669829199146
4.955396722403672
return_mean1     77.786531
return_mean2    144.492882
return_sd1      404.148864
return_sd2      382.382306
dtype: float64
1008.8105828235778
---
[1] 0.145898 0.500000 0.500000 0.500000

4.95382353389236
5.052259989699721
return_mean1     52.062830
return_mean2    153.949626


876.7316597505644
---
[1] 0.03965469 0.11698024 0.50000000 0.50000000

4.999131019462538
4.964696792413482
return_mean1     43.054012
return_mean2     51.443278
return_sd1      405.603900
return_sd2      384.481911
dtype: float64
884.5831014264329
---
[1] 0.03965469 0.11275522 0.50000000 0.50000000

4.9296511402093754
4.907848533079072
return_mean1     44.513132
return_mean2     47.624955
return_sd1      400.852318
return_sd2      379.762437
dtype: float64
872.7528427689132
---
[1] 0.03965469 0.11091224 0.50000000 0.50000000

4.953722374160426
5.122897315059032
return_mean1     45.601354
return_mean2     52.431902
return_sd1      400.510126
return_sd2      380.341496
dtype: float64
878.8848774012058
---
[1] 0.03965469 0.11205105 0.50000000 0.50000000

5.053390328339946
4.9876194860188985
return_mean1     46.929245
return_mean2     42.709106
return_sd1      405.854988
return_sd2      377.932588
dtype: float64
873.4259265396929
---
[1] 0.03965469 0.11240316 0.50000000 0.50000000

5.07711

dtype: float64
478.9258822453412
---
[1] 0.03965469 0.11275488 0.24297916 0.50000000

5.039639878537073
4.934825084171603
return_mean1     27.551434
return_mean2     48.753981
return_sd1       20.527601
return_sd2      378.078946
dtype: float64
474.91196156786634
---
[1] 0.03965469 0.11275488 0.24297949 0.38196601

4.954627917202341
5.121598915358425
return_mean1     25.612025
return_mean2     39.263668
return_sd1       18.282162
return_sd2      194.396531
dtype: float64
277.5543861377855
---
[1] 0.03965469 0.11275488 0.24297949 0.61803399

4.986997924253102
4.996383317178394
return_mean1     26.169627
return_mean2     55.037053
return_sd1       19.413305
return_sd2      561.714454
dtype: float64
662.3344387301702
---
[1] 0.03965469 0.11275488 0.24297949 0.23606798

5.019681647285118
5.060468144522856
return_mean1    26.928991
return_mean2    35.041339
return_sd1      18.829713
return_sd2      34.074465
dtype: float64
114.87450792826539
---
[1] 0.03965469 0.11275488 0.24297949 0.145898

dtype: float64
104.78928239608284
---
[1] 0.06284388 0.11275488 0.24297949 0.25508038

5.066622414176848
5.043645343612987
return_mean1    27.198728
return_mean2    36.806494
return_sd1      20.226292
return_sd2      20.906083
dtype: float64
105.13759778475594
---
[1] 0.06703507 0.11275488 0.24297949 0.25508038

5.048586559620348
4.97986968507796
return_mean1    25.661407
return_mean2    34.785374
return_sd1      21.633303
return_sd2      22.362782
dtype: float64
104.44286583093944
---
[1] 0.06681613 0.11275488 0.24297949 0.25508038

5.045884741663371
4.9802402859238555
return_mean1    27.244982
return_mean2    35.488067
return_sd1      20.017824
return_sd2      21.855993
dtype: float64
104.60686575082086
---
[1] 0.06811743 0.11275488 0.24297949 0.25508038

5.010495431406227
5.018919896962887
return_mean1    28.123375
return_mean2    34.800850
return_sd1      18.664706
return_sd2      21.050024
dtype: float64
102.63895542377011
---
[1] 0.06869341 0.11275488 0.24297949 0.25508038

4.942

---
[1] 0.06868115 0.05550126 0.24297949 0.25508038

5.0708386168833215
4.961820528120389
return_mean1    29.215207
return_mean2    26.582501
return_sd1      19.536133
return_sd2      20.246474
dtype: float64
95.58031433405571
---
[1] 0.06868115 0.05564124 0.24297949 0.25508038

5.0046617651100425
5.066374723953036
return_mean1    29.376517
return_mean2    27.712494
return_sd1      20.650901
return_sd2      22.930964
dtype: float64
100.67087596343967
---
[1] 0.06868115 0.05569471 0.24297949 0.25508038

5.041946157766476
5.055966564235924
return_mean1    29.390635
return_mean2    29.783857
return_sd1      17.097000
return_sd2      20.005692
dtype: float64
96.27718395252569
---
[1] 0.06868115 0.05571513 0.24297949 0.25508038

5.014508759296496
4.999818042584111
return_mean1    27.213378
return_mean2    30.080421
return_sd1      19.900869
return_sd2      19.417425
dtype: float64
96.61209240399782
---
[1] 0.06868115 0.05572293 0.24297949 0.25508038

4.978034388387272
5.053286423481455
retu


4.9406808350331755
5.0453142789602925
return_mean1    28.157054
return_mean2    27.459652
return_sd1      19.083733
return_sd2      34.717903
dtype: float64
109.41834299912071
---
[1] 0.06868115 0.05572776 0.24399802 0.14589803

4.989233388562686
4.977436125431993
return_mean1     27.594892
return_mean2     22.693743
return_sd1       19.460044
return_sd2      173.357884
dtype: float64
243.10656256468314
---
[1] 0.06868115 0.05572776 0.24399802 0.25651044

4.989285068037556
5.046105851463784
return_mean1    30.353670
return_mean2    27.136223
return_sd1      19.163241
return_sd2      19.711492
dtype: float64
96.36462512980268
---
[1] 0.06868115 0.05572776 0.24399802 0.28812984

5.039331377875639
5.022163001819589
return_mean1    29.403647
return_mean2    29.293911
return_sd1      19.597593
return_sd2      48.650380
dtype: float64
126.94553055026469
---
[1] 0.06868115 0.05572776 0.24399802 0.25664116

5.013587679357218
4.979054029095548
return_mean1    26.972314
return_mean2    29.07903

5.005687973323102
return_mean1    28.265856
return_mean2    27.502219
return_sd1      20.231530
return_sd2      21.015140
dtype: float64
97.01474590912181
---
[1] 0.04462414 0.05572776 0.24399802 0.25768193

5.079366744516911
4.998374707373841
return_mean1    27.924896
return_mean2    28.768440
return_sd1      19.154468
return_sd2      21.514902
dtype: float64
97.36270638991091
---
[1] 0.04038281 0.05572776 0.24399802 0.25768193

5.008597724636911
4.9970767048725975
return_mean1    25.906474
return_mean2    27.504163
return_sd1      18.670726
return_sd2      19.056570
dtype: float64
91.13793231814934
---
[1] 0.04200285 0.05572776 0.24399802 0.25768193

5.020651717225104
4.964818775460592
return_mean1    29.528242
return_mean2    28.205103
return_sd1      19.729666
return_sd2      19.920642
dtype: float64
97.38365260991563
---
[1] 0.03938156 0.05572776 0.24399802 0.25768193

5.030575345170808
5.0498221538448975
return_mean1    27.153654
return_mean2    28.322879
return_sd1      19.45026

dtype: float64
96.48687363086803
---
[1] 0.04038321 0.03394232 0.24399802 0.25768193

4.961336292558639
4.98071587928981
return_mean1    25.701109
return_mean2    28.953353
return_sd1      19.503817
return_sd2      21.087426
dtype: float64
95.24570482636432
---
[1] 0.04038321 0.03469145 0.24399802 0.25768193

5.046729339324422
5.035277871369374
return_mean1    27.595597
return_mean2    27.134680
return_sd1      19.957283
return_sd2      19.787542
dtype: float64
94.47510120386603
---
[1] 0.04038321 0.03425105 0.24399802 0.25768193

5.0394561918953
5.041658873133751
return_mean1    26.183565
return_mean2    28.208684
return_sd1      18.616341
return_sd2      20.423382
dtype: float64
93.43197240262386
---
[1] 0.04038321 0.03453719 0.24399802 0.25768193

5.068300964331535
5.010667132482762
return_mean1    28.176936
return_mean2    27.728193
return_sd1      18.524717
return_sd2      20.074083
dtype: float64
94.50392904527675
---
[1] 0.04038321 0.03436897 0.24399802 0.25768193

5.00683383907


5.017748415933875
5.102011122497729
return_mean1    26.401010
return_mean2    29.183469
return_sd1      20.296768
return_sd2      20.387948
dtype: float64
96.2691941901237
---
[1] 0.04038321 0.03436897 0.23606635 0.25768193

5.055185646275765
5.0417750877320255
return_mean1    25.800571
return_mean2    28.952133
return_sd1      21.334837
return_sd2      21.435292
dtype: float64
97.522832910732
---
[1] 0.04038321 0.03436897 0.23606472 0.25768193

4.961534251030093
5.047105407814471
return_mean1    27.573805
return_mean2    27.845208
return_sd1      20.954205
return_sd2      20.056587
dtype: float64
96.42980429815934
---
[1] 0.04038321 0.03436897 0.23606573 0.25768193

5.01626891571934
5.043278904515042
return_mean1    25.342980
return_mean2    26.495510
return_sd1      20.262706
return_sd2      21.793498
dtype: float64
93.89469258486231
---
[1] 0.04038321 0.03436897 0.23606534 0.38196601

5.043659413911402
5.041500095752573
return_mean1     24.946151
return_mean2     35.233378
return_s

# ou46 Powell with no seed

In [15]:
import pandas as pd
import numpy as np
from scipy import stats
from scipy.optimize import minimize
import scipy
import os
os.environ['R_HOME'] = '/Library/Frameworks/R.framework/Versions/4.1/Resources/'
import rpy2
from rpy2.robjects.packages import importr
from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
import datetime
from rpy2.robjects.vectors import FloatVector
import multiprocessing


# Define the model that generates pair simulations.
yuima = importr("yuima")
n_ou_sim_string = """
n_sim_ou = function(num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length){

  drift = c("mu11-mu12*X1", "mu21-mu22*X2")
  diffusion = matrix(c("sigma11", "sigma12", "sigma21", "sigma22"), 2, 2, byrow=TRUE)
  ou_model = setModel(drift=drift, diffusion=diffusion, 
                        time.variable = "t",
                        state.var=c("X1","X2"), solve.variable=c("X1","X2"))

  newsamp = setSampling(Initial=T0, Terminal=T, n=length)

  n_sim_ou_data = data.frame(matrix(nrow=length+1, ncol=2*num_sim))
  for (i in 1:num_sim){
    ou_sim = simulate(ou_model, 
                      true.par=list(
                        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22, 
                        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22), 
                      xinit=xinit_vec[i], sampling=newsamp)
    original_data = ou_sim@data@original.data
    one_sim_ou = data.frame(original_data[,1], original_data[,2])
    colnames(one_sim_ou) = c('series1', 'series2')
    n_sim_ou_data[, (2*i-1):(2*i)] = one_sim_ou
  }
  return(n_sim_ou_data)
}
"""

n_ou_sim = SignatureTranslatedAnonymousPackage(n_ou_sim_string, "n_ou_sim")
def n_ou_simulation(num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length):
    """num_sim simulations of bivariate Ornstein-Uhlenbeck process,
    length = length of one series
    """
    n_ou_sim_data = pd.DataFrame(
        n_ou_sim.n_sim_ou(num_sim=num_sim,
                          mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22,
                          sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22,
                          xinit_vec=xinit_vec, T0=T0, T=T, length=length)).transpose()
    return n_ou_sim_data

def price_to_log_price(n_price):
    return(np.log(n_price))
def log_price_to_price(n_log_price):
    return(np.exp(n_log_price))
def price_to_return(n_price):
    n_return = pd.DataFrame()
    for i in range(n_price.shape[1]):
        ith_column_price_series = n_price.iloc[:, i]
        n_return = pd.concat([n_return, 100 * (np.log(ith_column_price_series[1:].values) - np.log(ith_column_price_series[:-1]))], axis=1)
    return n_return
def log_price_to_return(n_log_price):
    n_real_return = pd.DataFrame()
    for i in range(n_log_price.shape[1]):
        ith_column_price_series = n_log_price.iloc[:, i]
        n_real_return = pd.concat([n_real_return, 100 * (ith_column_price_series[1:].values - ith_column_price_series[:-1])], axis=1)
    return n_real_return

# Define the function of transforming returns data into feature statistics (or moments).
def cal_stats(n_return, n_price=None):
    """ 'mean' and 'sd' checked
    'skewness' and 'kurtosis' checked
    (different expressions of calculation from intro to stat finance)
    8 statistics
    """
    return_series1 = n_return.iloc[:, ::2]
    return_series2 = n_return.iloc[:, 1::2]

    mean1 = return_series1.mean(axis=0).values
    sd1 = return_series1.std(axis=0).values
    autocorr1 = return_series1.apply(lambda x: x.autocorr(lag=1))

    mean2 = return_series2.mean(axis=0).values
    sd2 = return_series2.std(axis=0).values
    autocorr2 = return_series2.apply(lambda x: x.autocorr(lag=1))

    stats_data = pd.DataFrame([mean1, mean2, sd1, sd2, autocorr1, autocorr2])
    stats_data = stats_data.transpose()
    stats_data.columns = [
        'return_mean1', 'return_mean2',
        'return_sd1', 'return_sd2',
        'return_autocorrelation1', 'return_autocorrelation2']
    return stats_data

def loss_function(params):
    """n_real_stats is a global amount calculated outside the function"""
    params = FloatVector(params)
    print(params)
    moment_loss = pd.DataFrame().reindex_like(real_stats)


    n_real_log_price = n_ou_simulation(
        num_sim,
        mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
        xinit_vec, T0, T, length)
    print(n_real_log_price.iloc[5, 5])
    n_real_price = log_price_to_price(n_log_price=n_real_log_price)
    n_real_return = price_to_return(n_price=n_real_price)
    n_real_stats = cal_stats(n_return=n_real_return, n_price=None)


    n_sim_log_price = n_ou_simulation(
        num_sim=num_sim,
        mu11=mu11, mu12=params[0], mu21=mu21, mu22=params[1],
        sigma11=params[2], sigma12=sigma12, sigma21=sigma21, sigma22=params[3],
        xinit_vec=xinit_vec, T0=T0, T=T, length=length)
    print(n_sim_log_price.iloc[5, 5])
    n_sim_price = log_price_to_price(n_sim_log_price)
    n_sim_return = price_to_return(n_sim_price)
    n_sim_stats = cal_stats(n_sim_return)


    for i in range(n_real_stats.shape[0]):
        for j in range(n_real_stats.shape[1]):
            moment_loss.iloc[i, j] = np.sqrt((n_real_stats.iloc[i, j] - n_sim_stats.iloc[i, j])**2)
    sum_all = np.sum(moment_loss)
    print(sum_all)
    print(np.sum(sum_all))
    print('---')

    return np.sum(sum_all)



real_price = pd.read_csv("sp500_20180101_20181231_pair_prices.csv", index_col=[0])
real_log_price = price_to_log_price(n_price=real_price)
real_return = pd.read_csv("sp500_20180101_20181231_pair_returns.csv", index_col=[0])
real_stats = cal_stats(n_return=real_return, n_price=None)
mu11, mu21, sigma12, sigma21 = 0, 0, 0, 0
xinit_vec = []
for i in range(int(real_log_price.shape[1]/2)):
    init_pair_log_price = [real_log_price.iloc[0, 2*i], real_log_price.iloc[0, 2*i+1]]
    init_pair_log_price = FloatVector(init_pair_log_price)
    xinit_vec.append(init_pair_log_price)
num_sim, T0, T, length = real_stats.shape[0], 0, 1, real_price.shape[0]
mu12, mu22, sigma11, sigma22 = 0.0369, 0.0405, 0.2437, 0.2573


num_iter = 1
initial0 = [0.5, 0.5, 0.5, 0.5]


def multi_process(iter):

    print(iter)

    begin_time = datetime.datetime.now()
    res = minimize(loss_function, initial0, method='Powell',
                   tol=1e-6, options={'disp': True},
                  bounds=[(0., 1), (0., 1), (0., 1), (0., 1)])
    print(res.x)

    time = datetime.datetime.now() - begin_time
    print(time)

    params = (res.x)
    loss = loss_function((params))
    print(loss)
    return (res.x, time, loss)


iterations = [i for i in range(num_iter)]
pool = multiprocessing.Pool()
result = pool.map(multi_process, iterations)
print(result)

0
[1] 0.5 0.5 0.5 0.5

5.02469427073406
5.047848895949557
return_mean1               157.173920
return_mean2               152.680834
return_sd1                 400.121252
return_sd2                 387.849117
return_autocorrelation1     17.661219
return_autocorrelation2     18.400089
dtype: float64
1133.8864303853245
---
[1] 0.381966 0.500000 0.500000 0.500000

4.983493209480448
5.035928447341033
return_mean1               119.454290
return_mean2               149.531669
return_sd1                 397.903832
return_sd2                 378.427872
return_autocorrelation1     17.688458
return_autocorrelation2     18.920606
dtype: float64
1081.9267270901673
---
[1] 0.618034 0.500000 0.500000 0.500000

5.071042606564629
5.056343909896995
return_mean1               181.263451
return_mean2               151.486463
return_sd1                 401.959757
return_sd2                 382.677653
return_autocorrelation1     17.549808
return_autocorrelation2     18.346227
dtype: float64
1153.28335937

dtype: float64
937.5357353242755
---
[1] 0.04303952 0.14589803 0.50000000 0.50000000

4.9778924565796645
5.06597414505962
return_mean1                45.346037
return_mean2                53.838905
return_sd1                 405.349841
return_sd2                 380.146093
return_autocorrelation1     18.665250
return_autocorrelation2     17.342559
dtype: float64
920.6886837329394
---
[1] 0.04303952 0.09016994 0.50000000 0.50000000

4.978317308838617
5.062240156668056
return_mean1                42.508814
return_mean2                49.452485
return_sd1                 404.114094
return_sd2                 377.986100
return_autocorrelation1     17.994958
return_autocorrelation2     15.241282
dtype: float64
907.2977335258222
---
[1] 0.04303952 0.05572809 0.50000000 0.50000000

5.047609222279056
5.091065714443486
return_mean1                43.684615
return_mean2                42.459364
return_sd1                 398.801761
return_sd2                 381.952781
return_autocorrelation1   

dtype: float64
895.679137372979
---
[1] 0.04303952 0.07783333 0.50000000 0.50000000

4.988400866026541
5.052129813741533
return_mean1                43.653395
return_mean2                45.562839
return_sd1                 399.140748
return_sd2                 382.114534
return_autocorrelation1     16.009528
return_autocorrelation2     17.439790
dtype: float64
903.9208349512148
---
[1] 0.04303952 0.07783600 0.50000000 0.50000000

5.038362764174898
4.984695291159646
return_mean1                45.580127
return_mean2                44.761241
return_sd1                 402.739943
return_sd2                 378.522288
return_autocorrelation1     18.667354
return_autocorrelation2     15.782060
dtype: float64
906.0530127050602
---
[1] 0.04303952 0.07783435 0.50000000 0.50000000

4.971097436423111
4.998307253943493
return_mean1                46.375701
return_mean2                46.551461
return_sd1                 401.853338
return_sd2                 380.169355
return_autocorrelation1    

dtype: float64
502.20578093154427
---
[1] 0.04303952 0.07783498 0.24585742 0.38196601

5.026010187887092
5.023782363269785
return_mean1                24.669311
return_mean2                36.870712
return_sd1                  19.727385
return_sd2                 195.605550
return_autocorrelation1     18.710994
return_autocorrelation2     17.176470
dtype: float64
312.76042271440787
---
[1] 0.04303952 0.07783498 0.24585742 0.61803399

4.984820563797267
5.022753160603188
return_mean1                27.397625
return_mean2                49.408961
return_sd1                  18.457231
return_sd2                 565.783947
return_autocorrelation1     18.253391
return_autocorrelation2     17.011842
dtype: float64
696.3129950457154
---
[1] 0.04303952 0.07783498 0.24585742 0.23606798

5.076693744153239
5.067974388943512
return_mean1               28.404783
return_mean2               29.546826
return_sd1                 19.723158
return_sd2                 36.476313
return_autocorrelation1    1

dtype: float64
130.81325129174775
---
[1] 0.04303952 0.07783498 0.24585742 0.25834942

4.958571878634127
4.967802619323055
return_mean1               25.477735
return_mean2               28.517970
return_sd1                 19.185263
return_sd2                 20.657758
return_autocorrelation1    18.295855
return_autocorrelation2    18.075471
dtype: float64
130.21005107985903
---
[1] -0.413920950 -0.344330037 -0.008285151  0.016697943

5.011384908290149
5.056033220164676
return_mean1               237.236714
return_mean2               187.019811
return_sd1                 353.423603
return_sd2                 371.529930
return_autocorrelation1    196.390008
return_autocorrelation2     74.212333
dtype: float64
1419.8123995672079
---
[1] 0.38196601 0.07783498 0.24585742 0.25834897

4.983420454750622
5.036549253097091
return_mean1               122.345694
return_mean2                28.040203
return_sd1                  19.666089
return_sd2                  20.078825
return_autocorrelatio

dtype: float64
135.12007220584033
---
[1] 0.08080208 0.09432169 0.24585742 0.25834897

5.023328706004429
5.062710055880413
return_mean1               30.118085
return_mean2               33.305730
return_sd1                 21.541668
return_sd2                 21.867795
return_autocorrelation1    18.505806
return_autocorrelation2    17.704963
dtype: float64
143.04404743842196
---
[1] 0.08080208 0.09362292 0.24585742 0.25834897

4.9980300037395775
4.934347356875386
return_mean1               31.426764
return_mean2               33.040171
return_sd1                 20.786307
return_sd2                 22.753549
return_autocorrelation1    18.103260
return_autocorrelation2    16.777205
dtype: float64
142.887256941897
---
[1] 0.08080208 0.09389003 0.24585742 0.25834897

5.006916278065638
4.976334674683252
return_mean1               32.242332
return_mean2               32.687791
return_sd1                 20.214768
return_sd2                 20.314162
return_autocorrelation1    18.447480
ret

dtype: float64
139.83990253519366
---
[1] 0.08080208 0.09362408 0.23548252 0.25834897

5.062930488529632
4.940464818405563
return_mean1               30.389085
return_mean2               30.389351
return_sd1                 18.700779
return_sd2                 19.979121
return_autocorrelation1    17.405977
return_autocorrelation2    18.778157
dtype: float64
135.64246996756046
---
[1] 0.08080208 0.09362408 0.23532281 0.25834897

4.990627832034855
4.9943578504312365
return_mean1               31.769166
return_mean2               33.469264
return_sd1                 22.497725
return_sd2                 22.837569
return_autocorrelation1    17.103431
return_autocorrelation2    16.311334
dtype: float64
143.98848897976853
---
[1] 0.08080208 0.09362408 0.23542072 0.25834897

5.008637066299706
5.029146581456319
return_mean1               30.028100
return_mean2               34.351615
return_sd1                 22.646519
return_sd2                 21.257372
return_autocorrelation1    17.567508
r

dtype: float64
144.61384562177986
---
[1] 0.08080208 0.09362408 0.23538252 0.26402757

4.994022257875005
5.02761272381394
return_mean1               30.589561
return_mean2               32.406014
return_sd1                 22.419371
return_sd2                 23.005072
return_autocorrelation1    18.420651
return_autocorrelation2    19.548782
dtype: float64
146.3894515330343
---
[1] 0.08080208 0.09362408 0.23538252 0.26431857

4.96124366984412
5.057370590268713
return_mean1               30.698924
return_mean2               33.632187
return_sd1                 22.746815
return_sd2                 21.523857
return_autocorrelation1    18.570739
return_autocorrelation2    15.825718
dtype: float64
142.99824074322183
---
[1] 0.08080208 0.09362408 0.23538252 0.26413735

5.009783386545974
4.9345514976055
return_mean1               30.738473
return_mean2               31.389352
return_sd1                 20.197196
return_sd2                 22.190940
return_autocorrelation1    17.574159
return_

# ou48 Powell with no seed

In [17]:
import pandas as pd
import numpy as np
from scipy import stats
from scipy.optimize import minimize
import scipy
import os
os.environ['R_HOME'] = '/Library/Frameworks/R.framework/Versions/4.1/Resources/'
import rpy2
from rpy2.robjects.packages import importr
from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
import datetime
from rpy2.robjects.vectors import FloatVector
import multiprocessing


# Define the model that generates pair simulations.
yuima = importr("yuima")
n_ou_sim_string = """
n_sim_ou = function(num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length){

  drift = c("mu11-mu12*X1", "mu21-mu22*X2")
  diffusion = matrix(c("sigma11", "sigma12", "sigma21", "sigma22"), 2, 2, byrow=TRUE)
  ou_model = setModel(drift=drift, diffusion=diffusion, 
                        time.variable = "t",
                        state.var=c("X1","X2"), solve.variable=c("X1","X2"))

  newsamp = setSampling(Initial=T0, Terminal=T, n=length)

  n_sim_ou_data = data.frame(matrix(nrow=length+1, ncol=2*num_sim))
  for (i in 1:num_sim){
    ou_sim = simulate(ou_model, 
                      true.par=list(
                        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22, 
                        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22), 
                      xinit=xinit_vec[i], sampling=newsamp)
    original_data = ou_sim@data@original.data
    one_sim_ou = data.frame(original_data[,1], original_data[,2])
    colnames(one_sim_ou) = c('series1', 'series2')
    n_sim_ou_data[, (2*i-1):(2*i)] = one_sim_ou
  }
  return(n_sim_ou_data)
}
"""

n_ou_sim = SignatureTranslatedAnonymousPackage(n_ou_sim_string, "n_ou_sim")
def n_ou_simulation(num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length):
    """num_sim simulations of bivariate Ornstein-Uhlenbeck process,
    length = length of one series
    """
    n_ou_sim_data = pd.DataFrame(
        n_ou_sim.n_sim_ou(num_sim=num_sim,
                          mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22,
                          sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22,
                          xinit_vec=xinit_vec, T0=T0, T=T, length=length)).transpose()
    return n_ou_sim_data

def price_to_log_price(n_price):
    return(np.log(n_price))
def log_price_to_price(n_log_price):
    return(np.exp(n_log_price))
def price_to_return(n_price):
    n_return = pd.DataFrame()
    for i in range(n_price.shape[1]):
        ith_column_price_series = n_price.iloc[:, i]
        n_return = pd.concat([n_return, 100 * (np.log(ith_column_price_series[1:].values) - np.log(ith_column_price_series[:-1]))], axis=1)
    return n_return
def log_price_to_return(n_log_price):
    n_real_return = pd.DataFrame()
    for i in range(n_log_price.shape[1]):
        ith_column_price_series = n_log_price.iloc[:, i]
        n_real_return = pd.concat([n_real_return, 100 * (ith_column_price_series[1:].values - ith_column_price_series[:-1])], axis=1)
    return n_real_return

# Define the function of transforming returns data into feature statistics (or moments).
def cal_stats(n_return, n_price=None):
    """ 'mean' and 'sd' checked
    'skewness' and 'kurtosis' checked
    (different expressions of calculation from intro to stat finance)
    8 statistics
    """
    return_series1 = n_return.iloc[:, ::2]
    return_series2 = n_return.iloc[:, 1::2]

    mean1 = return_series1.mean(axis=0).values
    sd1 = return_series1.std(axis=0).values
    skew1 = return_series1.skew(axis=0).values
    kurtosis1 = return_series1.kurtosis(axis=0).values

    mean2 = return_series2.mean(axis=0).values
    sd2 = return_series2.std(axis=0).values
    skew2 = return_series2.skew(axis=0).values
    kurtosis2 = return_series2.kurtosis(axis=0).values

    stats_data = pd.DataFrame([mean1, mean2, sd1, sd2, skew1, skew2, kurtosis1, kurtosis2])
    stats_data = stats_data.transpose()
    stats_data.columns = [
        'return_mean1', 'return_mean2',
        'return_sd1', 'return_sd2',
        'return_skew1', 'return_skew2',
        'return_kurtosis1', 'return_kurtosis2']
    return stats_data

def loss_function(params):
    """n_real_stats is a global amount calculated outside the function"""
    params = FloatVector(params)
    print(params)
    moment_loss = pd.DataFrame().reindex_like(real_stats)


    n_real_log_price = n_ou_simulation(
        num_sim,
        mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
        xinit_vec, T0, T, length)
    print(n_real_log_price.iloc[5, 5])
    n_real_price = log_price_to_price(n_log_price=n_real_log_price)
    n_real_return = price_to_return(n_price=n_real_price)
    n_real_stats = cal_stats(n_return=n_real_return, n_price=None)


    n_sim_log_price = n_ou_simulation(
        num_sim=num_sim,
        mu11=mu11, mu12=params[0], mu21=mu21, mu22=params[1],
        sigma11=params[2], sigma12=sigma12, sigma21=sigma21, sigma22=params[3],
        xinit_vec=xinit_vec, T0=T0, T=T, length=length)
    print(n_sim_log_price.iloc[5, 5])
    n_sim_price = log_price_to_price(n_sim_log_price)
    n_sim_return = price_to_return(n_sim_price)
    n_sim_stats = cal_stats(n_sim_return)


    for i in range(n_real_stats.shape[0]):
        for j in range(n_real_stats.shape[1]):
            moment_loss.iloc[i, j] = np.sqrt((n_real_stats.iloc[i, j] - n_sim_stats.iloc[i, j])**2)
    sum_all = np.sum(moment_loss)
    print(sum_all)
    print(np.sum(sum_all))
    print('---')

    return np.sum(sum_all)



real_price = pd.read_csv("sp500_20180101_20181231_pair_prices.csv", index_col=[0])
real_log_price = price_to_log_price(n_price=real_price)
real_return = pd.read_csv("sp500_20180101_20181231_pair_returns.csv", index_col=[0])
real_stats = cal_stats(n_return=real_return, n_price=None)
mu11, mu21, sigma12, sigma21 = 0, 0, 0, 0
xinit_vec = []
for i in range(int(real_log_price.shape[1]/2)):
    init_pair_log_price = [real_log_price.iloc[0, 2*i], real_log_price.iloc[0, 2*i+1]]
    init_pair_log_price = FloatVector(init_pair_log_price)
    xinit_vec.append(init_pair_log_price)
num_sim, T0, T, length = real_stats.shape[0], 0, 1, real_price.shape[0]
mu12, mu22, sigma11, sigma22 = 0.0369, 0.0405, 0.2437, 0.2573


num_iter = 1
initial0 = [0.5, 0.5, 0.5, 0.5]


def multi_process(iter):

    print(iter)

    begin_time = datetime.datetime.now()
    res = minimize(loss_function, initial0, method='Powell',
                   tol=1e-6, options={'disp': True},
                  bounds=[(0., 1), (0., 1), (0., 1), (0., 1)])
    print(res.x)

    time = datetime.datetime.now() - begin_time
    print(time)

    params = (res.x)
    loss = loss_function((params))
    print(loss)
    return (res.x, time, loss)


iterations = [i for i in range(num_iter)]
pool = multiprocessing.Pool()
result = pool.map(multi_process, iterations)
print(result)

0
[1] 0.5 0.5 0.5 0.5

4.947538265121531
4.979031732459553
return_mean1        156.083251
return_mean2        146.663669
return_sd1          401.287496
return_sd2          376.446234
return_skew1         40.169847
return_skew2         42.293827
return_kurtosis1     90.113958
return_kurtosis2     86.675651
dtype: float64
1339.7339332654592
---
[1] 0.381966 0.500000 0.500000 0.500000

4.95007487094909
5.0418232884736085
return_mean1        120.875457
return_mean2        148.126260
return_sd1          404.335380
return_sd2          383.498810
return_skew1         44.616134
return_skew2         45.262562
return_kurtosis1     78.966167
return_kurtosis2     78.984823
dtype: float64
1304.6655944987465
---
[1] 0.618034 0.500000 0.500000 0.500000

5.048286232293009
4.893669729892564
return_mean1        179.237703
return_mean2        148.182576
return_sd1          402.760039
return_sd2          381.671157
return_skew1         47.566968
return_skew2         38.834449
return_kurtosis1     87.53972

dtype: float64
1228.0948820155065
---
[1] 0.06354067 0.50000000 0.50000000 0.50000000

5.008719577133006
4.924144831212112
return_mean1         45.412035
return_mean2        140.449351
return_sd1          399.698411
return_sd2          375.850961
return_skew1         44.273948
return_skew2         42.753549
return_kurtosis1     76.606492
return_kurtosis2     81.798131
dtype: float64
1206.8428785338044
---
[1] 0.06353605 0.50000000 0.50000000 0.50000000

5.070709149146099
4.9909401210031294
return_mean1         45.107158
return_mean2        151.407505
return_sd1          403.509719
return_sd2          383.514439
return_skew1         46.576649
return_skew2         43.827750
return_kurtosis1     82.990607
return_kurtosis2     83.512327
dtype: float64
1240.4461546930675
---
[1] 0.0635387 0.5000000 0.5000000 0.5000000

5.015420107462512
4.960742353396461
return_mean1         39.636669
return_mean2        146.248107
return_sd1          399.420129
return_sd2          379.085540
return_skew1  


5.049148473371275
5.006863745993029
return_mean1         43.099311
return_mean2         48.189001
return_sd1          400.329107
return_sd2          382.872945
return_skew1         41.364926
return_skew2         41.074633
return_kurtosis1     81.519238
return_kurtosis2     82.638801
dtype: float64
1121.0879619016034
---
[1] 0.06354033 0.09020824 0.50000000 0.50000000

4.986767525618311
4.985965822657311
return_mean1         41.820678
return_mean2         45.316064
return_sd1          402.447650
return_sd2          383.595877
return_skew1         48.042819
return_skew2         43.365351
return_kurtosis1     81.331652
return_kurtosis2     81.643050
dtype: float64
1127.5631413587857
---
[1] 0.06354033 0.09015434 0.50000000 0.50000000

5.035668689052171
5.0519814180648694
return_mean1         41.770834
return_mean2         42.115180
return_sd1          402.360348
return_sd2          377.892097
return_skew1         40.292018
return_skew2         45.628115
return_kurtosis1     82.786459
ret

dtype: float64
725.1990283425578
---
[1] 0.06354033 0.09016994 0.25042752 0.50000000

4.996634975962308
5.1595681449556725
return_mean1         29.368445
return_mean2         48.175089
return_sd1           19.481013
return_sd2          382.571026
return_skew1         43.431974
return_skew2         47.313379
return_kurtosis1     81.308656
return_kurtosis2     95.099122
dtype: float64
746.7487040492352
---
[1] 0.06354033 0.09016994 0.24964405 0.50000000

5.00478490002968
5.015361431141746
return_mean1         28.188024
return_mean2         46.805871
return_sd1           21.427054
return_sd2          378.595918
return_skew1         39.600399
return_skew2         41.415306
return_kurtosis1     80.498737
return_kurtosis2     76.837689
dtype: float64
713.3689991155206
---
[1] 0.06354033 0.09016994 0.24943521 0.50000000

4.981849917204917
5.076398468242634
return_mean1         29.257976
return_mean2         43.837455
return_sd1           20.077995
return_sd2          380.437092
return_skew1  

4.991449935443023
return_mean1        27.981856
return_mean2        30.398564
return_sd1          19.970966
return_sd2          23.501539
return_skew1        42.462563
return_skew2        44.335355
return_kurtosis1    82.975978
return_kurtosis2    80.219254
dtype: float64
351.8460741489547
---
[1] 0.06354033 0.09016994 0.24964405 0.26430666

5.001676442010145
5.031281553171929
return_mean1        29.659334
return_mean2        33.899869
return_sd1          21.077615
return_sd2          24.171427
return_skew1        43.091632
return_skew2        41.310150
return_kurtosis1    83.438724
return_kurtosis2    87.731469
dtype: float64
364.3802208749077
---
[1] 0.06354033 0.09016994 0.24964405 0.26490885

4.988409850169655
5.051335771413069
return_mean1        29.447107
return_mean2        31.555637
return_sd1          18.374259
return_sd2          22.470835
return_skew1        43.521472
return_skew2        41.960207
return_kurtosis1    92.037555
return_kurtosis2    83.138903
dtype: float64
362

dtype: float64
361.1473430885501
---
[1] 0.18401876 0.09016994 0.24964405 0.26465353

5.052766679454295
5.061633292641441
return_mean1        56.703606
return_mean2        33.917705
return_sd1          21.773637
return_sd2          21.680082
return_skew1        42.072682
return_skew2        41.455939
return_kurtosis1    78.157187
return_kurtosis2    83.498386
dtype: float64
379.25922393477595
---
[1] 0.08846938 0.09016994 0.24964405 0.26465353

5.068321936596888
5.0127606689880855
return_mean1        32.933647
return_mean2        31.322872
return_sd1          20.682874
return_sd2          23.356677
return_skew1        43.369094
return_skew2        42.828595
return_kurtosis1    77.901060
return_kurtosis2    76.551255
dtype: float64
348.9460737007456
---
[1] 0.06734490 0.09016994 0.24964405 0.26465353

5.065366699461845
5.032447195066004
return_mean1        26.646072
return_mean2        32.934191
return_sd1          21.353937
return_sd2          24.178859
return_skew1        45.984906
re

dtype: float64
365.9279818590306
---
[1] 0.08847015 0.09016994 0.24964405 0.26465353

4.974924135274181
5.0149957746951745
return_mean1        31.534822
return_mean2        32.334139
return_sd1          21.212595
return_sd2          23.607990
return_skew1        42.683451
return_skew2        47.292619
return_kurtosis1    94.551499
return_kurtosis2    89.294705
dtype: float64
382.51182126270703
---
[1] 0.08846884 0.09016994 0.24964405 0.26465353

4.996798044003257
5.00441967932769
return_mean1        29.410921
return_mean2        34.446935
return_sd1          20.053777
return_sd2          22.328316
return_skew1        42.942150
return_skew2        46.872695
return_kurtosis1    77.692299
return_kurtosis2    87.262754
dtype: float64
361.00984713427084
---
[1] 0.08846971 0.09016994 0.24964405 0.26465353

5.0301841653002475
5.01140307638943
return_mean1        30.897242
return_mean2        35.095718
return_sd1          20.196919
return_sd2          21.672064
return_skew1        43.853994
re

dtype: float64
360.7675658062527
---
[1] 0.08846938 0.02415393 0.24964405 0.26465353

5.004701678192961
5.020435286336347
return_mean1        31.832711
return_mean2        30.813150
return_sd1          19.217595
return_sd2          23.292193
return_skew1        44.320835
return_skew2        41.865687
return_kurtosis1    83.569249
return_kurtosis2    91.153511
dtype: float64
366.06493044545624
---
[1] 0.08846938 0.02419751 0.24964405 0.26465353

5.010175970541958
5.0006592395221645
return_mean1        33.293686
return_mean2        29.182628
return_sd1          21.187000
return_sd2          22.728081
return_skew1        41.615812
return_skew2        48.588399
return_kurtosis1    93.660347
return_kurtosis2    83.251142
dtype: float64
373.5070947977766
---
[1] 0.08846938 0.02417166 0.24964405 0.26465353

5.036277326693094
5.046396156622443
return_mean1        33.949283
return_mean2        30.774831
return_sd1          20.040625
return_sd2          22.125681
return_skew1        43.484836
re

dtype: float64
366.2992941140443
---
[1] 0.08846938 0.02418262 0.25999672 0.26465353

5.0299108672599235
5.003418712715686
return_mean1        31.193018
return_mean2        30.287788
return_sd1          28.839637
return_sd2          22.227641
return_skew1        41.381953
return_skew2        45.045205
return_kurtosis1    90.587423
return_kurtosis2    89.849459
dtype: float64
379.4121232813143
---
[1] 0.08846938 0.02418262 0.26009075 0.26465353

4.978675280381769
5.029572509714525
return_mean1        30.556261
return_mean2        26.698192
return_sd1          30.259379
return_sd2          22.395111
return_skew1        43.529876
return_skew2        42.559389
return_kurtosis1    78.317488
return_kurtosis2    90.660997
dtype: float64
364.9766934188978
---
[1] 0.08846938 0.02418262 0.26012667 0.26465353

5.050640589532673
4.994727642633982
return_mean1        30.774374
return_mean2        29.057049
return_sd1          29.372394
return_sd2          22.999739
return_skew1        47.062096
ret

dtype: float64
354.52632555030283
---
[1] 0.08846938 0.02418262 0.26014763 0.26268128

4.988930672315766
5.048403954306725
return_mean1        31.295589
return_mean2        28.963845
return_sd1          28.989050
return_sd2          21.733715
return_skew1        41.287723
return_skew2        43.319994
return_kurtosis1    83.753782
return_kurtosis2    87.201576
dtype: float64
366.54527295571603
---
[1] 0.08846938 0.02418262 0.26014763 0.26290756

5.004414772183041
4.998795890389289
return_mean1        32.708473
return_mean2        26.576471
return_sd1          28.998046
return_sd2          20.862552
return_skew1        44.352614
return_skew2        43.548719
return_kurtosis1    90.230366
return_kurtosis2    81.724060
dtype: float64
369.00130137114763
---
[1] 0.08846938 0.02418262 0.26014763 0.26276771

5.015542276063924
5.062258508935195
return_mean1        30.629647
return_mean2        32.660597
return_sd1          30.050106
return_sd2          21.661034
return_skew1        41.014604
r

# ou410 Powell with no seed

In [18]:
import pandas as pd
import numpy as np
from scipy import stats
from scipy.optimize import minimize
import scipy
import os
os.environ['R_HOME'] = '/Library/Frameworks/R.framework/Versions/4.1/Resources/'
import rpy2
from rpy2.robjects.packages import importr
from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
import datetime
from rpy2.robjects.vectors import FloatVector
import multiprocessing


# Define the model that generates pair simulations.
yuima = importr("yuima")
n_ou_sim_string = """
n_sim_ou = function(num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length){

  drift = c("mu11-mu12*X1", "mu21-mu22*X2")
  diffusion = matrix(c("sigma11", "sigma12", "sigma21", "sigma22"), 2, 2, byrow=TRUE)
  ou_model = setModel(drift=drift, diffusion=diffusion, 
                        time.variable = "t",
                        state.var=c("X1","X2"), solve.variable=c("X1","X2"))

  newsamp = setSampling(Initial=T0, Terminal=T, n=length)

  n_sim_ou_data = data.frame(matrix(nrow=length+1, ncol=2*num_sim))
  for (i in 1:num_sim){
    ou_sim = simulate(ou_model, 
                      true.par=list(
                        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22, 
                        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22), 
                      xinit=xinit_vec[i], sampling=newsamp)
    original_data = ou_sim@data@original.data
    one_sim_ou = data.frame(original_data[,1], original_data[,2])
    colnames(one_sim_ou) = c('series1', 'series2')
    n_sim_ou_data[, (2*i-1):(2*i)] = one_sim_ou
  }
  return(n_sim_ou_data)
}
"""

n_ou_sim = SignatureTranslatedAnonymousPackage(n_ou_sim_string, "n_ou_sim")
def n_ou_simulation(num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length):
    """num_sim simulations of bivariate Ornstein-Uhlenbeck process,
    length = length of one series
    """
    n_ou_sim_data = pd.DataFrame(
        n_ou_sim.n_sim_ou(num_sim=num_sim,
                          mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22,
                          sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22,
                          xinit_vec=xinit_vec, T0=T0, T=T, length=length)).transpose()
    return n_ou_sim_data

def price_to_log_price(n_price):
    return(np.log(n_price))
def log_price_to_price(n_log_price):
    return(np.exp(n_log_price))
def price_to_return(n_price):
    n_return = pd.DataFrame()
    for i in range(n_price.shape[1]):
        ith_column_price_series = n_price.iloc[:, i]
        n_return = pd.concat([n_return, 100 * (np.log(ith_column_price_series[1:].values) - np.log(ith_column_price_series[:-1]))], axis=1)
    return n_return
def log_price_to_return(n_log_price):
    n_real_return = pd.DataFrame()
    for i in range(n_log_price.shape[1]):
        ith_column_price_series = n_log_price.iloc[:, i]
        n_real_return = pd.concat([n_real_return, 100 * (ith_column_price_series[1:].values - ith_column_price_series[:-1])], axis=1)
    return n_real_return

# Define the function of transforming returns data into feature statistics (or moments).
def cal_stats(n_return, n_price=None):
    """ 'mean' and 'sd' checked
    'skewness' and 'kurtosis' checked
    (different expressions of calculation from intro to stat finance)
    8 statistics
    """
    return_series1 = n_return.iloc[:, ::2]
    return_series2 = n_return.iloc[:, 1::2]

    mean1 = return_series1.mean(axis=0).values
    sd1 = return_series1.std(axis=0).values
    skew1 = return_series1.skew(axis=0).values
    kurtosis1 = return_series1.kurtosis(axis=0).values
    autocorr1 = return_series1.apply(lambda x: x.autocorr(lag=1))

    mean2 = return_series2.mean(axis=0).values
    sd2 = return_series2.std(axis=0).values
    skew2 = return_series2.skew(axis=0).values
    kurtosis2 = return_series2.kurtosis(axis=0).values
    autocorr2 = return_series2.apply(lambda x: x.autocorr(lag=1))

    stats_data = pd.DataFrame([mean1, mean2, sd1, sd2, skew1, skew2, kurtosis1, kurtosis2, autocorr1, autocorr2])
    stats_data = stats_data.transpose()
    stats_data.columns = [
        'return_mean1', 'return_mean2',
        'return_sd1', 'return_sd2',
        'return_skew1', 'return_skew2',
        'return_kurtosis1', 'return_kurtosis2',
        'return_autocorrelation1', 'return_autocorrelation2']
    return stats_data

def loss_function(params):
    """n_real_stats is a global amount calculated outside the function"""
    params = FloatVector(params)
    print(params)
    moment_loss = pd.DataFrame().reindex_like(real_stats)


    n_real_log_price = n_ou_simulation(
        num_sim,
        mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
        xinit_vec, T0, T, length)
    print(n_real_log_price.iloc[5, 5])
    n_real_price = log_price_to_price(n_log_price=n_real_log_price)
    n_real_return = price_to_return(n_price=n_real_price)
    n_real_stats = cal_stats(n_return=n_real_return, n_price=None)


    n_sim_log_price = n_ou_simulation(
        num_sim=num_sim,
        mu11=mu11, mu12=params[0], mu21=mu21, mu22=params[1],
        sigma11=params[2], sigma12=sigma12, sigma21=sigma21, sigma22=params[3],
        xinit_vec=xinit_vec, T0=T0, T=T, length=length)
    print(n_sim_log_price.iloc[5, 5])
    n_sim_price = log_price_to_price(n_sim_log_price)
    n_sim_return = price_to_return(n_sim_price)
    n_sim_stats = cal_stats(n_sim_return)


    for i in range(n_real_stats.shape[0]):
        for j in range(n_real_stats.shape[1]):
            moment_loss.iloc[i, j] = np.sqrt((n_real_stats.iloc[i, j] - n_sim_stats.iloc[i, j])**2)
    sum_all = np.sum(moment_loss)
    print(sum_all)
    print(np.sum(sum_all))
    print('---')

    return np.sum(sum_all)



real_price = pd.read_csv("sp500_20180101_20181231_pair_prices.csv", index_col=[0])
real_log_price = price_to_log_price(n_price=real_price)
real_return = pd.read_csv("sp500_20180101_20181231_pair_returns.csv", index_col=[0])
real_stats = cal_stats(n_return=real_return, n_price=None)
mu11, mu21, sigma12, sigma21 = 0, 0, 0, 0
xinit_vec = []
for i in range(int(real_log_price.shape[1]/2)):
    init_pair_log_price = [real_log_price.iloc[0, 2*i], real_log_price.iloc[0, 2*i+1]]
    init_pair_log_price = FloatVector(init_pair_log_price)
    xinit_vec.append(init_pair_log_price)
num_sim, T0, T, length = real_stats.shape[0], 0, 1, real_price.shape[0]
mu12, mu22, sigma11, sigma22 = 0.0369, 0.0405, 0.2437, 0.2573


num_iter = 1
initial0 = [0.5, 0.5, 0.5, 0.5]


def multi_process(iter):

    print(iter)

    begin_time = datetime.datetime.now()
    res = minimize(loss_function, initial0, method='Powell',
                   tol=1e-6, options={'disp': True},
                  bounds=[(0., 1), (0., 1), (0., 1), (0., 1)])
    print(res.x)

    time = datetime.datetime.now() - begin_time
    print(time)

    params = (res.x)
    loss = loss_function((params))
    print(loss)
    return (res.x, time, loss)


iterations = [i for i in range(num_iter)]
pool = multiprocessing.Pool()
result = pool.map(multi_process, iterations)
print(result)

0
[1] 0.5 0.5 0.5 0.5

5.063086280956694
4.957685805992859
return_mean1               153.025100
return_mean2               150.231414
return_sd1                 403.717978
return_sd2                 383.384557
return_skew1                44.402530
return_skew2                46.191973
return_kurtosis1            84.381136
return_kurtosis2            89.642087
return_autocorrelation1     16.864902
return_autocorrelation2     16.167899
dtype: float64
1388.0095743900317
---
[1] 0.381966 0.500000 0.500000 0.500000

4.990822022434134
5.044230074834597
return_mean1               118.723229
return_mean2               155.946268
return_sd1                 399.879757
return_sd2                 377.416257
return_skew1                42.185499
return_skew2                44.283059
return_kurtosis1            78.639888
return_kurtosis2            90.628182
return_autocorrelation1     18.826944
return_autocorrelation2     15.810957
dtype: float64
1342.3400389257881
---
[1] 0.618034 0.500000 0.5000

dtype: float64
1276.6642055057396
---
[1] 0.09709154 0.50000000 0.50000000 0.50000000

5.027991292606182
4.968259394094517
return_mean1                42.436172
return_mean2               144.572288
return_sd1                 402.255444
return_sd2                 379.197906
return_skew1                43.057496
return_skew2                37.779763
return_kurtosis1            85.830799
return_kurtosis2            90.263580
return_autocorrelation1     18.572752
return_autocorrelation2     17.382640
dtype: float64
1261.3488401090106
---
[1] 0.09697095 0.50000000 0.50000000 0.50000000

5.038320661711364
5.043834522038178
return_mean1                48.581193
return_mean2               148.036187
return_sd1                 400.541878
return_sd2                 381.325372
return_skew1                44.282932
return_skew2                42.484175
return_kurtosis1            70.559353
return_kurtosis2            83.660140
return_autocorrelation1     17.198973
return_autocorrelation2     18.5

dtype: float64
1180.1034217766228
---
[1] 0.09697095 0.11672611 0.50000000 0.50000000

5.004621256048891
5.024975112940504
return_mean1                45.762538
return_mean2                50.023844
return_sd1                 400.675437
return_sd2                 382.990410
return_skew1                41.192394
return_skew2                43.345521
return_kurtosis1            90.905899
return_kurtosis2            88.023830
return_autocorrelation1     17.861824
return_autocorrelation2     16.592642
dtype: float64
1177.3743398272788
---
[1] 0.09697095 0.07214070 0.50000000 0.50000000

5.001740260366938
5.077054281922711
return_mean1                45.784377
return_mean2                46.955554
return_sd1                 402.246434
return_sd2                 383.352358
return_skew1                42.791478
return_skew2                41.511464
return_kurtosis1            91.767108
return_kurtosis2            83.698324
return_autocorrelation1     17.863028
return_autocorrelation2     16.2

dtype: float64
1164.3980518912895
---
[1] 0.09697095 0.02757319 0.50000000 0.50000000

5.0253670126512455
4.908676791969986
return_mean1                46.140723
return_mean2                41.859357
return_sd1                 401.790430
return_sd2                 379.553875
return_skew1                42.203393
return_skew2                41.898558
return_kurtosis1            77.641497
return_kurtosis2            81.475873
return_autocorrelation1     17.734240
return_autocorrelation2     16.823745
dtype: float64
1147.1216905562644
---
[1] 0.09697095 0.02758424 0.50000000 0.50000000

5.012161979681892
4.978801506631107
return_mean1                46.437502
return_mean2                42.956124
return_sd1                 402.674543
return_sd2                 381.170752
return_skew1                41.964174
return_skew2                43.490139
return_kurtosis1            85.844177
return_kurtosis2            86.171676
return_autocorrelation1     17.536272
return_autocorrelation2     16.

dtype: float64
773.1126290381051
---
[1] 0.09697095 0.02757319 0.25662509 0.50000000

5.010719073036557
5.084710303864758
return_mean1                33.580290
return_mean2                40.599715
return_sd1                  26.819840
return_sd2                 382.892671
return_skew1                46.654388
return_skew2                42.011970
return_kurtosis1            79.927468
return_kurtosis2            85.919636
return_autocorrelation1     17.594995
return_autocorrelation2     18.174933
dtype: float64
774.1759076638048
---
[1] 0.09697095 0.02757319 0.25569617 0.50000000

5.022665353253072
5.017589026957208
return_mean1                31.803542
return_mean2                44.455567
return_sd1                  24.630181
return_sd2                 377.534111
return_skew1                43.127845
return_skew2                42.197844
return_kurtosis1            80.448839
return_kurtosis2            80.007780
return_autocorrelation1     18.766706
return_autocorrelation2     16.217

dtype: float64
765.4996251506791
---
[1] 0.09697095 0.02757319 0.25512141 0.38196601

4.9836892167111495
5.007446917947252
return_mean1                36.536788
return_mean2                35.318700
return_sd1                  26.398990
return_sd2                 196.873310
return_skew1                41.398413
return_skew2                39.889342
return_kurtosis1            84.611994
return_kurtosis2            81.967603
return_autocorrelation1     17.680470
return_autocorrelation2     16.787677
dtype: float64
577.4632883301488
---
[1] 0.09697095 0.02757319 0.25512141 0.61803399

4.9996723604312026
4.850958435631157
return_mean1                34.380041
return_mean2                56.378387
return_sd1                  25.964872
return_sd2                 564.451664
return_skew1                41.890573
return_skew2                43.195776
return_kurtosis1            83.728621
return_kurtosis2            80.245592
return_autocorrelation1     17.009648
return_autocorrelation2     17.7

dtype: float64
404.0262572604669
---
[1] 0.09697095 0.02757319 0.25512141 0.25553165

5.032873437785484
5.0696895234514505
return_mean1               35.508960
return_mean2               29.191645
return_sd1                 24.608154
return_sd2                 19.733186
return_skew1               43.172179
return_skew2               41.931506
return_kurtosis1           86.734795
return_kurtosis2           87.644728
return_autocorrelation1    17.708397
return_autocorrelation2    16.104062
dtype: float64
402.3376117254678
---
[1] 0.09697095 0.02757319 0.25512141 0.25554903

5.050622737897002
4.987717370242599
return_mean1               33.306997
return_mean2               27.305646
return_sd1                 23.449437
return_sd2                 20.917048
return_skew1               42.601721
return_skew2               43.488584
return_kurtosis1           82.549139
return_kurtosis2           82.631233
return_autocorrelation1    17.922618
return_autocorrelation2    18.267301
dtype: float64


dtype: float64
394.8549731196723
---
[1] 0.05580440 0.02757319 0.25512141 0.25554239

5.064677282151972
5.003033363863724
return_mean1               27.778503
return_mean2               25.724117
return_sd1                 25.827588
return_sd2                 21.253469
return_skew1               39.453987
return_skew2               45.166504
return_kurtosis1           77.977912
return_kurtosis2           87.534833
return_autocorrelation1    15.942493
return_autocorrelation2    16.632273
dtype: float64
383.2916783114239
---
[1] 0.05628473 0.02757319 0.25512141 0.25554239

5.0186305671213916
5.022973093361738
return_mean1               27.374137
return_mean2               29.873966
return_sd1                 25.814287
return_sd2                 21.176998
return_skew1               42.960794
return_skew2               44.831365
return_kurtosis1           85.036207
return_kurtosis2           88.189075
return_autocorrelation1    15.892468
return_autocorrelation2    18.766552
dtype: float64


dtype: float64
384.86917221650657
---
[1] 0.05578803 0.02757319 0.25512141 0.25554239

4.990976224840545
5.00507935141549
return_mean1               29.914911
return_mean2               26.467230
return_sd1                 23.956086
return_sd2                 21.071544
return_skew1               40.114597
return_skew2               42.699013
return_kurtosis1           83.907573
return_kurtosis2           87.920767
return_autocorrelation1    18.176913
return_autocorrelation2    15.672531
dtype: float64
389.901164388763
---
[1] 0.05578891 0.02757319 0.25512141 0.25554239

5.044207697343075
5.015558932157242
return_mean1               28.281976
return_mean2               32.357609
return_sd1                 24.658534
return_sd2                 21.084372
return_skew1               41.538769
return_skew2               40.912777
return_kurtosis1           86.832600
return_kurtosis2           82.096932
return_autocorrelation1    18.297676
return_autocorrelation2    17.226282
dtype: float64
39

dtype: float64
398.07712699294933
---
[1] 0.05578857 0.09527120 0.25512141 0.25554239

5.029071649683696
4.999634107904565
return_mean1               29.089689
return_mean2               33.610814
return_sd1                 25.265801
return_sd2                 22.005377
return_skew1               43.212209
return_skew2               42.457739
return_kurtosis1           84.029653
return_kurtosis2           87.265870
return_autocorrelation1    19.274721
return_autocorrelation2    17.648050
dtype: float64
403.8599237878659
---
[1] 0.05578857 0.09469180 0.25512141 0.25554239

5.011540828163536
4.98998271505621
return_mean1               25.883630
return_mean2               32.760406
return_sd1                 25.212176
return_sd2                 19.711088
return_skew1               42.590882
return_skew2               48.282727
return_kurtosis1           82.116655
return_kurtosis2           94.177266
return_autocorrelation1    17.033672
return_autocorrelation2    17.027173
dtype: float64
4

dtype: float64
988.8897011641895
---
[1] 0.05578857 0.09492900 0.23606798 0.25554239

4.9846792503762565
5.02485891676233
return_mean1               24.622538
return_mean2               31.805831
return_sd1                 21.210196
return_sd2                 19.832416
return_skew1               40.163017
return_skew2               45.790304
return_kurtosis1           85.639103
return_kurtosis2           73.907556
return_autocorrelation1    17.271189
return_autocorrelation2    17.806595
dtype: float64
378.0487444995307
---
[1] 0.05578857 0.09492900 0.14589803 0.25554239

4.991439040528295
5.016143922912667
return_mean1                22.458378
return_mean2                33.237242
return_sd1                 154.118241
return_sd2                  21.305704
return_skew1                40.431182
return_skew2                39.872868
return_kurtosis1            82.820968
return_kurtosis2            87.284681
return_autocorrelation1     18.106497
return_autocorrelation2     17.083468
dtype:

dtype: float64
387.39125302327494
---
[1] 0.05578857 0.09492900 0.23606798 0.30622695

5.032658267514025
5.031029108963019
return_mean1               25.943801
return_mean2               35.661223
return_sd1                 22.618847
return_sd2                 80.069378
return_skew1               43.587623
return_skew2               40.470278
return_kurtosis1           80.082596
return_kurtosis2           82.767647
return_autocorrelation1    18.526604
return_autocorrelation2    17.408775
dtype: float64
447.13677028524467
---
[1] 0.05578857 0.09492900 0.23606798 0.27729720

5.09873172581341
5.0138856000453025
return_mean1               27.792566
return_mean2               36.732077
return_sd1                 20.940909
return_sd2                 33.722828
return_skew1               40.974814
return_skew2               38.656054
return_kurtosis1           82.882683
return_kurtosis2           81.497291
return_autocorrelation1    18.142088
return_autocorrelation2    17.815332
dtype: float64

dtype: float64
395.8933881419513
---
[1] 0.05578857 0.09492900 0.23606798 0.26081897

5.04905197685156
5.028789971006458
return_mean1               26.902602
return_mean2               33.774336
return_sd1                 23.091445
return_sd2                 22.080859
return_skew1               40.925079
return_skew2               43.487710
return_kurtosis1           85.783555
return_kurtosis2           88.609081
return_autocorrelation1    18.052450
return_autocorrelation2    16.690511
dtype: float64
399.39762947269463
---
[1] 0.05578857 0.09492900 0.23606798 0.26082319

5.083604073650632
5.070875701625604
return_mean1               25.091009
return_mean2               35.377532
return_sd1                 21.309224
return_sd2                 21.786135
return_skew1               41.007996
return_skew2               43.313570
return_kurtosis1           87.238776
return_kurtosis2           77.944775
return_autocorrelation1    17.377559
return_autocorrelation2    17.514875
dtype: float64
3

dtype: float64
394.7668391538154
---
[1] 0.02469377 0.09492900 0.23606798 0.26082158

4.957391810014856
5.033443526310767
return_mean1               25.479235
return_mean2               33.673180
return_sd1                 23.072375
return_sd2                 20.902112
return_skew1               44.456827
return_skew2               44.195385
return_kurtosis1           81.447774
return_kurtosis2           95.575698
return_autocorrelation1    17.596613
return_autocorrelation2    18.353642
dtype: float64
404.7528419510478
---
[1] 0.02751384 0.09492900 0.23606798 0.26082158

5.009953245989952
5.047470576281045
return_mean1               25.031021
return_mean2               34.804808
return_sd1                 21.083523
return_sd2                 22.614640
return_skew1               39.468994
return_skew2               42.090140
return_kurtosis1           77.016639
return_kurtosis2           84.755059
return_autocorrelation1    17.210072
return_autocorrelation2    18.303698
dtype: float64
3

dtype: float64
392.2940293365872
---
[1] 0.02617951 0.09492900 0.23606798 0.26082158

5.048601649311654
5.010428494856847
return_mean1               26.136085
return_mean2               36.342201
return_sd1                 19.636331
return_sd2                 19.108073
return_skew1               41.838482
return_skew2               43.333923
return_kurtosis1           77.821652
return_kurtosis2           81.402540
return_autocorrelation1    16.803248
return_autocorrelation2    17.323074
dtype: float64
379.7456094110275
---
[1] 0.02618051 0.09492900 0.23606798 0.26082158

5.048565912881001
5.015204679306621
return_mean1               25.320871
return_mean2               35.513379
return_sd1                 21.235125
return_sd2                 21.072647
return_skew1               43.643091
return_skew2               43.065696
return_kurtosis1           80.731305
return_kurtosis2           85.783674
return_autocorrelation1    17.059819
return_autocorrelation2    17.452305
dtype: float64
3

dtype: float64
388.88692509508286
---
[1] 0.02618013 0.05572776 0.14589803 0.26082158

5.025126542414943
5.007697864490354
return_mean1                22.364273
return_mean2                30.206303
return_sd1                 153.684665
return_sd2                  19.901325
return_skew1                43.021834
return_skew2                43.925885
return_kurtosis1            78.927896
return_kurtosis2            84.709443
return_autocorrelation1     17.677788
return_autocorrelation2     17.120442
dtype: float64
511.5398543421478
---
[1] 0.02618013 0.05572776 0.24761658 0.26082158

5.045783652203207
5.042956009508271
return_mean1               27.667630
return_mean2               26.462109
return_sd1                 19.798635
return_sd2                 21.570378
return_skew1               45.452363
return_skew2               38.676159
return_kurtosis1           85.299566
return_kurtosis2           82.048069
return_autocorrelation1    17.630656
return_autocorrelation2    17.470374
dtype

dtype: float64
391.5228715449806
---
[1] 0.02618013 0.05572776 0.24246506 0.26082158

5.022321950264781
5.05402946405669
return_mean1               26.388289
return_mean2               29.504277
return_sd1                 20.748784
return_sd2                 22.125245
return_skew1               40.520797
return_skew2               44.585255
return_kurtosis1           84.716306
return_kurtosis2           86.530816
return_autocorrelation1    17.860592
return_autocorrelation2    17.161191
dtype: float64
390.14155072047294
---
[1] 0.02618013 0.05572776 0.24246276 0.26082158

5.013580913041394
5.061578339782186
return_mean1               26.496942
return_mean2               26.725741
return_sd1                 20.115346
return_sd2                 21.295171
return_skew1               42.905017
return_skew2               43.523248
return_kurtosis1           79.187937
return_kurtosis2           91.105748
return_autocorrelation1    19.310380
return_autocorrelation2    17.464512
dtype: float64
3

dtype: float64
386.0584154767667
---
[1] 0.02618013 0.05572776 0.24246364 0.24875236

4.960669355565677
5.010331014541533
return_mean1               26.268129
return_mean2               29.499645
return_sd1                 19.744874
return_sd2                 21.595139
return_skew1               43.418047
return_skew2               43.748598
return_kurtosis1           81.036633
return_kurtosis2           85.129074
return_autocorrelation1    17.909322
return_autocorrelation2    18.997641
dtype: float64
387.3471021053793
---
[1] 0.02618013 0.05572776 0.24246364 0.24818362

5.037928610257618
5.049205967522542
return_mean1               24.275059
return_mean2               28.171891
return_sd1                 19.206974
return_sd2                 22.927120
return_skew1               45.814409
return_skew2               43.587999
return_kurtosis1           83.263380
return_kurtosis2           85.745353
return_autocorrelation1    16.682836
return_autocorrelation2    17.180307
dtype: float64
3

dtype: float64
494.22377449084803
---
[1] 0.61803399 0.05572776 0.24246364 0.24844961

5.020393101115906
5.003236816039682
return_mean1               184.574850
return_mean2                28.161245
return_sd1                  20.956444
return_sd2                  22.620811
return_skew1                47.861357
return_skew2                45.904080
return_kurtosis1            87.056429
return_kurtosis2            85.615070
return_autocorrelation1     16.999564
return_autocorrelation2     17.797174
dtype: float64
557.5470220278434
---
[1] 0.23606798 0.05572776 0.24246364 0.24844961

5.058348112665759
5.043566179399145
return_mean1               76.775890
return_mean2               27.516391
return_sd1                 20.882444
return_sd2                 23.466551
return_skew1               40.328974
return_skew2               42.312755
return_kurtosis1           79.561921
return_kurtosis2           88.027527
return_autocorrelation1    18.535790
return_autocorrelation2    16.914195
dtype

dtype: float64
399.0912952985831
---
[1] 0.06877674 0.05572776 0.24246364 0.24844961

4.966816976724147
5.051007170089235
return_mean1               28.914667
return_mean2               26.508703
return_sd1                 19.345491
return_sd2                 22.513958
return_skew1               42.116883
return_skew2               43.589540
return_kurtosis1           79.725192
return_kurtosis2           79.136376
return_autocorrelation1    17.983674
return_autocorrelation2    16.491937
dtype: float64
376.326419921334
---
[1] 0.06894981 0.05572776 0.24246364 0.24844961

5.039240747464485
5.074667798285171
return_mean1               28.483140
return_mean2               28.383336
return_sd1                 18.829023
return_sd2                 21.971638
return_skew1               46.005396
return_skew2               39.452392
return_kurtosis1           80.925681
return_kurtosis2           88.619686
return_autocorrelation1    16.371400
return_autocorrelation2    17.638557
dtype: float64
38

dtype: float64
407.6718993484691
---
[1] 0.06888371 0.09016994 0.24246364 0.24844961

5.020419056447299
4.986364773804195
return_mean1               29.190716
return_mean2               29.578766
return_sd1                 19.463623
return_sd2                 20.931462
return_skew1               42.175923
return_skew2               42.735181
return_kurtosis1           81.885332
return_kurtosis2           82.458489
return_autocorrelation1    18.911600
return_autocorrelation2    18.310371
dtype: float64
385.6414644779547
---
[1] 0.06888371 0.05572809 0.24246364 0.24844961

5.045385768305745
4.993056084299274
return_mean1               27.357937
return_mean2               29.755575
return_sd1                 19.048754
return_sd2                 23.186130
return_skew1               41.753769
return_skew2               43.354626
return_kurtosis1           80.362337
return_kurtosis2           83.267367
return_autocorrelation1    18.199804
return_autocorrelation2    16.976152
dtype: float64
3

dtype: float64
381.2209113525687
---
[1] 0.06888371 0.05381467 0.24246364 0.24844961

5.055997271023739
5.028208619053836
return_mean1               27.312648
return_mean2               28.379236
return_sd1                 18.799261
return_sd2                 21.988452
return_skew1               43.350303
return_skew2               39.623944
return_kurtosis1           82.485063
return_kurtosis2           86.116101
return_autocorrelation1    17.462924
return_autocorrelation2    17.376892
dtype: float64
382.89482468268244
---
[1] 0.06888371 0.05380633 0.24246364 0.24844961

5.035529959877092
4.981597201399165
return_mean1               27.013856
return_mean2               26.898148
return_sd1                 20.451194
return_sd2                 21.438748
return_skew1               41.473079
return_skew2               43.721454
return_kurtosis1           84.457770
return_kurtosis2           84.798042
return_autocorrelation1    18.496567
return_autocorrelation2    17.604529
dtype: float64


dtype: float64
400.0318886218236
---
[1] 0.06888371 0.05380871 0.22772694 0.24844961

4.998824051350632
5.028619187640811
return_mean1               26.449808
return_mean2               25.144105
return_sd1                 29.492010
return_sd2                 24.554675
return_skew1               42.716447
return_skew2               44.380741
return_kurtosis1           80.701417
return_kurtosis2           84.761414
return_autocorrelation1    17.863000
return_autocorrelation2    17.342049
dtype: float64
393.40566568490095
---
[1] 0.06888371 0.05380871 0.23049676 0.24844961

4.94408664220313
5.0569259658499615
return_mean1               26.963467
return_mean2               26.346561
return_sd1                 27.220432
return_sd2                 22.830839
return_skew1               44.851197
return_skew2               44.592608
return_kurtosis1           85.338074
return_kurtosis2           80.185049
return_autocorrelation1    17.914680
return_autocorrelation2    16.977760
dtype: float64


dtype: float64
406.0341663496657
---
[1] 0.06888371 0.05380871 0.22988440 0.24844961

4.9925426128914285
4.953836633345893
return_mean1               28.179267
return_mean2               27.700484
return_sd1                 25.475745
return_sd2                 20.857543
return_skew1               44.698343
return_skew2               44.186618
return_kurtosis1           82.060485
return_kurtosis2           85.801073
return_autocorrelation1    19.313497
return_autocorrelation2    17.728463
dtype: float64
396.0015178560299
---
[1] 0.06888371 0.05380871 0.22988518 0.24844961

5.094571009922564
5.042692158495621
return_mean1               25.069721
return_mean2               26.224958
return_sd1                 27.247033
return_sd2                 22.502247
return_skew1               44.163786
return_skew2               44.108862
return_kurtosis1           91.085346
return_kurtosis2           85.806051
return_autocorrelation1    16.072172
return_autocorrelation2    16.276520
dtype: float64


dtype: float64
399.9968134213281
---
[1] 0.06888371 0.05380871 0.22988484 0.26359991

4.97339571456014
5.09086787314369
return_mean1               27.572597
return_mean2               32.150713
return_sd1                 26.328078
return_sd2                 22.346811
return_skew1               44.395532
return_skew2               44.986559
return_kurtosis1           86.052058
return_kurtosis2           81.941529
return_autocorrelation1    19.004367
return_autocorrelation2    18.137114
dtype: float64
402.91535811459056
---
[1] 0.06888371 0.05380871 0.22988484 0.26371537

5.0103028593336285
5.06381872643115
return_mean1               30.129866
return_mean2               31.525796
return_sd1                 26.840809
return_sd2                 21.017669
return_skew1               43.863449
return_skew2               43.660097
return_kurtosis1           85.585647
return_kurtosis2           75.718458
return_autocorrelation1    17.941908
return_autocorrelation2    16.892765
dtype: float64
39

# L-BFGS-B

In [5]:
import pandas as pd
import numpy as np
import random
from numpy.random import RandomState
from scipy import stats
from scipy.optimize import minimize
import scipy
import os
os.environ['R_HOME'] = '/Library/Frameworks/R.framework/Versions/4.1/Resources/'
import rpy2
from rpy2.robjects.packages import importr
from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
import datetime
from rpy2.robjects.vectors import FloatVector
import multiprocessing

np.random.seed(441)

# Define the model that generates pair simulations.
yuima = importr("yuima")
n_ou_sim_string = """
n_sim_ou = function(random_seed, num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length){

  set.seed(random_seed)

  drift = c("mu11-mu12*X1", "mu21-mu22*X2")
  diffusion = matrix(c("exp(sigma11)", "exp(sigma12)", "exp(sigma21)", "exp(sigma22)"), 2, 2, byrow=TRUE)
  ou_model = setModel(drift=drift, diffusion=diffusion, 
                        time.variable = "t",
                        state.var=c("X1","X2"), solve.variable=c("X1","X2"))

  newsamp = setSampling(Initial=T0, Terminal=T, n=length)

  n_sim_ou_data = data.frame(matrix(nrow=length+1, ncol=2*num_sim))
  for (i in 1:num_sim){
    ou_sim = simulate(ou_model, 
                      true.par=list(
                        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22, 
                        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22), 
                      xinit=xinit_vec[i], sampling=newsamp)
    original_data = ou_sim@data@original.data
    one_sim_ou = data.frame(original_data[,1], original_data[,2])
    colnames(one_sim_ou) = c('series1', 'series2')
    n_sim_ou_data[, (2*i-1):(2*i)] = one_sim_ou
  }
  return(n_sim_ou_data)
}
"""

n_ou_sim = SignatureTranslatedAnonymousPackage(n_ou_sim_string, "n_ou_sim")
def n_ou_simulation(random_seed, num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length):
    """num_sim simulations of bivariate Ornstein-Uhlenbeck process,
    length = length of one series
    """
    n_ou_sim_data = pd.DataFrame(
        n_ou_sim.n_sim_ou(random_seed=random_seed, num_sim=num_sim,
                              mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22,
                              sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22,
                              xinit_vec=xinit_vec, T0=T0, T=T, length=length)).transpose()
    return n_ou_sim_data

def price_to_log_price(n_price):
    return(np.log(n_price))

def log_price_to_price(n_log_price):
    return(np.exp(n_log_price))

def price_to_return(n_price):
    n_return = pd.DataFrame()
    for i in range(n_price.shape[1]):
        ith_column_price_series = n_price.iloc[:, i]
        n_return = pd.concat([n_return, 100 * (np.log(ith_column_price_series[1:].values) - np.log(ith_column_price_series[:-1]))], axis=1)
    return n_return

def log_price_to_return(n_log_price):
    n_real_return = pd.DataFrame()
    for i in range(n_log_price.shape[1]):
        ith_column_price_series = n_log_price.iloc[:, i]
        n_real_return = pd.concat([n_real_return, 100 * (ith_column_price_series[1:].values - ith_column_price_series[:-1])], axis=1)
    return n_real_return

def cal_stats(n_return, n_price=None):
    # (different expressions of calculation from intro to stat finance)
    # 4 statistics
    return_series1 = n_return.iloc[:, ::2]
    return_series2 = n_return.iloc[:, 1::2]
    mean1 = return_series1.mean(axis=0).values
    sd1 = return_series1.std(axis=0).values
    mean2 = return_series2.mean(axis=0).values
    sd2 = return_series2.std(axis=0).values
    stats_data = pd.DataFrame([mean1, mean2, sd1, sd2])
    stats_data = stats_data.transpose()
    stats_data.columns = [
        'return_mean1', 'return_mean2',
        'return_sd1', 'return_sd2']
    return stats_data

def loss_function(params):
    """n_real_stats is a global amount calculated outside the function"""
    params = FloatVector(params)
    print(params)
    moment_loss = pd.DataFrame().reindex_like(real_stats)


    n_real_log_price = n_ou_simulation(
        int(np.random.randint(low=0, high=980608, size=(1,))), num_sim,
        mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
        xinit_vec, T0, T, length)
    print(n_real_log_price.iloc[5, 5])
    n_real_price = log_price_to_price(n_log_price=n_real_log_price)
    n_real_return = price_to_return(n_price=n_real_price)
    n_real_stats = cal_stats(n_return=n_real_return, n_price=None)


    n_sim_log_price = n_ou_simulation(
        random_seed=int(np.random.randint(low=0, high=980608, size=(1,))), num_sim=num_sim,
        mu11=mu11, mu12=params[0], mu21=mu21, mu22=params[1],
        sigma11=params[2], sigma12=sigma12, sigma21=sigma21, sigma22=params[3],
        xinit_vec=xinit_vec, T0=T0, T=T, length=length)
    print(n_sim_log_price.iloc[5, 5])
    n_sim_price = log_price_to_price(n_sim_log_price)
    n_sim_return = price_to_return(n_sim_price)
    n_sim_stats = cal_stats(n_sim_return)


    for i in range(n_real_stats.shape[0]):
        for j in range(n_real_stats.shape[1]):
            moment_loss.iloc[i, j] = np.sqrt((n_real_stats.iloc[i, j] - n_sim_stats.iloc[i, j])**2)
    sum_all = np.sum(moment_loss)
    print(sum_all)
    print(np.sum(sum_all))
    print('---')

    return np.sum(sum_all)



real_price = pd.read_csv("sp500_20180101_20181231_pair_prices.csv", index_col=[0])
real_log_price = price_to_log_price(n_price=real_price)
real_return = pd.read_csv("sp500_20180101_20181231_pair_returns.csv", index_col=[0])
real_stats = cal_stats(n_return=real_return, n_price=None)
mu11, mu21, sigma12, sigma21 = 0, 0, -1000, -1000
xinit_vec = []
for i in range(int(real_log_price.shape[1]/2)):
    init_pair_log_price = [real_log_price.iloc[0, 2*i], real_log_price.iloc[0, 2*i+1]]
    init_pair_log_price = FloatVector(init_pair_log_price)
    xinit_vec.append(init_pair_log_price)
num_sim, T0, T, length = real_stats.shape[0], 0, 1, real_price.shape[0]
mu12, mu22, sigma11, sigma22 = 0.0369, 0.0405, -1.4118, -1.3574


num_iter = 1
initial0 = [1, 1, -1, -1]


iter_seed = np.random.randint(low=0, high=980608, size=(num_iter,))
def multi_process(iter):

    print(iter)
    np.random.seed(int(iter_seed[iter]))

    begin_time = datetime.datetime.now()
    res = minimize(loss_function, initial0, method='L-BFGS-B',
                   bounds=[(0., 1), (0., 1), (-10, 0), (-10, 0)], 
                   options={'eps': 0.5, 'gtol': 1e-4, 'iprint': 1, 'ftol': 1e-09})
    print(res.x)

    time = datetime.datetime.now() - begin_time
    print(time)

    params = (res.x)
    loss = loss_function((params))
    print(loss)
    return (res.x, time, loss)


iterations = [i for i in range(num_iter)]
pool = multiprocessing.Pool()
result = pool.map(multi_process, iterations)
print(result)

0
[1]  1  1 -1 -1

5.043932089409035
4.848796057384303
return_mean1    257.313378
return_mean2    251.792801
return_sd1      200.487853
return_sd2      178.956875
dtype: float64
888.550906163143
---
[1]  0.5  1.0 -1.0 -1.0

5.064931605699332
4.983529890659132
return_mean1    150.487917
return_mean2    248.394881
return_sd1      193.918719
return_sd2      181.083751
dtype: float64
773.8852678939413
---
[1]  1.0  0.5 -1.0 -1.0

5.023942365564527
5.050530316236692
return_mean1    255.502956
return_mean2    147.004769
return_sd1      201.371837
return_sd2      167.890546
dtype: float64
771.7701082987224
---
[1]  1.0  1.0 -0.5 -1.0

5.063860422575563
4.8977924216023005
return_mean1    263.589873
return_mean2    251.211521
return_sd1      575.290367
return_sd2      177.149370
dtype: float64
1267.2411309787794
---
[1]  1.0  1.0 -1.0 -0.5

5.083635029227609
4.941126425652707
return_mean1    259.888573
return_mean2    248.764799
return_sd1      199.502427
return_sd2      551.265025
dtype: float

dtype: float64
937.5687181234716
---
[1]   0.000000   0.500000 -10.000000  -9.977013

5.017271177690762
4.969454534149802
return_mean1     21.715570
return_mean2    149.693060
return_sd1      381.871126
return_sd2      379.978407
dtype: float64
933.2581641002628
---
[1]  0.000000  0.000000 -9.500000 -9.977013

5.046526224477517
5.019473480837054
return_mean1     22.325481
return_mean2     23.669915
return_sd1      383.186378
return_sd2      402.872928
dtype: float64
832.0547015867696
---
[1]   0.000000   0.000000 -10.000000  -9.477013

5.029849614130829
5.01945836114361
return_mean1     26.232097
return_mean2     22.026872
return_sd1      381.203345
return_sd2      401.344199
dtype: float64
830.8065134479084
---
[  0.           0.         -10.          -9.97701282]
0:02:12.149247
[1]   0.000000   0.000000 -10.000000  -9.977013

4.956089208956063
5.019457914502061
return_mean1     22.723460
return_mean2     26.089380
return_sd1      380.674610
return_sd2      404.183294
dtype: float64
8

Process ForkPoolWorker-90:
Process ForkPoolWorker-93:
Process ForkPoolWorker-91:
Process ForkPoolWorker-89:
Process ForkPoolWorker-92:
Process ForkPoolWorker-88:
Process ForkPoolWorker-94:
Process ForkPoolWorker-87:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()


# BasinHopping

In [3]:
import pandas as pd
import numpy as np
import random
from numpy.random import RandomState
from scipy import stats
from scipy.optimize import minimize
import scipy
import os
os.environ['R_HOME'] = '/Library/Frameworks/R.framework/Versions/4.1/Resources/'
import rpy2
from rpy2.robjects.packages import importr
from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
import datetime
from rpy2.robjects.vectors import FloatVector
import multiprocessing

np.random.seed(441)

# Define the model that generates pair simulations.
yuima = importr("yuima")
n_ou_sim_string = """
n_sim_ou = function(random_seed, num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length){

  set.seed(random_seed)

  drift = c("mu11-mu12*X1", "mu21-mu22*X2")
  diffusion = matrix(c("exp(sigma11)", "exp(sigma12)", "exp(sigma21)", "exp(sigma22)"), 2, 2, byrow=TRUE)
  ou_model = setModel(drift=drift, diffusion=diffusion, 
                        time.variable = "t",
                        state.var=c("X1","X2"), solve.variable=c("X1","X2"))

  newsamp = setSampling(Initial=T0, Terminal=T, n=length)

  n_sim_ou_data = data.frame(matrix(nrow=length+1, ncol=2*num_sim))
  for (i in 1:num_sim){
    ou_sim = simulate(ou_model, 
                      true.par=list(
                        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22, 
                        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22), 
                      xinit=xinit_vec[i], sampling=newsamp)
    original_data = ou_sim@data@original.data
    one_sim_ou = data.frame(original_data[,1], original_data[,2])
    colnames(one_sim_ou) = c('series1', 'series2')
    n_sim_ou_data[, (2*i-1):(2*i)] = one_sim_ou
  }
  return(n_sim_ou_data)
}
"""

n_ou_sim = SignatureTranslatedAnonymousPackage(n_ou_sim_string, "n_ou_sim")
def n_ou_simulation(random_seed, num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length):
    """num_sim simulations of bivariate Ornstein-Uhlenbeck process,
    length = length of one series
    """
    n_ou_sim_data = pd.DataFrame(
        n_ou_sim.n_sim_ou(random_seed=random_seed, num_sim=num_sim,
                              mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22,
                              sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22,
                              xinit_vec=xinit_vec, T0=T0, T=T, length=length)).transpose()
    return n_ou_sim_data

def price_to_log_price(n_price):
    return(np.log(n_price))

def log_price_to_price(n_log_price):
    return(np.exp(n_log_price))

def price_to_return(n_price):
    n_return = pd.DataFrame()
    for i in range(n_price.shape[1]):
        ith_column_price_series = n_price.iloc[:, i]
        n_return = pd.concat([n_return, 100 * (np.log(ith_column_price_series[1:].values) - np.log(ith_column_price_series[:-1]))], axis=1)
    return n_return

def log_price_to_return(n_log_price):
    n_real_return = pd.DataFrame()
    for i in range(n_log_price.shape[1]):
        ith_column_price_series = n_log_price.iloc[:, i]
        n_real_return = pd.concat([n_real_return, 100 * (ith_column_price_series[1:].values - ith_column_price_series[:-1])], axis=1)
    return n_real_return

def cal_stats(n_return, n_price=None):
    # (different expressions of calculation from intro to stat finance)
    # 4 statistics
    return_series1 = n_return.iloc[:, ::2]
    return_series2 = n_return.iloc[:, 1::2]
    mean1 = return_series1.mean(axis=0).values
    sd1 = return_series1.std(axis=0).values
    mean2 = return_series2.mean(axis=0).values
    sd2 = return_series2.std(axis=0).values
    stats_data = pd.DataFrame([mean1, mean2, sd1, sd2])
    stats_data = stats_data.transpose()
    stats_data.columns = [
        'return_mean1', 'return_mean2',
        'return_sd1', 'return_sd2']
    return stats_data

def loss_function(params):
    """n_real_stats is a global amount calculated outside the function"""
    params = FloatVector(params)
    print(params)
    moment_loss = pd.DataFrame().reindex_like(real_stats)


    n_real_log_price = n_ou_simulation(
        int(np.random.randint(low=0, high=980608, size=(1,))), num_sim,
        mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
        xinit_vec, T0, T, length)
    print(n_real_log_price.iloc[5, 5])
    n_real_price = log_price_to_price(n_log_price=n_real_log_price)
    n_real_return = price_to_return(n_price=n_real_price)
    n_real_stats = cal_stats(n_return=n_real_return, n_price=None)


    n_sim_log_price = n_ou_simulation(
        random_seed=int(np.random.randint(low=0, high=980608, size=(1,))), num_sim=num_sim,
        mu11=mu11, mu12=params[0], mu21=mu21, mu22=params[1],
        sigma11=params[2], sigma12=sigma12, sigma21=sigma21, sigma22=params[3],
        xinit_vec=xinit_vec, T0=T0, T=T, length=length)
    print(n_sim_log_price.iloc[5, 5])
    n_sim_price = log_price_to_price(n_sim_log_price)
    n_sim_return = price_to_return(n_sim_price)
    n_sim_stats = cal_stats(n_sim_return)


    for i in range(n_real_stats.shape[0]):
        for j in range(n_real_stats.shape[1]):
            moment_loss.iloc[i, j] = np.sqrt((n_real_stats.iloc[i, j] - n_sim_stats.iloc[i, j])**2)
    sum_all = np.sum(moment_loss)
    print(sum_all)
    print(np.sum(sum_all))
    print('---')

    return np.sum(sum_all)



real_price = pd.read_csv("sp500_20180101_20181231_pair_prices.csv", index_col=[0])
real_log_price = price_to_log_price(n_price=real_price)
real_return = pd.read_csv("sp500_20180101_20181231_pair_returns.csv", index_col=[0])
real_stats = cal_stats(n_return=real_return, n_price=None)
mu11, mu21, sigma12, sigma21 = 0, 0, -1000, -1000
xinit_vec = []
for i in range(int(real_log_price.shape[1]/2)):
    init_pair_log_price = [real_log_price.iloc[0, 2*i], real_log_price.iloc[0, 2*i+1]]
    init_pair_log_price = FloatVector(init_pair_log_price)
    xinit_vec.append(init_pair_log_price)
num_sim, T0, T, length = real_stats.shape[0], 0, 1, real_price.shape[0]
mu12, mu22, sigma11, sigma22 = 0.0369, 0.0405, -1.4118, -1.3574


num_iter = 1
initial0 = [1, 1, -1, -1]


iter_seed = np.random.randint(low=0, high=980608, size=(num_iter,))
def multi_process(iter):

    print(iter)
    np.random.seed(int(iter_seed[iter]))

    begin_time = datetime.datetime.now()
    res = scipy.optimize.basinhopping(
        
        func=loss_function, x0=initial0, niter=300, stepsize=0.1,
        minimizer_kwargs={'method': "L-BFGS-B"})
    
    print(res.x)

    time = datetime.datetime.now() - begin_time
    print(time)

    params = (res.x)
    loss = loss_function((params))
    print(loss)
    return (res.x, time, loss)


iterations = [i for i in range(num_iter)]
pool = multiprocessing.Pool()
result = pool.map(multi_process, iterations)
print(result)

0
[1]  1  1 -1 -1

5.043932089409035
4.848796057384303
return_mean1    257.313378
return_mean2    251.792801
return_sd1      200.487853
return_sd2      178.956875
dtype: float64
888.550906163143
---
[1]  1  1 -1 -1

5.064931605699332
4.983529890659132
return_mean1    254.207321
return_mean2    248.394881
return_sd1      199.448291
return_sd2      181.083751
dtype: float64
883.1342437458206
---
[1]  1  1 -1 -1

5.023942365564527
5.000547725125207
return_mean1    255.502956
return_mean2    247.950594
return_sd1      201.371837
return_sd2      173.147819
dtype: float64
877.9732064004382
---
[1]  1  1 -1 -1

5.063860422575563
4.8977924216023005
return_mean1    261.661229
return_mean2    251.211521
return_sd1      202.147930
return_sd2      177.149370
dtype: float64
892.1700496038671
---
[1]  1  1 -1 -1

5.083635029227609
4.932762784814812
return_mean1    259.888573
return_mean2    249.592561
return_sd1      199.502427
return_sd2      178.989655
dtype: float64
887.9732157129876
---
[1]  1.4

dtype: float64
885.5233883163237
---
[1]  1.0005203  1.0010161 -1.0003477 -0.9999445

5.017271177690762
4.834498940007365
return_mean1    258.968506
return_mean2    249.690936
return_sd1      200.920286
return_sd2      179.022770
dtype: float64
888.6024980737416
---
[1]  1.0005203  1.0010161 -1.0003477 -0.9999445

5.046526224477517
5.029175238760915
return_mean1    259.556604
return_mean2    249.860454
return_sd1      200.340379
return_sd2      177.784282
dtype: float64
887.5417201017116
---
[1]  1.0005203  1.0010161 -1.0003477 -0.9999445

5.029849614130829
4.913523909830187
return_mean1    253.423881
return_mean2    252.839670
return_sd1      199.601483
return_sd2      177.842708
dtype: float64
883.70774310489
---
[1]  1.0001584  1.0003093 -1.0001058 -0.9999831

4.956089208956063
4.906704494342372
return_mean1    262.585887
return_mean2    249.608363
return_sd1      203.376116
return_sd2      178.677460
dtype: float64
894.2478265695977
---
[1]  1.0001584  1.0003093 -1.0001058 -0.99998

dtype: float64
888.7987832589104
---
[1]  1  1 -1 -1

5.026780202155203
4.95355953415954
return_mean1    256.651500
return_mean2    248.368377
return_sd1      198.737641
return_sd2      177.169637
dtype: float64
880.9271547067849
---
[1]  1  1 -1 -1

5.004780317640271


From cffi callback Process ForkPoolWorker-21:
Process ForkPoolWorker-19:
Process ForkPoolWorker-23:
Process ForkPoolWorker-17:
<function _processevents at 0x7fd419fb1b90>Process ForkPoolWorker-22:


KeyboardInterrupt: 

Process ForkPoolWorker-18:
:
Process ForkPoolWorker-20:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/site-packages/rpy2/rinterface_lib/callbacks.py", line 274, in _processevents
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/opt/homebrew/ana

4.856773536761149
return_mean1    258.269497
return_mean2    249.573383
return_sd1      201.352865
return_sd2      179.463085
dtype: float64
888.6588308496888
---
[1]  1  1 -1 -1

5.03878764509143
5.061965587967287
return_mean1    257.720888
return_mean2    252.360014
return_sd1      202.860500
return_sd2      178.717994
dtype: float64
891.659394886576
---
[1]  1  1 -1 -1

4.967380382266251
4.849463360349527
return_mean1    255.632700
return_mean2    254.035351
return_sd1      198.207529
return_sd2      179.165119
dtype: float64
887.0406992705418
---
[1]  1  1 -1 -1

4.975963715553491
4.8895338907290995
return_mean1    255.275381
return_mean2    247.860297
return_sd1      199.596290
return_sd2      175.857549
dtype: float64
878.5895169949424
---
[1]  1  1 -1 -1

5.021371513798262
4.884125232290712
return_mean1    255.208470
return_mean2    250.323663
return_sd1      203.186202
return_sd2      178.888601
dtype: float64
887.6069363087001
---
[1]  1  1 -1 -1

4.976624879441185
4.872997325


5.009878944808496
4.751476810772063
return_mean1    323.520443
return_mean2    322.166878
return_sd1      333.921244
return_sd2      576.777489
dtype: float64
1556.386054014716
---
[1]  1.1808751  1.2896636 -0.9534553 -0.7626076

5.037685561965899
4.637889295694369
return_mean1    281.517954
return_mean2    289.591213
return_sd1      234.030181
return_sd2      334.558737
dtype: float64
1139.6980840866981
---
[1]  1.1808751  1.2896636 -0.9534553 -0.7626076

5.041694365858872
4.881713410331922
return_mean1    286.106487
return_mean2    293.477858
return_sd1      230.316034
return_sd2      337.002372
dtype: float64
1146.902750802235
---
[1]  1.1808751  1.2896636 -0.9534553 -0.7626076

4.987327040108075
4.870503243708655
return_mean1    284.110280
return_mean2    291.320436
return_sd1      230.203887
return_sd2      342.754101
dtype: float64
1148.3887041316189
---
[1]  1.1808751  1.2896636 -0.9534553 -0.7626076

5.019084329709779
4.8100747700256035
return_mean1    283.149103
return_mean2 

dtype: float64
846.9294351164527
---
[1]  0.9411862  1.0110531 -1.0709142 -0.9821025

4.96714510645127
4.934493693880225
return_mean1    248.928394
return_mean2    248.771774
return_sd1      157.476247
return_sd2      192.812260
dtype: float64
847.9886756241931
---
[1]  0.9411862  1.0110531 -1.0709142 -0.9821025



Process ForkPoolWorker-16:
Traceback (most recent call last):
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/multiprocessing/pool.py", line 121, in worker
    result = (True, func(*args, **kwds))
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/multiprocessing/pool.py", line 44, in mapstar
    return list(map(*args))
  File "<ipython-input-3-5eba658e46d3>", line 170, in multi_process
    minimizer_kwargs={'method': "L-BFGS-B"})
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/site-packages/scipy/optimize/_basinhopping.py", line 693, in basinhopping
    new_global_min = bh.one_cycle()
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/site-packages/scipy/optimize/_basinhopping.py",

In [7]:
from scipy.optimize import minimize, rosen, rosen_der

rosen_der

<function scipy.optimize.optimize.rosen_der(x)>