# ou44 Powell

In [5]:
import pandas as pd
import numpy as np
import random
from numpy.random import RandomState
from scipy import stats
from scipy.optimize import minimize
import scipy
import os
os.environ['R_HOME'] = '/Library/Frameworks/R.framework/Versions/4.1/Resources/'
import rpy2
from rpy2.robjects.packages import importr
from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
import datetime
from rpy2.robjects.vectors import FloatVector
import multiprocessing

# Seed NumPy's global RNG; the per-iteration seeds drawn further down with
# np.random.randint come from this stream.
np.random.seed(441)

# Define the model that generates pair simulations.
# importr loads the R package "yuima"; the R source below calls its
# setModel / setSampling / simulate functions.
yuima = importr("yuima")
# R source of n_sim_ou.  It seeds R's RNG with random_seed, builds a
# two-dimensional model with drift (mu11 - mu12*X1, mu21 - mu22*X2) and a 2x2
# diffusion matrix whose entries are exp(sigma..) -- i.e. the sigma arguments
# are on a log scale in this cell -- and simulates num_sim paths.  Each path
# occupies two adjacent columns of a data.frame that has length+1 rows.
n_ou_sim_string = """
n_sim_ou = function(random_seed, num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length){

  set.seed(random_seed)

  drift = c("mu11-mu12*X1", "mu21-mu22*X2")
  diffusion = matrix(c("exp(sigma11)", "exp(sigma12)", "exp(sigma21)", "exp(sigma22)"), 2, 2, byrow=TRUE)
  ou_model = setModel(drift=drift, diffusion=diffusion, 
                        time.variable = "t",
                        state.var=c("X1","X2"), solve.variable=c("X1","X2"))

  newsamp = setSampling(Initial=T0, Terminal=T, n=length)

  n_sim_ou_data = data.frame(matrix(nrow=length+1, ncol=2*num_sim))
  for (i in 1:num_sim){
    ou_sim = simulate(ou_model, 
                      true.par=list(
                        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22, 
                        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22), 
                      xinit=xinit_vec[i], sampling=newsamp)
    original_data = ou_sim@data@original.data
    one_sim_ou = data.frame(original_data[,1], original_data[,2])
    colnames(one_sim_ou) = c('series1', 'series2')
    n_sim_ou_data[, (2*i-1):(2*i)] = one_sim_ou
  }
  return(n_sim_ou_data)
}
"""

# Wrap the R source as an anonymous package so the function is callable from
# Python as n_ou_sim.n_sim_ou(...).
n_ou_sim = SignatureTranslatedAnonymousPackage(n_ou_sim_string, "n_ou_sim")
def n_ou_simulation(random_seed, num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length):
    """Simulate num_sim bivariate Ornstein-Uhlenbeck pairs through the R helper.

    All arguments are forwarded by keyword to the R function ``n_sim_ou``;
    ``length`` is the number of sampling steps of one series and
    ``random_seed`` seeds R's RNG.  The R result is wrapped in a pandas
    DataFrame and transposed before being returned.
    """
    r_paths = n_ou_sim.n_sim_ou(
        random_seed=random_seed, num_sim=num_sim,
        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22,
        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22,
        xinit_vec=xinit_vec, T0=T0, T=T, length=length)
    return pd.DataFrame(r_paths).transpose()

def price_to_log_price(n_price):
    """Return the element-wise natural logarithm of ``n_price``."""
    log_price = np.log(n_price)
    return log_price
def log_price_to_price(n_log_price):
    """Return the element-wise exponential of ``n_log_price``."""
    price = np.exp(n_log_price)
    return price
def price_to_return(n_price):
    """Convert a table of prices into percentage log returns.

    Parameters
    ----------
    n_price : pandas.DataFrame
        One price series per column, one observation per row.

    Returns
    -------
    pandas.DataFrame
        ``100 * (log(p[t+1]) - log(p[t]))`` for every column.  The result has
        one row fewer than ``n_price``, keeps the column labels, and is
        indexed by the labels of the earlier observation of each difference.
    """
    # One vectorised difference over the whole table instead of growing a
    # DataFrame with pd.concat once per column (which re-copies all previous
    # columns on every iteration).
    log_price = np.log(n_price).values
    pct_log_return = 100 * (log_price[1:] - log_price[:-1])
    return pd.DataFrame(pct_log_return,
                        index=n_price.index[:-1],
                        columns=n_price.columns)
def log_price_to_return(n_log_price):
    """Convert a table of log prices into percentage log returns.

    Parameters
    ----------
    n_log_price : pandas.DataFrame
        One log-price series per column, one observation per row.

    Returns
    -------
    pandas.DataFrame
        ``100 * (x[t+1] - x[t])`` for every column.  The result has one row
        fewer than the input, keeps the column labels, and is indexed by the
        labels of the earlier observation of each difference.
    """
    # Single vectorised difference; avoids the per-column pd.concat that
    # re-copied the accumulated frame on every iteration.
    log_price = n_log_price.values
    pct_log_return = 100 * (log_price[1:] - log_price[:-1])
    return pd.DataFrame(pct_log_return,
                        index=n_log_price.index[:-1],
                        columns=n_log_price.columns)
def cal_stats(n_return, n_price=None):
    """Summarise each pair of return series by its means and standard deviations.

    Even-positioned columns of ``n_return`` are treated as the first series of
    a pair and odd-positioned columns as the second.  The result has one row
    per pair and the columns ``return_mean1``, ``return_mean2``,
    ``return_sd1``, ``return_sd2``.  ``n_price`` is accepted but not used.
    """
    # (different expressions of calculation from intro to stat finance)
    # 4 statistics
    first_leg = n_return.iloc[:, 0::2]
    second_leg = n_return.iloc[:, 1::2]
    stat_rows = [
        first_leg.mean(axis=0).values,
        second_leg.mean(axis=0).values,
        first_leg.std(axis=0).values,
        second_leg.std(axis=0).values,
    ]
    stat_names = [
        'return_mean1', 'return_mean2',
        'return_sd1', 'return_sd2']
    # Build the table with one statistic per row, then flip it so that each
    # statistic becomes a column.
    return pd.DataFrame(stat_rows, index=stat_names).transpose()

def loss_function(params):
    """Return the simulated-moment distance for one candidate parameter vector.

    Two batches of OU paths are simulated on every call, each with a fresh
    seed drawn from NumPy's global RNG:

    * a reference batch using the module-level mu12, mu22, sigma11, sigma22;
    * a candidate batch in which those four values are replaced by
      ``params[0]``, ``params[1]``, ``params[2]``, ``params[3]``.

    Both batches are converted to returns and summarised with ``cal_stats``;
    the loss is the sum of the absolute differences between the two tables.
    Because both batches are re-simulated with new seeds, repeated calls with
    the same ``params`` return different values.

    Reads the module-level names num_sim, mu11, mu12, mu21, mu22, sigma11,
    sigma12, sigma21, sigma22, xinit_vec, T0, T and length.
    """
    params = FloatVector(params)
    print(params)

    # Draw scalar seeds directly: int() on a size-1 array is deprecated in
    # recent NumPy, and the scalar form consumes the same RNG draw.
    real_seed = int(np.random.randint(low=0, high=980608))
    n_real_log_price = n_ou_simulation(
        real_seed, num_sim,
        mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
        xinit_vec, T0, T, length)
    print(n_real_log_price.iloc[5, 5])
    n_real_price = log_price_to_price(n_log_price=n_real_log_price)
    n_real_return = price_to_return(n_price=n_real_price)
    n_real_stats = cal_stats(n_return=n_real_return, n_price=None)

    sim_seed = int(np.random.randint(low=0, high=980608))
    n_sim_log_price = n_ou_simulation(
        random_seed=sim_seed, num_sim=num_sim,
        mu11=mu11, mu12=params[0], mu21=mu21, mu22=params[1],
        sigma11=params[2], sigma12=sigma12, sigma21=sigma21, sigma22=params[3],
        xinit_vec=xinit_vec, T0=T0, T=T, length=length)
    print(n_sim_log_price.iloc[5, 5])
    n_sim_price = log_price_to_price(n_sim_log_price)
    n_sim_return = price_to_return(n_sim_price)
    n_sim_stats = cal_stats(n_sim_return)

    # Element-wise |real - sim| (the original sqrt(x**2)), compared by
    # position and computed in one vectorised step.
    moment_loss = pd.DataFrame(
        np.abs(n_real_stats.values - n_sim_stats.values),
        columns=n_real_stats.columns)
    # Explicit per-statistic (column) totals; np.sum(DataFrame) depends on the
    # deprecated axis=None behaviour of DataFrame.sum.
    sum_all = moment_loss.sum(axis=0)
    total_loss = sum_all.sum()
    print(sum_all)
    print(total_loss)
    print('---')

    return total_loss



# Load the observed pair prices and returns; the first CSV column is the index.
real_price = pd.read_csv("sp500_20180101_20181231_pair_prices.csv", index_col=[0])
real_log_price = price_to_log_price(n_price=real_price)
real_return = pd.read_csv("sp500_20180101_20181231_pair_returns.csv", index_col=[0])
real_stats = cal_stats(n_return=real_return, n_price=None)
# Fixed (non-optimised) model parameters.  The R model uses exp(sigma) for the
# diffusion entries, so -1000 makes the off-diagonal terms effectively zero.
mu11, mu21, sigma12, sigma21 = 0, 0, -1000, -1000
# Starting point of every simulated pair: the first-row log prices of columns
# (2*i, 2*i+1), wrapped as an R numeric vector.
xinit_vec = []
for i in range(int(real_log_price.shape[1]/2)):
    init_pair_log_price = [real_log_price.iloc[0, 2*i], real_log_price.iloc[0, 2*i+1]]
    init_pair_log_price = FloatVector(init_pair_log_price)
    xinit_vec.append(init_pair_log_price)
# One simulation per row of real_stats, on the time interval [T0, T], with as
# many sampling steps as there are price rows.
num_sim, T0, T, length = real_stats.shape[0], 0, 1, real_price.shape[0]
# Reference parameter values used for the first ("real") simulation inside
# loss_function; sigma values are on the log scale expected by the R model.
mu12, mu22, sigma11, sigma22 = 0.0369, 0.0405, -1.4118, -1.3574


# Number of independent optimisation runs and the shared starting point
# (mu12, mu22, sigma11, sigma22) handed to the optimiser.
num_iter = 1
initial0 = [1, 1, -1, -1]


# One NumPy seed per optimisation run, drawn from the global RNG.
iter_seed = np.random.randint(low=0, high=980608, size=(num_iter,))
def multi_process(iter):
    """Run one Powell optimisation of ``loss_function`` and report the outcome.

    ``iter`` is the run number; it selects the NumPy seed from ``iter_seed``
    that is installed before optimising.  Returns a tuple
    ``(solution, elapsed_time, loss_at_solution)`` where the loss is obtained
    from one additional call to ``loss_function``.
    """
    print(iter)
    np.random.seed(int(iter_seed[iter]))

    started_at = datetime.datetime.now()
    fit = minimize(
        loss_function,
        initial0,
        method='Powell',
        tol=1e-6,
        options={'disp': True},
    )
    print(fit.x)

    elapsed = datetime.datetime.now() - started_at
    print(elapsed)

    # Re-evaluate the loss at the reported optimum.
    final_loss = loss_function(fit.x)
    print(final_loss)
    return (fit.x, elapsed, final_loss)


# Run every optimisation restart in a worker pool.  The context manager shuts
# the workers down once the blocking map has returned, so no worker processes
# are left behind.
iterations = list(range(num_iter))
with multiprocessing.Pool() as pool:
    result = pool.map(multi_process, iterations)
print(result)

0
[1]  1  1 -1 -1

5.043932089409035
4.848796057384303
return_mean1    257.313378
return_mean2    251.792801
return_sd1      200.487853
return_sd2      178.956875
dtype: float64
888.550906163143
---
[1]  1  1 -1 -1

5.064931605699332
4.983529890659132
return_mean1    254.207319
return_mean2    248.394881
return_sd1      199.448290
return_sd2      181.083751
dtype: float64
883.1342419634568
---
[1]  2  1 -1 -1

5.023942365564527
5.000547726120866
return_mean1    356.434064
return_mean2    247.950593
return_sd1      235.424358
return_sd2      173.147819
dtype: float64
1012.9568337539648
---
[1] -0.618034  1.000000 -1.000000 -1.000000

5.063860422575563
4.8977924216023005
return_mean1    379.869747
return_mean2    251.211521
return_sd1      198.540187
return_sd2      177.149370
dtype: float64
1006.7708250147936
---
[1]  1  1 -1 -1

5.083635029227609
4.932762784685886
return_mean1    259.888573
return_mean2    249.592561
return_sd1      199.502427
return_sd2      178.989649
dtype: float64


dtype: float64
437.1269980764399
---
[1]  0.06860734  0.02238247 -1.00000000 -1.00000000

5.065030923057463
4.99091111318031
return_mean1     33.592092
return_mean2     34.372928
return_sd1      194.295178
return_sd2      172.553827
dtype: float64
434.81402606303567
---
[1]  0.06860734  0.02248022 -1.00000000 -1.00000000

5.017271177690762
4.931232010300562
return_mean1     34.893059
return_mean2     33.988801
return_sd1      194.534667
return_sd2      172.832877
dtype: float64
436.2494040332532
---
[1]  0.06860734  0.02248022  0.00000000 -1.00000000

5.046526224477517
5.127380512073765
return_mean1      77.530255
return_mean2      35.432144
return_sd1      1185.351096
return_sd2       171.800945
dtype: float64
1470.1144405601656
---
[1]  0.06860734  0.02248022 -2.61803400 -1.00000000

5.029849614130829
5.011408443674001
return_mean1     23.017164
return_mean2     34.359030
return_sd1      267.385467
return_sd2      171.799907
dtype: float64
496.5615690149033
---
[1]  0.06860734  0.022

dtype: float64
126.77432142319284
---
[1]  0.06860734  0.02248022 -1.42315568 -1.36325136

5.013996915529532
5.041267559772866
return_mean1    30.627088
return_mean2    31.660504
return_sd1      19.706276
return_sd2      20.911568
dtype: float64
102.90543619005025
---
[1]  0.06860734  0.02248022 -1.42315568 -1.41262715

5.0320215816989275
5.032496023021286
return_mean1    29.504575
return_mean2    28.311401
return_sd1      20.151733
return_sd2      27.570702
dtype: float64
105.53841152265548
---
[1]  0.06860734  0.02248022 -1.42315568 -1.38869331

5.026780202155203
5.040164414380262
return_mean1    27.461909
return_mean2    27.680701
return_sd1      21.280038
return_sd2      23.597313
dtype: float64
100.01996031798234
---
[1]  0.06860734  0.02248022 -1.42315568 -1.37787130

5.004780317640271
4.974017444646171
return_mean1    27.475338
return_mean2    29.033732
return_sd1      20.346042
return_sd2      21.163691
dtype: float64
98.01880263713447
---
[1]  0.06860734  0.02248022 -1.4231556

dtype: float64
99.2504513484144
---
[1]  0.06939608  0.03004303 -1.42315568 -1.36987041

4.965929503878852
4.972300097985354
return_mean1    28.404651
return_mean2    28.688368
return_sd1      19.551730
return_sd2      21.010931
dtype: float64
97.65568017717523
---
[1]  0.06939608  0.03004303 -0.42315568 -1.36987041

4.973508381567346
5.068660219073018
return_mean1     53.294172
return_mean2     26.584167
return_sd1      644.205109
return_sd2       20.944605
dtype: float64
745.0280532644331
---
[1]  0.06939608  0.03004303 -3.04118968 -1.36987041

5.0436705475020185
4.973744330527269
return_mean1     20.756739
return_mean2     29.517278
return_sd1      306.827990
return_sd2       19.909468
dtype: float64
377.0114745532508
---
[1]  0.06939608  0.03004303 -1.42315568 -1.36987041

5.028346320492397
4.995677725170369
return_mean1    26.793903
return_mean2    27.616683
return_sd1      19.320759
return_sd2      20.629963
dtype: float64
94.36130767918263
---
[1]  0.06939608  0.03004303 -2.0411

dtype: float64
100.56278882364298
---
[1]  0.06939608  0.03004303 -1.42315594 -1.36987041

4.978022112860168
4.97153249731018
return_mean1    28.763366
return_mean2    28.987153
return_sd1      17.318675
return_sd2      21.841596
dtype: float64
96.91079083246049
---
[1]  0.06939608  0.03004303 -1.42315594 -1.36987041

5.059021119136233
4.981967662573735
return_mean1    28.534703
return_mean2    26.399098
return_sd1      18.368461
return_sd2      20.274237
dtype: float64
93.57649957201798
---
[1]  0.06939608  0.03004303 -1.42315594 -1.36987041

5.051843146396627
4.9798629657604785
return_mean1    30.859124
return_mean2    28.439020
return_sd1      19.633952
return_sd2      21.336172
dtype: float64
100.26826711032183
---
[1]  0.06939608  0.03004303 -1.42315594 -1.36987041

5.0407010172525135
5.007698733304744
return_mean1    26.952981
return_mean2    28.415125
return_sd1      19.315070
return_sd2      21.787769
dtype: float64
96.47094471366013
---
[1]  0.06939608  0.03004303 -1.42315594 

dtype: float64
104.13858656108935
---
[1]  0.07542793  1.03004303 -1.42315594 -1.31192146

4.992722063926402
4.866121615668144
return_mean1     31.068136
return_mean2    258.515777
return_sd1       18.622413
return_sd2       32.643437
dtype: float64
340.84976313923926
---
[1]  0.07542793 -1.58799097 -1.42315594 -1.31192146

5.029822252389404
5.170481891297585
return_mean1      30.131395
return_mean2    1653.917163
return_sd1        18.321441
return_sd2       450.550101
dtype: float64
2152.92009954531
---
[1]  0.07542793  0.03004303 -1.42315594 -1.31192146

5.030217584124392
4.991309260386722
return_mean1    29.805158
return_mean2    32.330506
return_sd1      19.960779
return_sd2      26.263784
dtype: float64
108.36022664200141
---
[1]  0.07542793 -0.58799095 -1.42315594 -1.31192146

5.013500685700763
5.08595727102201
return_mean1     29.631761
return_mean2    350.778445
return_sd1       20.884604
return_sd2       26.748800
dtype: float64
428.0436097906171
---
[1]  0.07542793  0.4120090


5.050949262646782
5.037604897716858
return_mean1    32.162959
return_mean2    26.954885
return_sd1      53.850934
return_sd2      26.737458
dtype: float64
139.70623556087523
---
[1]  0.07542793  0.01132853 -1.41713942 -1.31192146

4.996191035275464
5.0485146373788625
return_mean1    29.242537
return_mean2    30.706433
return_sd1      19.028760
return_sd2      27.632291
dtype: float64
106.6100210082025
---
[1]  0.07542793  0.01132853 -1.36370944 -1.31192146

4.988831332923793
5.053185564846189
return_mean1    31.025002
return_mean2    30.106688
return_sd1      23.008132
return_sd2      25.270471
dtype: float64
109.41029259017236
---
[1]  0.07542793  0.01132853 -1.39628765 -1.31192146

4.950747815966103
4.981002461546946
return_mean1    30.205364
return_mean2    30.236693
return_sd1      20.196244
return_sd2      26.862460
dtype: float64
107.5007613224474
---
[1]  0.07542793  0.01132853 -1.40895526 -1.31192146

5.011347540739124
5.03849454800125
return_mean1    31.242404
return_mean2   

dtype: float64
103.34881370700168
---
[1]  0.07542793  0.01132853 -1.40749735 -1.31565748

5.011869523613602
4.951031172315016
return_mean1    29.134215
return_mean2    32.433505
return_sd1      20.558656
return_sd2      25.272168
dtype: float64
107.39854390690329
---
[1]  0.07542793  0.01132853 -1.40749735 -1.31669014

5.062580936701368
5.027820916970252
return_mean1    29.742278
return_mean2    29.456177
return_sd1      17.517882
return_sd2      25.262040
dtype: float64
101.97837598742032
---
[1]  0.07542793  0.01132853 -1.40749735 -1.31654777

4.994426736219841
5.010740903485009
return_mean1    32.228066
return_mean2    30.711335
return_sd1      18.264083
return_sd2      25.665513
dtype: float64
106.8689978611059
---
[1]  0.07542793  0.01132853 -1.40749735 -1.31669062

4.9903215966979015
4.9855732702118285
return_mean1    28.712826
return_mean2    29.182906
return_sd1      19.535703
return_sd2      25.434520
dtype: float64
102.86595440868632
---
[1]  0.07542793  0.01132853 -1.407497

# ou44 powell with bound

In [None]:
import pandas as pd
import numpy as np
import random
from numpy.random import RandomState
from scipy import stats
from scipy.optimize import minimize
import scipy
import os
os.environ['R_HOME'] = '/Library/Frameworks/R.framework/Versions/4.1/Resources/'
import rpy2
from rpy2.robjects.packages import importr
from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
import datetime
from rpy2.robjects.vectors import FloatVector
import multiprocessing

# Seed NumPy's global RNG; the per-iteration seeds drawn further down with
# np.random.randint come from this stream.
np.random.seed(441)

# Define the model that generates pair simulations.
# importr loads the R package "yuima"; the R source below calls its
# setModel / setSampling / simulate functions.
yuima = importr("yuima")
# R source of n_sim_ou.  It seeds R's RNG with random_seed, builds a
# two-dimensional model with drift (mu11 - mu12*X1, mu21 - mu22*X2) and a 2x2
# diffusion matrix whose entries are the sigma arguments themselves, and
# simulates num_sim paths.  Each path occupies two adjacent columns of a
# data.frame that has length+1 rows.
n_ou_sim_string = """
n_sim_ou = function(random_seed, num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length){

  set.seed(random_seed)

  drift = c("mu11-mu12*X1", "mu21-mu22*X2")
  diffusion = matrix(c("sigma11", "sigma12", "sigma21", "sigma22"), 2, 2, byrow=TRUE)
  ou_model = setModel(drift=drift, diffusion=diffusion, 
                        time.variable = "t",
                        state.var=c("X1","X2"), solve.variable=c("X1","X2"))

  newsamp = setSampling(Initial=T0, Terminal=T, n=length)

  n_sim_ou_data = data.frame(matrix(nrow=length+1, ncol=2*num_sim))
  for (i in 1:num_sim){
    ou_sim = simulate(ou_model, 
                      true.par=list(
                        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22, 
                        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22), 
                      xinit=xinit_vec[i], sampling=newsamp)
    original_data = ou_sim@data@original.data
    one_sim_ou = data.frame(original_data[,1], original_data[,2])
    colnames(one_sim_ou) = c('series1', 'series2')
    n_sim_ou_data[, (2*i-1):(2*i)] = one_sim_ou
  }
  return(n_sim_ou_data)
}
"""

# Wrap the R source as an anonymous package so the function is callable from
# Python as n_ou_sim.n_sim_ou(...).
n_ou_sim = SignatureTranslatedAnonymousPackage(n_ou_sim_string, "n_ou_sim")
def n_ou_simulation(random_seed, num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length):
    """Simulate num_sim bivariate Ornstein-Uhlenbeck pairs through the R helper.

    All arguments are forwarded by keyword to the R function ``n_sim_ou``;
    ``length`` is the number of sampling steps of one series and
    ``random_seed`` seeds R's RNG.  The R result is wrapped in a pandas
    DataFrame and transposed before being returned.
    """
    r_paths = n_ou_sim.n_sim_ou(
        random_seed=random_seed, num_sim=num_sim,
        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22,
        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22,
        xinit_vec=xinit_vec, T0=T0, T=T, length=length)
    return pd.DataFrame(r_paths).transpose()

def price_to_log_price(n_price):
    """Return the element-wise natural logarithm of ``n_price``."""
    log_price = np.log(n_price)
    return log_price
def log_price_to_price(n_log_price):
    """Return the element-wise exponential of ``n_log_price``."""
    price = np.exp(n_log_price)
    return price
def price_to_return(n_price):
    """Convert a table of prices into percentage log returns.

    Parameters
    ----------
    n_price : pandas.DataFrame
        One price series per column, one observation per row.

    Returns
    -------
    pandas.DataFrame
        ``100 * (log(p[t+1]) - log(p[t]))`` for every column.  The result has
        one row fewer than ``n_price``, keeps the column labels, and is
        indexed by the labels of the earlier observation of each difference.
    """
    # One vectorised difference over the whole table instead of growing a
    # DataFrame with pd.concat once per column (which re-copies all previous
    # columns on every iteration).
    log_price = np.log(n_price).values
    pct_log_return = 100 * (log_price[1:] - log_price[:-1])
    return pd.DataFrame(pct_log_return,
                        index=n_price.index[:-1],
                        columns=n_price.columns)
def log_price_to_return(n_log_price):
    """Convert a table of log prices into percentage log returns.

    Parameters
    ----------
    n_log_price : pandas.DataFrame
        One log-price series per column, one observation per row.

    Returns
    -------
    pandas.DataFrame
        ``100 * (x[t+1] - x[t])`` for every column.  The result has one row
        fewer than the input, keeps the column labels, and is indexed by the
        labels of the earlier observation of each difference.
    """
    # Single vectorised difference; avoids the per-column pd.concat that
    # re-copied the accumulated frame on every iteration.
    log_price = n_log_price.values
    pct_log_return = 100 * (log_price[1:] - log_price[:-1])
    return pd.DataFrame(pct_log_return,
                        index=n_log_price.index[:-1],
                        columns=n_log_price.columns)
def cal_stats(n_return, n_price=None):
    """Summarise each pair of return series by its means and standard deviations.

    Even-positioned columns of ``n_return`` are treated as the first series of
    a pair and odd-positioned columns as the second.  The result has one row
    per pair and the columns ``return_mean1``, ``return_mean2``,
    ``return_sd1``, ``return_sd2``.  ``n_price`` is accepted but not used.
    """
    # (different expressions of calculation from intro to stat finance)
    # 4 statistics
    first_leg = n_return.iloc[:, 0::2]
    second_leg = n_return.iloc[:, 1::2]
    stat_rows = [
        first_leg.mean(axis=0).values,
        second_leg.mean(axis=0).values,
        first_leg.std(axis=0).values,
        second_leg.std(axis=0).values,
    ]
    stat_names = [
        'return_mean1', 'return_mean2',
        'return_sd1', 'return_sd2']
    # Build the table with one statistic per row, then flip it so that each
    # statistic becomes a column.
    return pd.DataFrame(stat_rows, index=stat_names).transpose()

def loss_function(params):
    """Return the simulated-moment distance for one candidate parameter vector.

    Two batches of OU paths are simulated on every call, each with a fresh
    seed drawn from NumPy's global RNG:

    * a reference batch using the module-level mu12, mu22, sigma11, sigma22;
    * a candidate batch in which those four values are replaced by
      ``params[0]``, ``params[1]``, ``params[2]``, ``params[3]``.

    Both batches are converted to returns and summarised with ``cal_stats``;
    the loss is the sum of the absolute differences between the two tables.
    Because both batches are re-simulated with new seeds, repeated calls with
    the same ``params`` return different values.

    Reads the module-level names num_sim, mu11, mu12, mu21, mu22, sigma11,
    sigma12, sigma21, sigma22, xinit_vec, T0, T and length.
    """
    params = FloatVector(params)
    print(params)

    # Draw each seed once, then log it and use that same value.  Previously
    # the printed number came from a separate RNG draw and therefore was not
    # the seed handed to the simulation.  The scalar form of randint also
    # avoids the deprecated int() conversion of a size-1 array.
    real_seed = int(np.random.randint(low=0, high=980608))
    print(real_seed)
    n_real_log_price = n_ou_simulation(
        real_seed, num_sim,
        mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
        xinit_vec, T0, T, length)
    print(n_real_log_price.iloc[5, 5])
    n_real_price = log_price_to_price(n_log_price=n_real_log_price)
    n_real_return = price_to_return(n_price=n_real_price)
    n_real_stats = cal_stats(n_return=n_real_return, n_price=None)

    sim_seed = int(np.random.randint(low=0, high=980608))
    print(sim_seed)
    n_sim_log_price = n_ou_simulation(
        random_seed=sim_seed, num_sim=num_sim,
        mu11=mu11, mu12=params[0], mu21=mu21, mu22=params[1],
        sigma11=params[2], sigma12=sigma12, sigma21=sigma21, sigma22=params[3],
        xinit_vec=xinit_vec, T0=T0, T=T, length=length)
    print(n_sim_log_price.iloc[5, 5])
    n_sim_price = log_price_to_price(n_sim_log_price)
    n_sim_return = price_to_return(n_sim_price)
    n_sim_stats = cal_stats(n_sim_return)

    # Element-wise |real - sim| (the original sqrt(x**2)), compared by
    # position and computed in one vectorised step.
    moment_loss = pd.DataFrame(
        np.abs(n_real_stats.values - n_sim_stats.values),
        columns=n_real_stats.columns)
    # Explicit per-statistic (column) totals; np.sum(DataFrame) depends on the
    # deprecated axis=None behaviour of DataFrame.sum.
    sum_all = moment_loss.sum(axis=0)
    total_loss = sum_all.sum()
    print(sum_all)
    print(total_loss)
    print('---')

    return total_loss



# Load the observed pair prices and returns; the first CSV column is the index.
real_price = pd.read_csv("sp500_20180101_20181231_pair_prices.csv", index_col=[0])
real_log_price = price_to_log_price(n_price=real_price)
real_return = pd.read_csv("sp500_20180101_20181231_pair_returns.csv", index_col=[0])
real_stats = cal_stats(n_return=real_return, n_price=None)
# Fixed (non-optimised) model parameters; the off-diagonal diffusion entries
# are set to zero.
mu11, mu21, sigma12, sigma21 = 0, 0, 0, 0
# Starting point of every simulated pair: the first-row log prices of columns
# (2*i, 2*i+1), wrapped as an R numeric vector.
xinit_vec = []
for i in range(int(real_log_price.shape[1]/2)):
    init_pair_log_price = [real_log_price.iloc[0, 2*i], real_log_price.iloc[0, 2*i+1]]
    init_pair_log_price = FloatVector(init_pair_log_price)
    xinit_vec.append(init_pair_log_price)
# One simulation per row of real_stats, on the time interval [T0, T], with as
# many sampling steps as there are price rows.
num_sim, T0, T, length = real_stats.shape[0], 0, 1, real_price.shape[0]
# Reference parameter values used for the first ("real") simulation inside
# loss_function.
mu12, mu22, sigma11, sigma22 = 0.0369, 0.0405, 0.2437, 0.2573


# Number of independent optimisation runs and the shared starting point
# (mu12, mu22, sigma11, sigma22) handed to the optimiser.
num_iter = 1
initial0 = [0.5, 0.5, 0.5, 0.5]


# One NumPy seed per optimisation run, drawn from the global RNG.
iter_seed = np.random.randint(low=0, high=980608, size=(num_iter,))
def multi_process(iter):
    """Run one bounded Powell optimisation of ``loss_function``.

    ``iter`` is the run number; it selects the NumPy seed from ``iter_seed``
    that is installed before optimising.  Every parameter is constrained to
    the interval [0, 1].  Returns a tuple
    ``(solution, elapsed_time, loss_at_solution)`` where the loss is obtained
    from one additional call to ``loss_function``.
    """
    print(iter)
    np.random.seed(int(iter_seed[iter]))

    started_at = datetime.datetime.now()
    unit_box = [(0., 1), (0., 1), (0., 1), (0., 1)]
    fit = minimize(
        loss_function,
        initial0,
        method='Powell',
        tol=1e-6,
        options={'disp': True},
        bounds=unit_box,
    )
    print(fit.x)

    elapsed = datetime.datetime.now() - started_at
    print(elapsed)

    # Re-evaluate the loss at the reported optimum.
    final_loss = loss_function(fit.x)
    print(final_loss)
    return (fit.x, elapsed, final_loss)


# Run every optimisation restart in a worker pool.  The context manager shuts
# the workers down once the blocking map has returned, so no worker processes
# are left behind.
iterations = list(range(num_iter))
with multiprocessing.Pool() as pool:
    result = pool.map(multi_process, iterations)
print(result)

# ou44 Powell with no seed

In [None]:
import pandas as pd
import numpy as np
from scipy import stats
from scipy.optimize import minimize
import scipy
import os
os.environ['R_HOME'] = '/Library/Frameworks/R.framework/Versions/4.1/Resources/'
import rpy2
from rpy2.robjects.packages import importr
from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
import datetime
from rpy2.robjects.vectors import FloatVector
import multiprocessing


# Define the model that generates pair simulations.
# importr loads the R package "yuima"; the R source below calls its
# setModel / setSampling / simulate functions.
yuima = importr("yuima")
# R source of n_sim_ou.  It builds a two-dimensional model with drift
# (mu11 - mu12*X1, mu21 - mu22*X2) and a 2x2 diffusion matrix whose entries
# are the sigma arguments themselves, and simulates num_sim paths.  Each path
# occupies two adjacent columns of a data.frame that has length+1 rows.
# This variant takes no seed argument and never calls set.seed.
n_ou_sim_string = """
n_sim_ou = function(num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length){

  drift = c("mu11-mu12*X1", "mu21-mu22*X2")
  diffusion = matrix(c("sigma11", "sigma12", "sigma21", "sigma22"), 2, 2, byrow=TRUE)
  ou_model = setModel(drift=drift, diffusion=diffusion, 
                        time.variable = "t",
                        state.var=c("X1","X2"), solve.variable=c("X1","X2"))

  newsamp = setSampling(Initial=T0, Terminal=T, n=length)

  n_sim_ou_data = data.frame(matrix(nrow=length+1, ncol=2*num_sim))
  for (i in 1:num_sim){
    ou_sim = simulate(ou_model, 
                      true.par=list(
                        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22, 
                        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22), 
                      xinit=xinit_vec[i], sampling=newsamp)
    original_data = ou_sim@data@original.data
    one_sim_ou = data.frame(original_data[,1], original_data[,2])
    colnames(one_sim_ou) = c('series1', 'series2')
    n_sim_ou_data[, (2*i-1):(2*i)] = one_sim_ou
  }
  return(n_sim_ou_data)
}
"""

# Wrap the R source as an anonymous package so the function is callable from
# Python as n_ou_sim.n_sim_ou(...).
n_ou_sim = SignatureTranslatedAnonymousPackage(n_ou_sim_string, "n_ou_sim")
def n_ou_simulation(num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length):
    """Simulate num_sim bivariate Ornstein-Uhlenbeck pairs through the R helper.

    All arguments are forwarded by keyword to the R function ``n_sim_ou``;
    ``length`` is the number of sampling steps of one series.  The R result
    is wrapped in a pandas DataFrame and transposed before being returned.
    """
    r_paths = n_ou_sim.n_sim_ou(
        num_sim=num_sim,
        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22,
        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22,
        xinit_vec=xinit_vec, T0=T0, T=T, length=length)
    return pd.DataFrame(r_paths).transpose()

def price_to_log_price(n_price):
    """Return the element-wise natural logarithm of ``n_price``."""
    log_price = np.log(n_price)
    return log_price
def log_price_to_price(n_log_price):
    """Return the element-wise exponential of ``n_log_price``."""
    price = np.exp(n_log_price)
    return price
def price_to_return(n_price):
    """Convert a table of prices into percentage log returns.

    Parameters
    ----------
    n_price : pandas.DataFrame
        One price series per column, one observation per row.

    Returns
    -------
    pandas.DataFrame
        ``100 * (log(p[t+1]) - log(p[t]))`` for every column.  The result has
        one row fewer than ``n_price``, keeps the column labels, and is
        indexed by the labels of the earlier observation of each difference.
    """
    # One vectorised difference over the whole table instead of growing a
    # DataFrame with pd.concat once per column (which re-copies all previous
    # columns on every iteration).
    log_price = np.log(n_price).values
    pct_log_return = 100 * (log_price[1:] - log_price[:-1])
    return pd.DataFrame(pct_log_return,
                        index=n_price.index[:-1],
                        columns=n_price.columns)
def log_price_to_return(n_log_price):
    """Convert a table of log prices into percentage log returns.

    Parameters
    ----------
    n_log_price : pandas.DataFrame
        One log-price series per column, one observation per row.

    Returns
    -------
    pandas.DataFrame
        ``100 * (x[t+1] - x[t])`` for every column.  The result has one row
        fewer than the input, keeps the column labels, and is indexed by the
        labels of the earlier observation of each difference.
    """
    # Single vectorised difference; avoids the per-column pd.concat that
    # re-copied the accumulated frame on every iteration.
    log_price = n_log_price.values
    pct_log_return = 100 * (log_price[1:] - log_price[:-1])
    return pd.DataFrame(pct_log_return,
                        index=n_log_price.index[:-1],
                        columns=n_log_price.columns)
def cal_stats(n_return, n_price=None):
    """Summarise each pair of return series by its means and standard deviations.

    Even-positioned columns of ``n_return`` are treated as the first series of
    a pair and odd-positioned columns as the second.  The result has one row
    per pair and the columns ``return_mean1``, ``return_mean2``,
    ``return_sd1``, ``return_sd2``.  ``n_price`` is accepted but not used.
    """
    # (different expressions of calculation from intro to stat finance)
    # 4 statistics
    first_leg = n_return.iloc[:, 0::2]
    second_leg = n_return.iloc[:, 1::2]
    stat_rows = [
        first_leg.mean(axis=0).values,
        second_leg.mean(axis=0).values,
        first_leg.std(axis=0).values,
        second_leg.std(axis=0).values,
    ]
    stat_names = [
        'return_mean1', 'return_mean2',
        'return_sd1', 'return_sd2']
    # Build the table with one statistic per row, then flip it so that each
    # statistic becomes a column.
    return pd.DataFrame(stat_rows, index=stat_names).transpose()

def loss_function(params):
    """Return the simulated-moment distance for one candidate parameter vector.

    Two batches of OU paths are simulated on every call:

    * a reference batch using the module-level mu12, mu22, sigma11, sigma22;
    * a candidate batch in which those four values are replaced by
      ``params[0]``, ``params[1]``, ``params[2]``, ``params[3]``.

    Both batches are converted to returns and summarised with ``cal_stats``;
    the loss is the sum of the absolute differences between the two tables.
    No seed is passed to the simulator in this variant, so repeated calls
    with the same ``params`` return different values.

    Reads the module-level names num_sim, mu11, mu12, mu21, mu22, sigma11,
    sigma12, sigma21, sigma22, xinit_vec, T0, T and length.
    """
    params = FloatVector(params)
    print(params)

    n_real_log_price = n_ou_simulation(
        num_sim,
        mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
        xinit_vec, T0, T, length)
    print(n_real_log_price.iloc[5, 5])
    n_real_price = log_price_to_price(n_log_price=n_real_log_price)
    n_real_return = price_to_return(n_price=n_real_price)
    n_real_stats = cal_stats(n_return=n_real_return, n_price=None)

    n_sim_log_price = n_ou_simulation(
        num_sim=num_sim,
        mu11=mu11, mu12=params[0], mu21=mu21, mu22=params[1],
        sigma11=params[2], sigma12=sigma12, sigma21=sigma21, sigma22=params[3],
        xinit_vec=xinit_vec, T0=T0, T=T, length=length)
    print(n_sim_log_price.iloc[5, 5])
    n_sim_price = log_price_to_price(n_sim_log_price)
    n_sim_return = price_to_return(n_sim_price)
    n_sim_stats = cal_stats(n_sim_return)

    # Element-wise |real - sim| (the original sqrt(x**2)), compared by
    # position and computed in one vectorised step.
    moment_loss = pd.DataFrame(
        np.abs(n_real_stats.values - n_sim_stats.values),
        columns=n_real_stats.columns)
    # Explicit per-statistic (column) totals; np.sum(DataFrame) depends on the
    # deprecated axis=None behaviour of DataFrame.sum.
    sum_all = moment_loss.sum(axis=0)
    total_loss = sum_all.sum()
    print(sum_all)
    print(total_loss)
    print('---')

    return total_loss



# Load the observed pair prices and returns; the first CSV column is the index.
real_price = pd.read_csv("sp500_20180101_20181231_pair_prices.csv", index_col=[0])
real_log_price = price_to_log_price(n_price=real_price)
real_return = pd.read_csv("sp500_20180101_20181231_pair_returns.csv", index_col=[0])
real_stats = cal_stats(n_return=real_return, n_price=None)
# Fixed (non-optimised) model parameters; the off-diagonal diffusion entries
# are set to zero.
mu11, mu21, sigma12, sigma21 = 0, 0, 0, 0
# Starting point of every simulated pair: the first-row log prices of columns
# (2*i, 2*i+1), wrapped as an R numeric vector.
xinit_vec = []
for i in range(int(real_log_price.shape[1]/2)):
    init_pair_log_price = [real_log_price.iloc[0, 2*i], real_log_price.iloc[0, 2*i+1]]
    init_pair_log_price = FloatVector(init_pair_log_price)
    xinit_vec.append(init_pair_log_price)
# One simulation per row of real_stats, on the time interval [T0, T], with as
# many sampling steps as there are price rows.
num_sim, T0, T, length = real_stats.shape[0], 0, 1, real_price.shape[0]
# Reference parameter values used for the first ("real") simulation inside
# loss_function.
mu12, mu22, sigma11, sigma22 = 0.0369, 0.0405, 0.2437, 0.2573


# Number of independent optimisation runs and the shared starting point
# (mu12, mu22, sigma11, sigma22) handed to the optimiser.
num_iter = 1
initial0 = [0.5, 0.5, 0.5, 0.5]


def multi_process(iter):
    """Run one bounded Powell optimisation of ``loss_function``.

    ``iter`` is the run number and is only printed.  Every parameter is
    constrained to the interval [0, 1].  Returns a tuple
    ``(solution, elapsed_time, loss_at_solution)`` where the loss is obtained
    from one additional call to ``loss_function``.
    """
    print(iter)

    started_at = datetime.datetime.now()
    unit_box = [(0., 1), (0., 1), (0., 1), (0., 1)]
    fit = minimize(
        loss_function,
        initial0,
        method='Powell',
        tol=1e-6,
        options={'disp': True},
        bounds=unit_box,
    )
    print(fit.x)

    elapsed = datetime.datetime.now() - started_at
    print(elapsed)

    # Re-evaluate the loss at the reported optimum.
    final_loss = loss_function(fit.x)
    print(final_loss)
    return (fit.x, elapsed, final_loss)


# Run every optimisation restart in a worker pool.  The context manager shuts
# the workers down once the blocking map has returned, so no worker processes
# are left behind.
iterations = list(range(num_iter))
with multiprocessing.Pool() as pool:
    result = pool.map(multi_process, iterations)
print(result)

0
[1] 0.5 0.5 0.5 0.5

5.023367493888124
4.9449481189151
return_mean1    153.131421
return_mean2    148.903577
return_sd1      401.941049
return_sd2      378.408244
dtype: float64
1082.3842907674127
---
[1] 0.381966 0.500000 0.500000 0.500000

5.031613968941823
4.924320399555874
return_mean1    122.421447
return_mean2    150.603917
return_sd1      398.867889
return_sd2      375.009550
dtype: float64
1046.9028030451377
---
[1] 0.618034 0.500000 0.500000 0.500000

4.92081531241981
4.986561676531084
return_mean1    186.382667
return_mean2    150.571216
return_sd1      403.268873
return_sd2      381.686178
dtype: float64
1121.908934033991
---
[1] 0.236068 0.500000 0.500000 0.500000

5.021669829199146
4.955396722403672
return_mean1     77.786531
return_mean2    144.492882
return_sd1      404.148864
return_sd2      382.382306
dtype: float64
1008.8105828235778
---
[1] 0.145898 0.500000 0.500000 0.500000

4.95382353389236
5.052259989699721
return_mean1     52.062830
return_mean2    153.949626


876.7316597505644
---
[1] 0.03965469 0.11698024 0.50000000 0.50000000

4.999131019462538
4.964696792413482
return_mean1     43.054012
return_mean2     51.443278
return_sd1      405.603900
return_sd2      384.481911
dtype: float64
884.5831014264329
---
[1] 0.03965469 0.11275522 0.50000000 0.50000000

4.9296511402093754
4.907848533079072
return_mean1     44.513132
return_mean2     47.624955
return_sd1      400.852318
return_sd2      379.762437
dtype: float64
872.7528427689132
---
[1] 0.03965469 0.11091224 0.50000000 0.50000000

4.953722374160426
5.122897315059032
return_mean1     45.601354
return_mean2     52.431902
return_sd1      400.510126
return_sd2      380.341496
dtype: float64
878.8848774012058
---
[1] 0.03965469 0.11205105 0.50000000 0.50000000

5.053390328339946
4.9876194860188985
return_mean1     46.929245
return_mean2     42.709106
return_sd1      405.854988
return_sd2      377.932588
dtype: float64
873.4259265396929
---
[1] 0.03965469 0.11240316 0.50000000 0.50000000

5.07711

dtype: float64
478.9258822453412
---
[1] 0.03965469 0.11275488 0.24297916 0.50000000

5.039639878537073
4.934825084171603
return_mean1     27.551434
return_mean2     48.753981
return_sd1       20.527601
return_sd2      378.078946
dtype: float64
474.91196156786634
---
[1] 0.03965469 0.11275488 0.24297949 0.38196601

4.954627917202341
5.121598915358425
return_mean1     25.612025
return_mean2     39.263668
return_sd1       18.282162
return_sd2      194.396531
dtype: float64
277.5543861377855
---
[1] 0.03965469 0.11275488 0.24297949 0.61803399

4.986997924253102
4.996383317178394
return_mean1     26.169627
return_mean2     55.037053
return_sd1       19.413305
return_sd2      561.714454
dtype: float64
662.3344387301702
---
[1] 0.03965469 0.11275488 0.24297949 0.23606798

5.019681647285118
5.060468144522856
return_mean1    26.928991
return_mean2    35.041339
return_sd1      18.829713
return_sd2      34.074465
dtype: float64
114.87450792826539
---
[1] 0.03965469 0.11275488 0.24297949 0.145898

dtype: float64
104.78928239608284
---
[1] 0.06284388 0.11275488 0.24297949 0.25508038

5.066622414176848
5.043645343612987
return_mean1    27.198728
return_mean2    36.806494
return_sd1      20.226292
return_sd2      20.906083
dtype: float64
105.13759778475594
---
[1] 0.06703507 0.11275488 0.24297949 0.25508038

5.048586559620348
4.97986968507796
return_mean1    25.661407
return_mean2    34.785374
return_sd1      21.633303
return_sd2      22.362782
dtype: float64
104.44286583093944
---
[1] 0.06681613 0.11275488 0.24297949 0.25508038

5.045884741663371
4.9802402859238555
return_mean1    27.244982
return_mean2    35.488067
return_sd1      20.017824
return_sd2      21.855993
dtype: float64
104.60686575082086
---
[1] 0.06811743 0.11275488 0.24297949 0.25508038

5.010495431406227
5.018919896962887
return_mean1    28.123375
return_mean2    34.800850
return_sd1      18.664706
return_sd2      21.050024
dtype: float64
102.63895542377011
---
[1] 0.06869341 0.11275488 0.24297949 0.25508038

4.942

---
[1] 0.06868115 0.05550126 0.24297949 0.25508038

5.0708386168833215
4.961820528120389
return_mean1    29.215207
return_mean2    26.582501
return_sd1      19.536133
return_sd2      20.246474
dtype: float64
95.58031433405571
---
[1] 0.06868115 0.05564124 0.24297949 0.25508038

5.0046617651100425
5.066374723953036
return_mean1    29.376517
return_mean2    27.712494
return_sd1      20.650901
return_sd2      22.930964
dtype: float64
100.67087596343967
---
[1] 0.06868115 0.05569471 0.24297949 0.25508038

5.041946157766476
5.055966564235924
return_mean1    29.390635
return_mean2    29.783857
return_sd1      17.097000
return_sd2      20.005692
dtype: float64
96.27718395252569
---
[1] 0.06868115 0.05571513 0.24297949 0.25508038

5.014508759296496
4.999818042584111
return_mean1    27.213378
return_mean2    30.080421
return_sd1      19.900869
return_sd2      19.417425
dtype: float64
96.61209240399782
---
[1] 0.06868115 0.05572293 0.24297949 0.25508038

4.978034388387272
5.053286423481455
retu


4.9406808350331755
5.0453142789602925
return_mean1    28.157054
return_mean2    27.459652
return_sd1      19.083733
return_sd2      34.717903
dtype: float64
109.41834299912071
---
[1] 0.06868115 0.05572776 0.24399802 0.14589803

4.989233388562686
4.977436125431993
return_mean1     27.594892
return_mean2     22.693743
return_sd1       19.460044
return_sd2      173.357884
dtype: float64
243.10656256468314
---
[1] 0.06868115 0.05572776 0.24399802 0.25651044

4.989285068037556
5.046105851463784
return_mean1    30.353670
return_mean2    27.136223
return_sd1      19.163241
return_sd2      19.711492
dtype: float64
96.36462512980268
---
[1] 0.06868115 0.05572776 0.24399802 0.28812984

5.039331377875639
5.022163001819589
return_mean1    29.403647
return_mean2    29.293911
return_sd1      19.597593
return_sd2      48.650380
dtype: float64
126.94553055026469
---
[1] 0.06868115 0.05572776 0.24399802 0.25664116

5.013587679357218
4.979054029095548
return_mean1    26.972314
return_mean2    29.07903

5.005687973323102
return_mean1    28.265856
return_mean2    27.502219
return_sd1      20.231530
return_sd2      21.015140
dtype: float64
97.01474590912181
---
[1] 0.04462414 0.05572776 0.24399802 0.25768193

5.079366744516911
4.998374707373841
return_mean1    27.924896
return_mean2    28.768440
return_sd1      19.154468
return_sd2      21.514902
dtype: float64
97.36270638991091
---
[1] 0.04038281 0.05572776 0.24399802 0.25768193

5.008597724636911
4.9970767048725975
return_mean1    25.906474
return_mean2    27.504163
return_sd1      18.670726
return_sd2      19.056570
dtype: float64
91.13793231814934
---
[1] 0.04200285 0.05572776 0.24399802 0.25768193

5.020651717225104
4.964818775460592
return_mean1    29.528242
return_mean2    28.205103
return_sd1      19.729666
return_sd2      19.920642
dtype: float64
97.38365260991563
---
[1] 0.03938156 0.05572776 0.24399802 0.25768193

5.030575345170808
5.0498221538448975
return_mean1    27.153654
return_mean2    28.322879
return_sd1      19.45026

# ou46 Powell with no seed

In [None]:
import pandas as pd
import numpy as np
from scipy import stats
from scipy.optimize import minimize
import scipy
import os
os.environ['R_HOME'] = '/Library/Frameworks/R.framework/Versions/4.1/Resources/'
import rpy2
from rpy2.robjects.packages import importr
from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
import datetime
from rpy2.robjects.vectors import FloatVector
import multiprocessing


# Define the model that generates pair simulations.
# Load the R package `yuima` so that the R source below can call its
# setModel / setSampling / simulate functions.
yuima = importr("yuima")
# R source for `n_sim_ou`. It builds a two-dimensional model with drift
# (mu11 - mu12*X1, mu21 - mu22*X2) and the 2x2 diffusion matrix
# [[sigma11, sigma12], [sigma21, sigma22]], simulates it `num_sim` times on a
# grid of `length` steps between T0 and T, and returns a data.frame with
# length+1 rows and 2*num_sim columns (columns 2i-1 and 2i hold pair i).
# This variant never calls set.seed(), so the draws depend on R's current
# RNG state.
n_ou_sim_string = """
n_sim_ou = function(num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length){

  drift = c("mu11-mu12*X1", "mu21-mu22*X2")
  diffusion = matrix(c("sigma11", "sigma12", "sigma21", "sigma22"), 2, 2, byrow=TRUE)
  ou_model = setModel(drift=drift, diffusion=diffusion, 
                        time.variable = "t",
                        state.var=c("X1","X2"), solve.variable=c("X1","X2"))

  newsamp = setSampling(Initial=T0, Terminal=T, n=length)

  n_sim_ou_data = data.frame(matrix(nrow=length+1, ncol=2*num_sim))
  for (i in 1:num_sim){
    ou_sim = simulate(ou_model, 
                      true.par=list(
                        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22, 
                        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22), 
                      xinit=xinit_vec[i], sampling=newsamp)
    original_data = ou_sim@data@original.data
    one_sim_ou = data.frame(original_data[,1], original_data[,2])
    colnames(one_sim_ou) = c('series1', 'series2')
    n_sim_ou_data[, (2*i-1):(2*i)] = one_sim_ou
  }
  return(n_sim_ou_data)
}
"""

# Evaluate the R source and expose `n_sim_ou` as an attribute of a
# Python-side package object.
n_ou_sim = SignatureTranslatedAnonymousPackage(n_ou_sim_string, "n_ou_sim")
def n_ou_simulation(num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length):
    """Simulate ``num_sim`` bivariate Ornstein-Uhlenbeck pairs via R.

    All arguments are forwarded by keyword to the R function ``n_sim_ou``;
    ``length`` is the number of sampling steps of one series. The R result
    is wrapped in a pandas DataFrame and transposed before it is returned;
    callers in this file treat each column of the result as one series.
    """
    r_arguments = dict(
        num_sim=num_sim,
        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22,
        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22,
        xinit_vec=xinit_vec, T0=T0, T=T, length=length,
    )
    r_result = n_ou_sim.n_sim_ou(**r_arguments)
    return pd.DataFrame(r_result).T

def price_to_log_price(n_price):
    """Return the element-wise natural logarithm of ``n_price``."""
    log_price = np.log(n_price)
    return log_price
def log_price_to_price(n_log_price):
    """Return the element-wise exponential of ``n_log_price``."""
    price = np.exp(n_log_price)
    return price
def price_to_return(n_price):
    """Turn every price column into percentage log-returns.

    For each column the result holds ``100 * (log p[t+1] - log p[t])``,
    labelled with the index of the earlier observation, so the output has
    one row fewer than ``n_price`` and the same column labels.
    """
    pieces = [pd.DataFrame()]
    for _, prices in n_price.items():
        log_prices = np.log(prices)
        # The later values are stripped to a bare array so the subtraction is
        # positional and the result keeps the index of the earlier rows.
        pieces.append(100 * (log_prices[1:].values - log_prices[:-1]))
    return pd.concat(pieces, axis=1)
def log_price_to_return(n_log_price):
    """Turn every log-price column into percentage returns.

    Each output column is ``100 * (x[t+1] - x[t])``, labelled with the index
    of the earlier observation; the output has one row fewer than the input.
    """
    pieces = [pd.DataFrame()]
    for _, log_prices in n_log_price.items():
        # Positional difference: the later values are taken as a bare array.
        step = log_prices[1:].values - log_prices[:-1]
        pieces.append(100 * step)
    return pd.concat(pieces, axis=1)

# Define the function of transforming returns data into feature statistics (or moments).
def cal_stats(n_return, n_price=None):
    """Summarise each pair of return series by six statistics.

    The columns of ``n_return`` are read as interleaved pairs: positions
    0, 2, 4, ... are the first series of each pair and positions 1, 3, 5, ...
    the second. The result has one row per pair and the columns
    return_mean1/2, return_sd1/2 (pandas ``std``) and
    return_autocorrelation1/2 (lag-1 autocorrelation).

    ``n_price`` is accepted for signature compatibility and is not used.
    """
    first_leg = n_return.iloc[:, ::2]
    second_leg = n_return.iloc[:, 1::2]

    def lag_one(column):
        return column.autocorr(lag=1)

    # One entry per statistic; the frame is transposed afterwards so that
    # each row describes one pair.
    rows = [
        first_leg.mean(axis=0).values,
        second_leg.mean(axis=0).values,
        first_leg.std(axis=0).values,
        second_leg.std(axis=0).values,
        first_leg.apply(lag_one),
        second_leg.apply(lag_one),
    ]
    stats_data = pd.DataFrame(rows).T
    stats_data.columns = [
        'return_mean1', 'return_mean2',
        'return_sd1', 'return_sd2',
        'return_autocorrelation1', 'return_autocorrelation2']
    return stats_data

def loss_function(params):
    """Return the summed absolute gap between reference and candidate moments.

    Every call simulates two batches of ``num_sim`` pairs with
    ``n_ou_simulation``:

    * a reference batch driven by the module-level ``mu12``, ``mu22``,
      ``sigma11`` and ``sigma22``;
    * a candidate batch in which those four values are replaced by
      ``params[0]``, ``params[1]``, ``params[2]`` and ``params[3]``.

    Both batches are converted to prices, returns and per-pair statistics
    (``cal_stats``). The loss is the sum of ``|reference - candidate|`` over
    all pairs and statistics.

    NOTE: the reference statistics are simulated inside this function; the
    module-level ``real_stats`` is not used here. No seed is passed to the
    simulator, so repeated calls with the same ``params`` can differ.

    Args:
        params: sequence of four floats (mu12, mu22, sigma11, sigma22).

    Returns:
        The scalar loss.
    """
    params = FloatVector(params)
    print(params)

    # Reference batch, simulated with the module-level "true" parameters.
    n_real_log_price = n_ou_simulation(
        num_sim,
        mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
        xinit_vec, T0, T, length)
    print(n_real_log_price.iloc[5, 5])
    n_real_price = log_price_to_price(n_log_price=n_real_log_price)
    n_real_return = price_to_return(n_price=n_real_price)
    n_real_stats = cal_stats(n_return=n_real_return, n_price=None)

    # Candidate batch, simulated with the parameters under evaluation.
    n_sim_log_price = n_ou_simulation(
        num_sim=num_sim,
        mu11=mu11, mu12=params[0], mu21=mu21, mu22=params[1],
        sigma11=params[2], sigma12=sigma12, sigma21=sigma21, sigma22=params[3],
        xinit_vec=xinit_vec, T0=T0, T=T, length=length)
    print(n_sim_log_price.iloc[5, 5])
    n_sim_price = log_price_to_price(n_sim_log_price)
    n_sim_return = price_to_return(n_sim_price)
    n_sim_stats = cal_stats(n_sim_return)

    # Both frames come from cal_stats and share index and columns, so one
    # vectorised subtraction replaces the former cell-by-cell loop.
    moment_loss = (n_real_stats - n_sim_stats).abs()
    # Explicit axis: per-statistic totals first, then the grand total.
    sum_all = moment_loss.sum(axis=0)
    total_loss = sum_all.sum()
    print(sum_all)
    print(total_loss)
    print('---')

    return total_loss



# Observed market data: pair prices and pair returns for 2018.
real_price = pd.read_csv("sp500_20180101_20181231_pair_prices.csv", index_col=[0])
real_return = pd.read_csv("sp500_20180101_20181231_pair_returns.csv", index_col=[0])
real_log_price = price_to_log_price(n_price=real_price)
real_stats = cal_stats(n_return=real_return, n_price=None)

# Parameters held fixed during the optimisation.
mu11 = mu21 = sigma12 = sigma21 = 0
# Reference values of the four parameters the optimiser searches over.
mu12, mu22 = 0.0369, 0.0405
sigma11, sigma22 = 0.2437, 0.2573

# Simulation grid: one simulated pair per row of real_stats.
T0, T = 0, 1
num_sim = real_stats.shape[0]
length = real_price.shape[0]

# Starting point of every simulated pair: the first observed log prices.
xinit_vec = [
    FloatVector([real_log_price.iloc[0, 2 * pair], real_log_price.iloc[0, 2 * pair + 1]])
    for pair in range(real_log_price.shape[1] // 2)
]

num_iter = 1
initial0 = [0.5] * 4


def multi_process(iter):
    """Run one bounded Powell minimisation of ``loss_function``.

    ``iter`` is the run number and is only printed. The search starts at
    ``initial0`` with every parameter bounded to [0, 1]. Returns the tuple
    ``(res.x, elapsed time, loss)``, where the loss is a fresh evaluation of
    ``loss_function`` at ``res.x``.
    """
    print(iter)

    started_at = datetime.datetime.now()
    unit_box = [(0., 1)] * 4
    res = minimize(loss_function, initial0, method='Powell',
                   tol=1e-6, options={'disp': True}, bounds=unit_box)
    print(res.x)

    elapsed = datetime.datetime.now() - started_at
    print(elapsed)

    final_loss = loss_function(res.x)
    print(final_loss)
    return (res.x, elapsed, final_loss)


# Fan the optimisation runs out over worker processes. The pool is used as a
# context manager so its workers are shut down once map() has returned,
# instead of lingering for the rest of the session.
iterations = list(range(num_iter))
with multiprocessing.Pool() as pool:
    result = pool.map(multi_process, iterations)
print(result)

# ou48 Powell with no seed

In [None]:
import pandas as pd
import numpy as np
from scipy import stats
from scipy.optimize import minimize
import scipy
import os
os.environ['R_HOME'] = '/Library/Frameworks/R.framework/Versions/4.1/Resources/'
import rpy2
from rpy2.robjects.packages import importr
from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
import datetime
from rpy2.robjects.vectors import FloatVector
import multiprocessing


# Define the model that generates pair simulations.
# Load the R package `yuima` so that the R source below can call its
# setModel / setSampling / simulate functions.
yuima = importr("yuima")
# R source for `n_sim_ou`. It builds a two-dimensional model with drift
# (mu11 - mu12*X1, mu21 - mu22*X2) and the 2x2 diffusion matrix
# [[sigma11, sigma12], [sigma21, sigma22]], simulates it `num_sim` times on a
# grid of `length` steps between T0 and T, and returns a data.frame with
# length+1 rows and 2*num_sim columns (columns 2i-1 and 2i hold pair i).
# This variant never calls set.seed(), so the draws depend on R's current
# RNG state.
n_ou_sim_string = """
n_sim_ou = function(num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length){

  drift = c("mu11-mu12*X1", "mu21-mu22*X2")
  diffusion = matrix(c("sigma11", "sigma12", "sigma21", "sigma22"), 2, 2, byrow=TRUE)
  ou_model = setModel(drift=drift, diffusion=diffusion, 
                        time.variable = "t",
                        state.var=c("X1","X2"), solve.variable=c("X1","X2"))

  newsamp = setSampling(Initial=T0, Terminal=T, n=length)

  n_sim_ou_data = data.frame(matrix(nrow=length+1, ncol=2*num_sim))
  for (i in 1:num_sim){
    ou_sim = simulate(ou_model, 
                      true.par=list(
                        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22, 
                        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22), 
                      xinit=xinit_vec[i], sampling=newsamp)
    original_data = ou_sim@data@original.data
    one_sim_ou = data.frame(original_data[,1], original_data[,2])
    colnames(one_sim_ou) = c('series1', 'series2')
    n_sim_ou_data[, (2*i-1):(2*i)] = one_sim_ou
  }
  return(n_sim_ou_data)
}
"""

# Evaluate the R source and expose `n_sim_ou` as an attribute of a
# Python-side package object.
n_ou_sim = SignatureTranslatedAnonymousPackage(n_ou_sim_string, "n_ou_sim")
def n_ou_simulation(num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length):
    """Simulate ``num_sim`` bivariate Ornstein-Uhlenbeck pairs via R.

    All arguments are forwarded by keyword to the R function ``n_sim_ou``;
    ``length`` is the number of sampling steps of one series. The R result
    is wrapped in a pandas DataFrame and transposed before it is returned;
    callers in this file treat each column of the result as one series.
    """
    r_arguments = dict(
        num_sim=num_sim,
        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22,
        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22,
        xinit_vec=xinit_vec, T0=T0, T=T, length=length,
    )
    r_result = n_ou_sim.n_sim_ou(**r_arguments)
    return pd.DataFrame(r_result).T

def price_to_log_price(n_price):
    """Return the element-wise natural logarithm of ``n_price``."""
    log_price = np.log(n_price)
    return log_price
def log_price_to_price(n_log_price):
    """Return the element-wise exponential of ``n_log_price``."""
    price = np.exp(n_log_price)
    return price
def price_to_return(n_price):
    """Turn every price column into percentage log-returns.

    For each column the result holds ``100 * (log p[t+1] - log p[t])``,
    labelled with the index of the earlier observation, so the output has
    one row fewer than ``n_price`` and the same column labels.
    """
    pieces = [pd.DataFrame()]
    for _, prices in n_price.items():
        log_prices = np.log(prices)
        # The later values are stripped to a bare array so the subtraction is
        # positional and the result keeps the index of the earlier rows.
        pieces.append(100 * (log_prices[1:].values - log_prices[:-1]))
    return pd.concat(pieces, axis=1)
def log_price_to_return(n_log_price):
    """Turn every log-price column into percentage returns.

    Each output column is ``100 * (x[t+1] - x[t])``, labelled with the index
    of the earlier observation; the output has one row fewer than the input.
    """
    pieces = [pd.DataFrame()]
    for _, log_prices in n_log_price.items():
        # Positional difference: the later values are taken as a bare array.
        step = log_prices[1:].values - log_prices[:-1]
        pieces.append(100 * step)
    return pd.concat(pieces, axis=1)

# Define the function of transforming returns data into feature statistics (or moments).
def cal_stats(n_return, n_price=None):
    """Summarise each pair of return series by eight statistics.

    The columns of ``n_return`` are read as interleaved pairs: positions
    0, 2, 4, ... are the first series of each pair and positions 1, 3, 5, ...
    the second. The result has one row per pair and the columns
    return_mean1/2, return_sd1/2, return_skew1/2 and return_kurtosis1/2,
    computed with the pandas ``mean``, ``std``, ``skew`` and ``kurtosis``
    reductions.

    ``n_price`` is accepted for signature compatibility and is not used.
    """
    return_series1 = n_return.iloc[:, ::2]
    return_series2 = n_return.iloc[:, 1::2]

    mean1 = return_series1.mean(axis=0).values
    sd1 = return_series1.std(axis=0).values
    skew1 = return_series1.skew(axis=0).values
    kurtosis1 = return_series1.kurtosis(axis=0).values

    mean2 = return_series2.mean(axis=0).values
    sd2 = return_series2.std(axis=0).values
    skew2 = return_series2.skew(axis=0).values
    kurtosis2 = return_series2.kurtosis(axis=0).values

    # Exactly eight rows, matching the eight column names below. The earlier
    # version also listed autocorr1/autocorr2, which are never computed in
    # this variant and raised NameError.
    stats_data = pd.DataFrame(
        [mean1, mean2, sd1, sd2, skew1, skew2, kurtosis1, kurtosis2])
    stats_data = stats_data.transpose()
    stats_data.columns = [
        'return_mean1', 'return_mean2',
        'return_sd1', 'return_sd2',
        'return_skew1', 'return_skew2',
        'return_kurtosis1', 'return_kurtosis2']
    return stats_data

def loss_function(params):
    """Return the summed absolute gap between reference and candidate moments.

    Every call simulates two batches of ``num_sim`` pairs with
    ``n_ou_simulation``:

    * a reference batch driven by the module-level ``mu12``, ``mu22``,
      ``sigma11`` and ``sigma22``;
    * a candidate batch in which those four values are replaced by
      ``params[0]``, ``params[1]``, ``params[2]`` and ``params[3]``.

    Both batches are converted to prices, returns and per-pair statistics
    (``cal_stats``). The loss is the sum of ``|reference - candidate|`` over
    all pairs and statistics.

    NOTE: the reference statistics are simulated inside this function; the
    module-level ``real_stats`` is not used here. No seed is passed to the
    simulator, so repeated calls with the same ``params`` can differ.

    Args:
        params: sequence of four floats (mu12, mu22, sigma11, sigma22).

    Returns:
        The scalar loss.
    """
    params = FloatVector(params)
    print(params)

    # Reference batch, simulated with the module-level "true" parameters.
    n_real_log_price = n_ou_simulation(
        num_sim,
        mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
        xinit_vec, T0, T, length)
    print(n_real_log_price.iloc[5, 5])
    n_real_price = log_price_to_price(n_log_price=n_real_log_price)
    n_real_return = price_to_return(n_price=n_real_price)
    n_real_stats = cal_stats(n_return=n_real_return, n_price=None)

    # Candidate batch, simulated with the parameters under evaluation.
    n_sim_log_price = n_ou_simulation(
        num_sim=num_sim,
        mu11=mu11, mu12=params[0], mu21=mu21, mu22=params[1],
        sigma11=params[2], sigma12=sigma12, sigma21=sigma21, sigma22=params[3],
        xinit_vec=xinit_vec, T0=T0, T=T, length=length)
    print(n_sim_log_price.iloc[5, 5])
    n_sim_price = log_price_to_price(n_sim_log_price)
    n_sim_return = price_to_return(n_sim_price)
    n_sim_stats = cal_stats(n_sim_return)

    # Both frames come from cal_stats and share index and columns, so one
    # vectorised subtraction replaces the former cell-by-cell loop.
    moment_loss = (n_real_stats - n_sim_stats).abs()
    # Explicit axis: per-statistic totals first, then the grand total.
    sum_all = moment_loss.sum(axis=0)
    total_loss = sum_all.sum()
    print(sum_all)
    print(total_loss)
    print('---')

    return total_loss



# Observed market data: pair prices and pair returns for 2018.
real_price = pd.read_csv("sp500_20180101_20181231_pair_prices.csv", index_col=[0])
real_return = pd.read_csv("sp500_20180101_20181231_pair_returns.csv", index_col=[0])
real_log_price = price_to_log_price(n_price=real_price)
real_stats = cal_stats(n_return=real_return, n_price=None)

# Parameters held fixed during the optimisation.
mu11 = mu21 = sigma12 = sigma21 = 0
# Reference values of the four parameters the optimiser searches over.
mu12, mu22 = 0.0369, 0.0405
sigma11, sigma22 = 0.2437, 0.2573

# Simulation grid: one simulated pair per row of real_stats.
T0, T = 0, 1
num_sim = real_stats.shape[0]
length = real_price.shape[0]

# Starting point of every simulated pair: the first observed log prices.
xinit_vec = [
    FloatVector([real_log_price.iloc[0, 2 * pair], real_log_price.iloc[0, 2 * pair + 1]])
    for pair in range(real_log_price.shape[1] // 2)
]

num_iter = 1
initial0 = [0.5] * 4


def multi_process(iter):
    """Run one bounded Powell minimisation of ``loss_function``.

    ``iter`` is the run number and is only printed. The search starts at
    ``initial0`` with every parameter bounded to [0, 1]. Returns the tuple
    ``(res.x, elapsed time, loss)``, where the loss is a fresh evaluation of
    ``loss_function`` at ``res.x``.
    """
    print(iter)

    started_at = datetime.datetime.now()
    unit_box = [(0., 1)] * 4
    res = minimize(loss_function, initial0, method='Powell',
                   tol=1e-6, options={'disp': True}, bounds=unit_box)
    print(res.x)

    elapsed = datetime.datetime.now() - started_at
    print(elapsed)

    final_loss = loss_function(res.x)
    print(final_loss)
    return (res.x, elapsed, final_loss)


# Fan the optimisation runs out over worker processes. The pool is used as a
# context manager so its workers are shut down once map() has returned,
# instead of lingering for the rest of the session.
iterations = list(range(num_iter))
with multiprocessing.Pool() as pool:
    result = pool.map(multi_process, iterations)
print(result)

# ou410 Powell with no seed

In [None]:
import pandas as pd
import numpy as np
from scipy import stats
from scipy.optimize import minimize
import scipy
import os
os.environ['R_HOME'] = '/Library/Frameworks/R.framework/Versions/4.1/Resources/'
import rpy2
from rpy2.robjects.packages import importr
from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
import datetime
from rpy2.robjects.vectors import FloatVector
import multiprocessing


# Define the model that generates pair simulations.
# Load the R package `yuima` so that the R source below can call its
# setModel / setSampling / simulate functions.
yuima = importr("yuima")
# R source for `n_sim_ou`. It builds a two-dimensional model with drift
# (mu11 - mu12*X1, mu21 - mu22*X2) and the 2x2 diffusion matrix
# [[sigma11, sigma12], [sigma21, sigma22]], simulates it `num_sim` times on a
# grid of `length` steps between T0 and T, and returns a data.frame with
# length+1 rows and 2*num_sim columns (columns 2i-1 and 2i hold pair i).
# This variant never calls set.seed(), so the draws depend on R's current
# RNG state.
n_ou_sim_string = """
n_sim_ou = function(num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length){

  drift = c("mu11-mu12*X1", "mu21-mu22*X2")
  diffusion = matrix(c("sigma11", "sigma12", "sigma21", "sigma22"), 2, 2, byrow=TRUE)
  ou_model = setModel(drift=drift, diffusion=diffusion, 
                        time.variable = "t",
                        state.var=c("X1","X2"), solve.variable=c("X1","X2"))

  newsamp = setSampling(Initial=T0, Terminal=T, n=length)

  n_sim_ou_data = data.frame(matrix(nrow=length+1, ncol=2*num_sim))
  for (i in 1:num_sim){
    ou_sim = simulate(ou_model, 
                      true.par=list(
                        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22, 
                        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22), 
                      xinit=xinit_vec[i], sampling=newsamp)
    original_data = ou_sim@data@original.data
    one_sim_ou = data.frame(original_data[,1], original_data[,2])
    colnames(one_sim_ou) = c('series1', 'series2')
    n_sim_ou_data[, (2*i-1):(2*i)] = one_sim_ou
  }
  return(n_sim_ou_data)
}
"""

# Evaluate the R source and expose `n_sim_ou` as an attribute of a
# Python-side package object.
n_ou_sim = SignatureTranslatedAnonymousPackage(n_ou_sim_string, "n_ou_sim")
def n_ou_simulation(num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length):
    """Simulate ``num_sim`` bivariate Ornstein-Uhlenbeck pairs via R.

    All arguments are forwarded by keyword to the R function ``n_sim_ou``;
    ``length`` is the number of sampling steps of one series. The R result
    is wrapped in a pandas DataFrame and transposed before it is returned;
    callers in this file treat each column of the result as one series.
    """
    r_arguments = dict(
        num_sim=num_sim,
        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22,
        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22,
        xinit_vec=xinit_vec, T0=T0, T=T, length=length,
    )
    r_result = n_ou_sim.n_sim_ou(**r_arguments)
    return pd.DataFrame(r_result).T

def price_to_log_price(n_price):
    """Return the element-wise natural logarithm of ``n_price``."""
    log_price = np.log(n_price)
    return log_price
def log_price_to_price(n_log_price):
    """Return the element-wise exponential of ``n_log_price``."""
    price = np.exp(n_log_price)
    return price
def price_to_return(n_price):
    """Turn every price column into percentage log-returns.

    For each column the result holds ``100 * (log p[t+1] - log p[t])``,
    labelled with the index of the earlier observation, so the output has
    one row fewer than ``n_price`` and the same column labels.
    """
    pieces = [pd.DataFrame()]
    for _, prices in n_price.items():
        log_prices = np.log(prices)
        # The later values are stripped to a bare array so the subtraction is
        # positional and the result keeps the index of the earlier rows.
        pieces.append(100 * (log_prices[1:].values - log_prices[:-1]))
    return pd.concat(pieces, axis=1)
def log_price_to_return(n_log_price):
    """Turn every log-price column into percentage returns.

    Each output column is ``100 * (x[t+1] - x[t])``, labelled with the index
    of the earlier observation; the output has one row fewer than the input.
    """
    pieces = [pd.DataFrame()]
    for _, log_prices in n_log_price.items():
        # Positional difference: the later values are taken as a bare array.
        step = log_prices[1:].values - log_prices[:-1]
        pieces.append(100 * step)
    return pd.concat(pieces, axis=1)

# Define the function of transforming returns data into feature statistics (or moments).
def cal_stats(n_return, n_price=None):
    """Summarise each pair of return series by ten statistics.

    The columns of ``n_return`` are read as interleaved pairs: positions
    0, 2, 4, ... are the first series of each pair and positions 1, 3, 5, ...
    the second. The result has one row per pair and the columns
    return_mean1/2, return_sd1/2, return_skew1/2, return_kurtosis1/2 and
    return_autocorrelation1/2 (lag-1 autocorrelation), computed with the
    corresponding pandas reductions.

    ``n_price`` is accepted for signature compatibility and is not used.
    """
    first_leg = n_return.iloc[:, ::2]
    second_leg = n_return.iloc[:, 1::2]

    def lag_one(column):
        return column.autocorr(lag=1)

    # One entry per statistic; the frame is transposed afterwards so that
    # each row describes one pair.
    rows = [
        first_leg.mean(axis=0).values,
        second_leg.mean(axis=0).values,
        first_leg.std(axis=0).values,
        second_leg.std(axis=0).values,
        first_leg.skew(axis=0).values,
        second_leg.skew(axis=0).values,
        first_leg.kurtosis(axis=0).values,
        second_leg.kurtosis(axis=0).values,
        first_leg.apply(lag_one),
        second_leg.apply(lag_one),
    ]
    stats_data = pd.DataFrame(rows).T
    stats_data.columns = [
        'return_mean1', 'return_mean2',
        'return_sd1', 'return_sd2',
        'return_skew1', 'return_skew2',
        'return_kurtosis1', 'return_kurtosis2',
        'return_autocorrelation1', 'return_autocorrelation2']
    return stats_data

def loss_function(params):
    """Return the summed absolute gap between reference and candidate moments.

    Every call simulates two batches of ``num_sim`` pairs with
    ``n_ou_simulation``:

    * a reference batch driven by the module-level ``mu12``, ``mu22``,
      ``sigma11`` and ``sigma22``;
    * a candidate batch in which those four values are replaced by
      ``params[0]``, ``params[1]``, ``params[2]`` and ``params[3]``.

    Both batches are converted to prices, returns and per-pair statistics
    (``cal_stats``). The loss is the sum of ``|reference - candidate|`` over
    all pairs and statistics.

    NOTE: the reference statistics are simulated inside this function; the
    module-level ``real_stats`` is not used here. No seed is passed to the
    simulator, so repeated calls with the same ``params`` can differ.

    Args:
        params: sequence of four floats (mu12, mu22, sigma11, sigma22).

    Returns:
        The scalar loss.
    """
    params = FloatVector(params)
    print(params)

    # Reference batch, simulated with the module-level "true" parameters.
    n_real_log_price = n_ou_simulation(
        num_sim,
        mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
        xinit_vec, T0, T, length)
    print(n_real_log_price.iloc[5, 5])
    n_real_price = log_price_to_price(n_log_price=n_real_log_price)
    n_real_return = price_to_return(n_price=n_real_price)
    n_real_stats = cal_stats(n_return=n_real_return, n_price=None)

    # Candidate batch, simulated with the parameters under evaluation.
    n_sim_log_price = n_ou_simulation(
        num_sim=num_sim,
        mu11=mu11, mu12=params[0], mu21=mu21, mu22=params[1],
        sigma11=params[2], sigma12=sigma12, sigma21=sigma21, sigma22=params[3],
        xinit_vec=xinit_vec, T0=T0, T=T, length=length)
    print(n_sim_log_price.iloc[5, 5])
    n_sim_price = log_price_to_price(n_sim_log_price)
    n_sim_return = price_to_return(n_sim_price)
    n_sim_stats = cal_stats(n_sim_return)

    # Both frames come from cal_stats and share index and columns, so one
    # vectorised subtraction replaces the former cell-by-cell loop.
    moment_loss = (n_real_stats - n_sim_stats).abs()
    # Explicit axis: per-statistic totals first, then the grand total.
    sum_all = moment_loss.sum(axis=0)
    total_loss = sum_all.sum()
    print(sum_all)
    print(total_loss)
    print('---')

    return total_loss



# Observed market data: pair prices and pair returns for 2018.
real_price = pd.read_csv("sp500_20180101_20181231_pair_prices.csv", index_col=[0])
real_return = pd.read_csv("sp500_20180101_20181231_pair_returns.csv", index_col=[0])
real_log_price = price_to_log_price(n_price=real_price)
real_stats = cal_stats(n_return=real_return, n_price=None)

# Parameters held fixed during the optimisation.
mu11 = mu21 = sigma12 = sigma21 = 0
# Reference values of the four parameters the optimiser searches over.
mu12, mu22 = 0.0369, 0.0405
sigma11, sigma22 = 0.2437, 0.2573

# Simulation grid: one simulated pair per row of real_stats.
T0, T = 0, 1
num_sim = real_stats.shape[0]
length = real_price.shape[0]

# Starting point of every simulated pair: the first observed log prices.
xinit_vec = [
    FloatVector([real_log_price.iloc[0, 2 * pair], real_log_price.iloc[0, 2 * pair + 1]])
    for pair in range(real_log_price.shape[1] // 2)
]

num_iter = 1
initial0 = [0.5] * 4


def multi_process(iter):
    """Run one bounded Powell minimisation of ``loss_function``.

    ``iter`` is the run number and is only printed. The search starts at
    ``initial0`` with every parameter bounded to [0, 1]. Returns the tuple
    ``(res.x, elapsed time, loss)``, where the loss is a fresh evaluation of
    ``loss_function`` at ``res.x``.
    """
    print(iter)

    started_at = datetime.datetime.now()
    unit_box = [(0., 1)] * 4
    res = minimize(loss_function, initial0, method='Powell',
                   tol=1e-6, options={'disp': True}, bounds=unit_box)
    print(res.x)

    elapsed = datetime.datetime.now() - started_at
    print(elapsed)

    final_loss = loss_function(res.x)
    print(final_loss)
    return (res.x, elapsed, final_loss)


# Fan the optimisation runs out over worker processes. The pool is used as a
# context manager so its workers are shut down once map() has returned,
# instead of lingering for the rest of the session.
iterations = list(range(num_iter))
with multiprocessing.Pool() as pool:
    result = pool.map(multi_process, iterations)
print(result)

# L-BFGS-B

In [5]:
import pandas as pd
import numpy as np
import random
from numpy.random import RandomState
from scipy import stats
from scipy.optimize import minimize
import scipy
import os
os.environ['R_HOME'] = '/Library/Frameworks/R.framework/Versions/4.1/Resources/'
import rpy2
from rpy2.robjects.packages import importr
from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
import datetime
from rpy2.robjects.vectors import FloatVector
import multiprocessing

# Seed NumPy's global RNG; the per-run and per-call R seeds used further
# down in this cell are drawn from it.
np.random.seed(441)

# Define the model that generates pair simulations.
# Load the R package `yuima` so that the R source below can call its
# setModel / setSampling / simulate functions.
yuima = importr("yuima")
# R source for `n_sim_ou`. It first calls set.seed(random_seed), then builds
# a two-dimensional model with drift (mu11 - mu12*X1, mu21 - mu22*X2) and a
# 2x2 diffusion matrix whose entries are exp(sigma11), exp(sigma12),
# exp(sigma21), exp(sigma22) -- i.e. the sigma arguments are on a log scale.
# The model is simulated `num_sim` times on a grid of `length` steps between
# T0 and T; the result is a data.frame with length+1 rows and 2*num_sim
# columns (columns 2i-1 and 2i hold pair i).
n_ou_sim_string = """
n_sim_ou = function(random_seed, num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length){

  set.seed(random_seed)

  drift = c("mu11-mu12*X1", "mu21-mu22*X2")
  diffusion = matrix(c("exp(sigma11)", "exp(sigma12)", "exp(sigma21)", "exp(sigma22)"), 2, 2, byrow=TRUE)
  ou_model = setModel(drift=drift, diffusion=diffusion, 
                        time.variable = "t",
                        state.var=c("X1","X2"), solve.variable=c("X1","X2"))

  newsamp = setSampling(Initial=T0, Terminal=T, n=length)

  n_sim_ou_data = data.frame(matrix(nrow=length+1, ncol=2*num_sim))
  for (i in 1:num_sim){
    ou_sim = simulate(ou_model, 
                      true.par=list(
                        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22, 
                        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22), 
                      xinit=xinit_vec[i], sampling=newsamp)
    original_data = ou_sim@data@original.data
    one_sim_ou = data.frame(original_data[,1], original_data[,2])
    colnames(one_sim_ou) = c('series1', 'series2')
    n_sim_ou_data[, (2*i-1):(2*i)] = one_sim_ou
  }
  return(n_sim_ou_data)
}
"""

# Evaluate the R source and expose `n_sim_ou` as an attribute of a
# Python-side package object.
n_ou_sim = SignatureTranslatedAnonymousPackage(n_ou_sim_string, "n_ou_sim")
def n_ou_simulation(random_seed, num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length):
    """Simulate ``num_sim`` bivariate Ornstein-Uhlenbeck pairs via R.

    All arguments, including ``random_seed``, are forwarded by keyword to the
    R function ``n_sim_ou``; ``length`` is the number of sampling steps of
    one series. The R result is wrapped in a pandas DataFrame and transposed
    before it is returned; callers in this file treat each column of the
    result as one series.
    """
    r_arguments = dict(
        random_seed=random_seed, num_sim=num_sim,
        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22,
        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22,
        xinit_vec=xinit_vec, T0=T0, T=T, length=length,
    )
    r_result = n_ou_sim.n_sim_ou(**r_arguments)
    return pd.DataFrame(r_result).T

def price_to_log_price(n_price):
    """Return the element-wise natural logarithm of ``n_price``."""
    log_price = np.log(n_price)
    return log_price

def log_price_to_price(n_log_price):
    """Return the element-wise exponential of ``n_log_price``."""
    price = np.exp(n_log_price)
    return price

def price_to_return(n_price):
    """Turn every price column into percentage log-returns.

    For each column the result holds ``100 * (log p[t+1] - log p[t])``,
    labelled with the index of the earlier observation, so the output has
    one row fewer than ``n_price`` and the same column labels.
    """
    pieces = [pd.DataFrame()]
    for _, prices in n_price.items():
        log_prices = np.log(prices)
        # The later values are stripped to a bare array so the subtraction is
        # positional and the result keeps the index of the earlier rows.
        pieces.append(100 * (log_prices[1:].values - log_prices[:-1]))
    return pd.concat(pieces, axis=1)

def log_price_to_return(n_log_price):
    """Turn every log-price column into percentage returns.

    Each output column is ``100 * (x[t+1] - x[t])``, labelled with the index
    of the earlier observation; the output has one row fewer than the input.
    """
    pieces = [pd.DataFrame()]
    for _, log_prices in n_log_price.items():
        # Positional difference: the later values are taken as a bare array.
        step = log_prices[1:].values - log_prices[:-1]
        pieces.append(100 * step)
    return pd.concat(pieces, axis=1)

def cal_stats(n_return, n_price=None):
    """Summarise each pair of return series by four statistics.

    The columns of ``n_return`` are read as interleaved pairs: positions
    0, 2, 4, ... are the first series of each pair and positions 1, 3, 5, ...
    the second. The result has one row per pair and the columns
    return_mean1, return_mean2, return_sd1 and return_sd2 (pandas ``mean``
    and ``std``).

    ``n_price`` is accepted for signature compatibility and is not used.
    """
    first_leg = n_return.iloc[:, ::2]
    second_leg = n_return.iloc[:, 1::2]

    # One entry per statistic; the frame is transposed afterwards so that
    # each row describes one pair.
    rows = [
        first_leg.mean(axis=0).values,
        second_leg.mean(axis=0).values,
        first_leg.std(axis=0).values,
        second_leg.std(axis=0).values,
    ]
    stats_data = pd.DataFrame(rows).T
    stats_data.columns = [
        'return_mean1', 'return_mean2',
        'return_sd1', 'return_sd2']
    return stats_data

def loss_function(params):
    """Return the summed absolute gap between reference and candidate moments.

    Every call simulates two batches of ``num_sim`` pairs with
    ``n_ou_simulation``, each with its own R seed drawn from NumPy's global
    RNG (reference seed first, candidate seed second):

    * a reference batch driven by the module-level ``mu12``, ``mu22``,
      ``sigma11`` and ``sigma22``;
    * a candidate batch in which those four values are replaced by
      ``params[0]``, ``params[1]``, ``params[2]`` and ``params[3]``.

    Both batches are converted to prices, returns and per-pair statistics
    (``cal_stats``). The loss is the sum of ``|reference - candidate|`` over
    all pairs and statistics.

    NOTE: the reference statistics are simulated inside this function; the
    module-level ``real_stats`` is not used here.

    Args:
        params: sequence of four floats (mu12, mu22, sigma11, sigma22).

    Returns:
        The scalar loss.
    """
    params = FloatVector(params)
    print(params)

    # The size-1 draw is indexed before int(): calling int() on a 1-element
    # array directly is deprecated in recent NumPy releases.
    real_seed = int(np.random.randint(low=0, high=980608, size=(1,))[0])
    # Reference batch, simulated with the module-level "true" parameters.
    n_real_log_price = n_ou_simulation(
        real_seed, num_sim,
        mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
        xinit_vec, T0, T, length)
    print(n_real_log_price.iloc[5, 5])
    n_real_price = log_price_to_price(n_log_price=n_real_log_price)
    n_real_return = price_to_return(n_price=n_real_price)
    n_real_stats = cal_stats(n_return=n_real_return, n_price=None)

    sim_seed = int(np.random.randint(low=0, high=980608, size=(1,))[0])
    # Candidate batch, simulated with the parameters under evaluation.
    n_sim_log_price = n_ou_simulation(
        random_seed=sim_seed, num_sim=num_sim,
        mu11=mu11, mu12=params[0], mu21=mu21, mu22=params[1],
        sigma11=params[2], sigma12=sigma12, sigma21=sigma21, sigma22=params[3],
        xinit_vec=xinit_vec, T0=T0, T=T, length=length)
    print(n_sim_log_price.iloc[5, 5])
    n_sim_price = log_price_to_price(n_sim_log_price)
    n_sim_return = price_to_return(n_sim_price)
    n_sim_stats = cal_stats(n_sim_return)

    # Both frames come from cal_stats and share index and columns, so one
    # vectorised subtraction replaces the former cell-by-cell loop.
    moment_loss = (n_real_stats - n_sim_stats).abs()
    # Explicit axis: per-statistic totals first, then the grand total.
    sum_all = moment_loss.sum(axis=0)
    total_loss = sum_all.sum()
    print(sum_all)
    print(total_loss)
    print('---')

    return total_loss



# Observed market data: pair prices and pair returns for 2018.
real_price = pd.read_csv("sp500_20180101_20181231_pair_prices.csv", index_col=[0])
real_return = pd.read_csv("sp500_20180101_20181231_pair_returns.csv", index_col=[0])
real_log_price = price_to_log_price(n_price=real_price)
real_stats = cal_stats(n_return=real_return, n_price=None)

# Parameters held fixed during the optimisation. The R model in this cell
# exponentiates the sigma entries, so -1000 makes the off-diagonal diffusion
# terms numerically zero.
mu11 = mu21 = 0
sigma12 = sigma21 = -1000
# Reference values of the four parameters the optimiser searches over
# (sigma values on the log scale).
mu12, mu22 = 0.0369, 0.0405
sigma11, sigma22 = -1.4118, -1.3574

# Simulation grid: one simulated pair per row of real_stats.
T0, T = 0, 1
num_sim = real_stats.shape[0]
length = real_price.shape[0]

# Starting point of every simulated pair: the first observed log prices.
xinit_vec = [
    FloatVector([real_log_price.iloc[0, 2 * pair], real_log_price.iloc[0, 2 * pair + 1]])
    for pair in range(real_log_price.shape[1] // 2)
]

num_iter = 1
initial0 = [1, 1, -1, -1]

# One NumPy seed per optimisation run, drawn from the global RNG.
iter_seed = np.random.randint(low=0, high=980608, size=(num_iter,))
def multi_process(iter):
    """Run one bounded L-BFGS-B minimisation of ``loss_function``.

    ``iter`` is the run number; it is printed and selects the entry of
    ``iter_seed`` used to re-seed NumPy's global RNG for this run. The search
    starts at ``initial0``. Returns the tuple ``(res.x, elapsed time, loss)``,
    where the loss is a fresh evaluation of ``loss_function`` at ``res.x``.
    """
    print(iter)
    run_seed = int(iter_seed[iter])
    np.random.seed(run_seed)

    started_at = datetime.datetime.now()
    search_box = [(0., 1), (0., 1), (-10, 0), (-10, 0)]
    solver_options = {'eps': 0.5, 'gtol': 1e-4, 'iprint': 1, 'ftol': 1e-09}
    res = minimize(loss_function, initial0, method='L-BFGS-B',
                   bounds=search_box, options=solver_options)
    print(res.x)

    elapsed = datetime.datetime.now() - started_at
    print(elapsed)

    final_loss = loss_function(res.x)
    print(final_loss)
    return (res.x, elapsed, final_loss)


# Fan the optimisation runs out over worker processes. The pool is used as a
# context manager so its workers are shut down once map() has returned,
# instead of lingering for the rest of the session.
iterations = list(range(num_iter))
with multiprocessing.Pool() as pool:
    result = pool.map(multi_process, iterations)
print(result)

0
[1]  1  1 -1 -1

5.043932089409035
4.848796057384303
return_mean1    257.313378
return_mean2    251.792801
return_sd1      200.487853
return_sd2      178.956875
dtype: float64
888.550906163143
---
[1]  0.5  1.0 -1.0 -1.0

5.064931605699332
4.983529890659132
return_mean1    150.487917
return_mean2    248.394881
return_sd1      193.918719
return_sd2      181.083751
dtype: float64
773.8852678939413
---
[1]  1.0  0.5 -1.0 -1.0

5.023942365564527
5.050530316236692
return_mean1    255.502956
return_mean2    147.004769
return_sd1      201.371837
return_sd2      167.890546
dtype: float64
771.7701082987224
---
[1]  1.0  1.0 -0.5 -1.0

5.063860422575563
4.8977924216023005
return_mean1    263.589873
return_mean2    251.211521
return_sd1      575.290367
return_sd2      177.149370
dtype: float64
1267.2411309787794
---
[1]  1.0  1.0 -1.0 -0.5

5.083635029227609
4.941126425652707
return_mean1    259.888573
return_mean2    248.764799
return_sd1      199.502427
return_sd2      551.265025
dtype: float

dtype: float64
937.5687181234716
---
[1]   0.000000   0.500000 -10.000000  -9.977013

5.017271177690762
4.969454534149802
return_mean1     21.715570
return_mean2    149.693060
return_sd1      381.871126
return_sd2      379.978407
dtype: float64
933.2581641002628
---
[1]  0.000000  0.000000 -9.500000 -9.977013

5.046526224477517
5.019473480837054
return_mean1     22.325481
return_mean2     23.669915
return_sd1      383.186378
return_sd2      402.872928
dtype: float64
832.0547015867696
---
[1]   0.000000   0.000000 -10.000000  -9.477013

5.029849614130829
5.01945836114361
return_mean1     26.232097
return_mean2     22.026872
return_sd1      381.203345
return_sd2      401.344199
dtype: float64
830.8065134479084
---
[  0.           0.         -10.          -9.97701282]
0:02:12.149247
[1]   0.000000   0.000000 -10.000000  -9.977013

4.956089208956063
5.019457914502061
return_mean1     22.723460
return_mean2     26.089380
return_sd1      380.674610
return_sd2      404.183294
dtype: float64
8

Process ForkPoolWorker-90:
Process ForkPoolWorker-93:
Process ForkPoolWorker-91:
Process ForkPoolWorker-89:
Process ForkPoolWorker-92:
Process ForkPoolWorker-88:
Process ForkPoolWorker-94:
Process ForkPoolWorker-87:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()


# BasinHopping

In [3]:
import pandas as pd
import numpy as np
import random
from numpy.random import RandomState
from scipy import stats
from scipy.optimize import minimize
import scipy
import os
os.environ['R_HOME'] = '/Library/Frameworks/R.framework/Versions/4.1/Resources/'
import rpy2
from rpy2.robjects.packages import importr
from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
import datetime
from rpy2.robjects.vectors import FloatVector
import multiprocessing

# Seed NumPy's global generator; the integer seeds drawn from it further down
# in this cell therefore come out the same on every run.
np.random.seed(441)

# Define the model that generates pair simulations.
# Load the R package "yuima" through rpy2 so that the R source below can call
# its setModel / setSampling / simulate functions.
yuima = importr("yuima")
# R source for `n_sim_ou`.  As written below it:
#   * seeds R's RNG with `random_seed`;
#   * builds a two-variable model with drift  mu_i1 - mu_i2 * X_i  and a 2x2
#     diffusion matrix whose entries are exp(sigma_ij), so the sigma
#     arguments are on a log scale;
#   * samples on [T0, T] with n = `length` steps (the result table is
#     allocated with length + 1 rows);
#   * runs `num_sim` simulations, starting run i from `xinit_vec[i]`, and
#     stores run i's two series in columns 2i-1 and 2i of the returned
#     data.frame.
# NOTE(review): `xinit_vec[i]` uses single-bracket indexing; if `xinit_vec`
# arrives in R as a list this yields a one-element list rather than the
# numeric pair itself -- confirm that yuima's `xinit` handles that as intended.
n_ou_sim_string = """
n_sim_ou = function(random_seed, num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length){

  set.seed(random_seed)

  drift = c("mu11-mu12*X1", "mu21-mu22*X2")
  diffusion = matrix(c("exp(sigma11)", "exp(sigma12)", "exp(sigma21)", "exp(sigma22)"), 2, 2, byrow=TRUE)
  ou_model = setModel(drift=drift, diffusion=diffusion, 
                        time.variable = "t",
                        state.var=c("X1","X2"), solve.variable=c("X1","X2"))

  newsamp = setSampling(Initial=T0, Terminal=T, n=length)

  n_sim_ou_data = data.frame(matrix(nrow=length+1, ncol=2*num_sim))
  for (i in 1:num_sim){
    ou_sim = simulate(ou_model, 
                      true.par=list(
                        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22, 
                        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22), 
                      xinit=xinit_vec[i], sampling=newsamp)
    original_data = ou_sim@data@original.data
    one_sim_ou = data.frame(original_data[,1], original_data[,2])
    colnames(one_sim_ou) = c('series1', 'series2')
    n_sim_ou_data[, (2*i-1):(2*i)] = one_sim_ou
  }
  return(n_sim_ou_data)
}
"""

# Compile the R source into an anonymous rpy2 package; the R function is then
# reachable from Python as `n_ou_sim.n_sim_ou`.
n_ou_sim = SignatureTranslatedAnonymousPackage(n_ou_sim_string, "n_ou_sim")
def n_ou_simulation(random_seed, num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length):
    """Simulate ``num_sim`` bivariate Ornstein-Uhlenbeck pairs via the R helper.

    Every argument is forwarded unchanged, by keyword, to the R function
    ``n_sim_ou`` exposed by the module-level ``n_ou_sim`` package
    (``length`` is the number of sampling steps of one series).

    Returns:
        The R result wrapped in a ``pandas.DataFrame`` and transposed.
        Presumably this puts time steps on the rows and the
        (series1, series2) column pairs on the columns -- confirm against
        the rpy2 conversion in use.
    """
    r_kwargs = dict(
        random_seed=random_seed, num_sim=num_sim,
        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22,
        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22,
        xinit_vec=xinit_vec, T0=T0, T=T, length=length,
    )
    r_frame = n_ou_sim.n_sim_ou(**r_kwargs)
    simulated = pd.DataFrame(r_frame)
    return simulated.transpose()

def price_to_log_price(n_price):
    """Return the element-wise natural logarithm of ``n_price``."""
    log_price = np.log(n_price)
    return log_price

def log_price_to_price(n_log_price):
    """Return the element-wise exponential of ``n_log_price``."""
    price = np.exp(n_log_price)
    return price

def price_to_return(n_price):
    """Convert each price column into percentage log returns.

    For every column the result holds ``100 * (log p[t+1] - log p[t])``.
    The later prices are taken as a bare array, so each return carries the
    index label of the earlier of its two observations.  Columns are
    appended to the output one at a time with ``pd.concat(..., axis=1)``.
    """
    returns = pd.DataFrame()
    for col in range(n_price.shape[1]):
        prices = n_price.iloc[:, col]
        later_log = np.log(prices[1:].values)
        earlier_log = np.log(prices[:-1])
        pct_log_return = 100 * (later_log - earlier_log)
        returns = pd.concat([returns, pct_log_return], axis=1)
    return returns

def log_price_to_return(n_log_price):
    """Convert each log-price column into percentage returns.

    For every column the result holds ``100 * (x[t+1] - x[t])``.  The later
    values are taken as a bare array, so each return carries the index label
    of the earlier of its two observations.  Columns are appended to the
    output one at a time with ``pd.concat(..., axis=1)``.
    """
    returns = pd.DataFrame()
    for col in range(n_log_price.shape[1]):
        log_prices = n_log_price.iloc[:, col]
        later = log_prices[1:].values
        earlier = log_prices[:-1]
        pct_return = 100 * (later - earlier)
        returns = pd.concat([returns, pct_return], axis=1)
    return returns

def cal_stats(n_return, n_price=None):
    """Summarise paired return columns by their mean and standard deviation.

    Columns at even positions (0, 2, ...) are treated as the first series of
    each pair and columns at odd positions as the second series.

    Args:
        n_return: DataFrame of returns with the pair columns interleaved.
        n_price: Accepted for call compatibility; not used.

    Returns:
        DataFrame with one row per pair and the columns ``return_mean1``,
        ``return_mean2``, ``return_sd1`` and ``return_sd2``.
    """
    first_legs = n_return.iloc[:, 0::2]
    second_legs = n_return.iloc[:, 1::2]
    # One row per statistic here; transposed below so each pair becomes a row.
    stat_rows = [
        first_legs.mean(axis=0).values,
        second_legs.mean(axis=0).values,
        first_legs.std(axis=0).values,
        second_legs.std(axis=0).values,
    ]
    summary = pd.DataFrame(stat_rows).transpose()
    summary.columns = [
        'return_mean1', 'return_mean2',
        'return_sd1', 'return_sd2']
    return summary

def loss_function(params):
    """Return the total gap between two simulated batches' return statistics.

    Each call draws two fresh integer seeds from NumPy's global generator and
    runs ``n_ou_simulation`` twice:

    * a reference batch that uses the module-level ``mu12``, ``mu22``,
      ``sigma11`` and ``sigma22``;
    * a candidate batch in which those four values are replaced by
      ``params[0]``, ``params[1]``, ``params[2]`` and ``params[3]``.

    Every other simulation input (``num_sim``, ``mu11``, ``mu21``,
    ``sigma12``, ``sigma21``, ``xinit_vec``, ``T0``, ``T``, ``length``) is
    read from module-level globals.  Both batches are exponentiated, turned
    into returns and summarised with ``cal_stats``; the loss is the sum over
    all simulations and statistics of ``sqrt((reference - candidate)**2)``.

    The module-level ``real_stats`` is used only as the shape/label template
    for the per-entry loss table.  Intermediate values are printed.

    Args:
        params: Sequence of four numbers, in the order given above.

    Returns:
        The scalar total loss.
    """
    r_params = FloatVector(params)
    print(r_params)
    loss_table = pd.DataFrame().reindex_like(real_stats)

    # Reference batch: first seed drawn from the global generator.
    reference_seed = int(np.random.randint(low=0, high=980608, size=(1,)))
    reference_log_price = n_ou_simulation(
        random_seed=reference_seed, num_sim=num_sim,
        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22,
        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22,
        xinit_vec=xinit_vec, T0=T0, T=T, length=length)
    print(reference_log_price.iloc[5, 5])
    reference_price = log_price_to_price(n_log_price=reference_log_price)
    reference_return = price_to_return(n_price=reference_price)
    reference_stats = cal_stats(n_return=reference_return, n_price=None)

    # Candidate batch: second seed, with the four trial parameters swapped in.
    candidate_seed = int(np.random.randint(low=0, high=980608, size=(1,)))
    candidate_log_price = n_ou_simulation(
        random_seed=candidate_seed, num_sim=num_sim,
        mu11=mu11, mu12=r_params[0], mu21=mu21, mu22=r_params[1],
        sigma11=r_params[2], sigma12=sigma12, sigma21=sigma21, sigma22=r_params[3],
        xinit_vec=xinit_vec, T0=T0, T=T, length=length)
    print(candidate_log_price.iloc[5, 5])
    candidate_price = log_price_to_price(candidate_log_price)
    candidate_return = price_to_return(candidate_price)
    candidate_stats = cal_stats(candidate_return)

    n_rows, n_cols = reference_stats.shape
    for row in range(n_rows):
        for col in range(n_cols):
            gap = reference_stats.iloc[row, col] - candidate_stats.iloc[row, col]
            loss_table.iloc[row, col] = np.sqrt(gap**2)

    per_statistic_total = np.sum(loss_table)
    print(per_statistic_total)
    total_loss = np.sum(per_statistic_total)
    print(total_loss)
    print('---')

    return total_loss



# --- Observed data ---------------------------------------------------------
real_price = pd.read_csv("sp500_20180101_20181231_pair_prices.csv", index_col=[0])
real_return = pd.read_csv("sp500_20180101_20181231_pair_returns.csv", index_col=[0])
real_log_price = price_to_log_price(n_price=real_price)
real_stats = cal_stats(n_return=real_return, n_price=None)

# --- Parameters held fixed in every simulation -------------------------------
# sigma12 / sigma21 enter the R diffusion matrix as exp(sigma), so -1000 makes
# the off-diagonal terms numerically zero.
mu11 = 0
mu21 = 0
sigma12 = -1000
sigma21 = -1000

# One starting point per pair: the first-row log prices of its two columns.
xinit_vec = []
n_pairs = int(real_log_price.shape[1]/2)
for i in range(n_pairs):
    init_pair_log_price = FloatVector(
        [real_log_price.iloc[0, 2*i], real_log_price.iloc[0, 2*i+1]])
    xinit_vec.append(init_pair_log_price)

# --- Simulation grid: one simulation per row of real_stats -------------------
num_sim = real_stats.shape[0]
T0 = 0
T = 1
length = real_price.shape[0]

# Parameter values used for the reference batch inside loss_function.
mu12 = 0.0369
mu22 = 0.0405
sigma11 = -1.4118
sigma22 = -1.3574


# --- Optimiser configuration --------------------------------------------------
num_iter = 1
initial0 = [1, 1, -1, -1]


# One pre-drawn seed per optimisation run.
iter_seed = np.random.randint(low=0, high=980608, size=(num_iter,))
def multi_process(iter):
    """Run one basin-hopping optimisation of ``loss_function``.

    Args:
        iter: Position of this run in the module-level ``iter_seed`` array;
            the seed found there reseeds NumPy's global generator first.

    Returns:
        Tuple ``(x, elapsed, loss)``: the optimiser's final parameter
        vector, the wall-clock duration of the optimisation as a
        ``timedelta``, and the value of one further ``loss_function``
        evaluation at ``x``.
    """
    print(iter)
    run_seed = int(iter_seed[iter])
    np.random.seed(run_seed)

    started = datetime.datetime.now()
    local_minimizer = {'method': "L-BFGS-B"}
    outcome = scipy.optimize.basinhopping(
        func=loss_function, x0=initial0, niter=300, stepsize=0.1,
        minimizer_kwargs=local_minimizer)
    best_x = outcome.x
    print(best_x)

    elapsed = datetime.datetime.now() - started
    print(elapsed)

    # Evaluate the loss once more at the returned point and report that value.
    final_loss = loss_function(best_x)
    print(final_loss)
    return best_x, elapsed, final_loss


# Run the `num_iter` independent optimisations in a worker pool and collect
# each run's (x, elapsed, loss) tuple in submission order.
iterations = list(range(num_iter))
# Use the pool as a context manager so its worker processes are shut down
# once `map` has returned (or raised) instead of being left running.
with multiprocessing.Pool() as pool:
    result = pool.map(multi_process, iterations)
print(result)

0
[1]  1  1 -1 -1

5.043932089409035
4.848796057384303
return_mean1    257.313378
return_mean2    251.792801
return_sd1      200.487853
return_sd2      178.956875
dtype: float64
888.550906163143
---
[1]  1  1 -1 -1

5.064931605699332
4.983529890659132
return_mean1    254.207321
return_mean2    248.394881
return_sd1      199.448291
return_sd2      181.083751
dtype: float64
883.1342437458206
---
[1]  1  1 -1 -1

5.023942365564527
5.000547725125207
return_mean1    255.502956
return_mean2    247.950594
return_sd1      201.371837
return_sd2      173.147819
dtype: float64
877.9732064004382
---
[1]  1  1 -1 -1

5.063860422575563
4.8977924216023005
return_mean1    261.661229
return_mean2    251.211521
return_sd1      202.147930
return_sd2      177.149370
dtype: float64
892.1700496038671
---
[1]  1  1 -1 -1

5.083635029227609
4.932762784814812
return_mean1    259.888573
return_mean2    249.592561
return_sd1      199.502427
return_sd2      178.989655
dtype: float64
887.9732157129876
---
[1]  1.4

dtype: float64
885.5233883163237
---
[1]  1.0005203  1.0010161 -1.0003477 -0.9999445

5.017271177690762
4.834498940007365
return_mean1    258.968506
return_mean2    249.690936
return_sd1      200.920286
return_sd2      179.022770
dtype: float64
888.6024980737416
---
[1]  1.0005203  1.0010161 -1.0003477 -0.9999445

5.046526224477517
5.029175238760915
return_mean1    259.556604
return_mean2    249.860454
return_sd1      200.340379
return_sd2      177.784282
dtype: float64
887.5417201017116
---
[1]  1.0005203  1.0010161 -1.0003477 -0.9999445

5.029849614130829
4.913523909830187
return_mean1    253.423881
return_mean2    252.839670
return_sd1      199.601483
return_sd2      177.842708
dtype: float64
883.70774310489
---
[1]  1.0001584  1.0003093 -1.0001058 -0.9999831

4.956089208956063
4.906704494342372
return_mean1    262.585887
return_mean2    249.608363
return_sd1      203.376116
return_sd2      178.677460
dtype: float64
894.2478265695977
---
[1]  1.0001584  1.0003093 -1.0001058 -0.99998

dtype: float64
888.7987832589104
---
[1]  1  1 -1 -1

5.026780202155203
4.95355953415954
return_mean1    256.651500
return_mean2    248.368377
return_sd1      198.737641
return_sd2      177.169637
dtype: float64
880.9271547067849
---
[1]  1  1 -1 -1

5.004780317640271


From cffi callback Process ForkPoolWorker-21:
Process ForkPoolWorker-19:
Process ForkPoolWorker-23:
Process ForkPoolWorker-17:
<function _processevents at 0x7fd419fb1b90>Process ForkPoolWorker-22:


KeyboardInterrupt: 

Process ForkPoolWorker-18:
:
Process ForkPoolWorker-20:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/site-packages/rpy2/rinterface_lib/callbacks.py", line 274, in _processevents
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/opt/homebrew/ana

4.856773536761149
return_mean1    258.269497
return_mean2    249.573383
return_sd1      201.352865
return_sd2      179.463085
dtype: float64
888.6588308496888
---
[1]  1  1 -1 -1

5.03878764509143
5.061965587967287
return_mean1    257.720888
return_mean2    252.360014
return_sd1      202.860500
return_sd2      178.717994
dtype: float64
891.659394886576
---
[1]  1  1 -1 -1

4.967380382266251
4.849463360349527
return_mean1    255.632700
return_mean2    254.035351
return_sd1      198.207529
return_sd2      179.165119
dtype: float64
887.0406992705418
---
[1]  1  1 -1 -1

4.975963715553491
4.8895338907290995
return_mean1    255.275381
return_mean2    247.860297
return_sd1      199.596290
return_sd2      175.857549
dtype: float64
878.5895169949424
---
[1]  1  1 -1 -1

5.021371513798262
4.884125232290712
return_mean1    255.208470
return_mean2    250.323663
return_sd1      203.186202
return_sd2      178.888601
dtype: float64
887.6069363087001
---
[1]  1  1 -1 -1

4.976624879441185
4.872997325


5.009878944808496
4.751476810772063
return_mean1    323.520443
return_mean2    322.166878
return_sd1      333.921244
return_sd2      576.777489
dtype: float64
1556.386054014716
---
[1]  1.1808751  1.2896636 -0.9534553 -0.7626076

5.037685561965899
4.637889295694369
return_mean1    281.517954
return_mean2    289.591213
return_sd1      234.030181
return_sd2      334.558737
dtype: float64
1139.6980840866981
---
[1]  1.1808751  1.2896636 -0.9534553 -0.7626076

5.041694365858872
4.881713410331922
return_mean1    286.106487
return_mean2    293.477858
return_sd1      230.316034
return_sd2      337.002372
dtype: float64
1146.902750802235
---
[1]  1.1808751  1.2896636 -0.9534553 -0.7626076

4.987327040108075
4.870503243708655
return_mean1    284.110280
return_mean2    291.320436
return_sd1      230.203887
return_sd2      342.754101
dtype: float64
1148.3887041316189
---
[1]  1.1808751  1.2896636 -0.9534553 -0.7626076

5.019084329709779
4.8100747700256035
return_mean1    283.149103
return_mean2 

dtype: float64
846.9294351164527
---
[1]  0.9411862  1.0110531 -1.0709142 -0.9821025

4.96714510645127
4.934493693880225
return_mean1    248.928394
return_mean2    248.771774
return_sd1      157.476247
return_sd2      192.812260
dtype: float64
847.9886756241931
---
[1]  0.9411862  1.0110531 -1.0709142 -0.9821025



Process ForkPoolWorker-16:
Traceback (most recent call last):
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/multiprocessing/pool.py", line 121, in worker
    result = (True, func(*args, **kwds))
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/multiprocessing/pool.py", line 44, in mapstar
    return list(map(*args))
  File "<ipython-input-3-5eba658e46d3>", line 170, in multi_process
    minimizer_kwargs={'method': "L-BFGS-B"})
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/site-packages/scipy/optimize/_basinhopping.py", line 693, in basinhopping
    new_global_min = bh.one_cycle()
  File "/opt/homebrew/anaconda3/envs/venv37/lib/python3.7/site-packages/scipy/optimize/_basinhopping.py",

In [7]:
from scipy.optimize import minimize, rosen, rosen_der

# Bare expression as the last statement of the cell: the notebook echoes the
# function's repr, which shows its signature.
rosen_der

<function scipy.optimize.optimize.rosen_der(x)>