In [None]:
import pandas as pd
import numpy as np
import random
from numpy.random import RandomState
from scipy import stats
from scipy.optimize import minimize
import scipy
import os
os.environ['R_HOME'] = '/Library/Frameworks/R.framework/Versions/4.1/Resources/'
import rpy2
from rpy2.robjects.packages import importr
from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
import datetime
from rpy2.robjects.vectors import FloatVector
import multiprocessing

# Seed NumPy's global RNG. loss_function draws the seed it hands to R from
# this generator, so this fixes the sequence of R seeds used during a run.
np.random.seed(881)

# Define the model that generates pair simulations.
# The R source below calls setModel, setSampling and simulate, which are
# provided by the R package "yuima" loaded here through rpy2.
yuima = importr("yuima")
# R source for n_sim_ou (kept as a string and compiled further down):
#   * drift of X1 is mu11 - mu12*X1 and drift of X2 is mu21 - mu22*X2;
#   * the 2x2 diffusion matrix holds exp(sigma_ij), i.e. the sigma arguments
#     are on a log scale;
#   * the same model and sampling grid (n = length steps on [T0, T]) are
#     simulated num_sim times after a single set.seed(random_seed);
#   * the returned data frame has length+1 rows and 2*num_sim columns, with
#     columns (2i-1, 2i) holding the two series of simulation i.
# NOTE(review): `xinit_vec[i]` uses single brackets; if xinit_vec arrives in R
# as a list this selects a length-1 sub-list rather than the numeric pair
# (`xinit_vec[[i]]`) -- confirm this is what yuima's simulate expects.
n_ou_sim_string = """
n_sim_ou = function(random_seed, num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length){

  set.seed(random_seed)

  drift = c("mu11-mu12*X1", "mu21-mu22*X2")
  diffusion = matrix(c("exp(sigma11)", "exp(sigma12)", "exp(sigma21)", "exp(sigma22)"), 2, 2, byrow=TRUE)
  ou_model = setModel(drift=drift, diffusion=diffusion, 
                        time.variable = "t",
                        state.var=c("X1","X2"), solve.variable=c("X1","X2"))

  newsamp = setSampling(Initial=T0, Terminal=T, n=length)

  n_sim_ou_data = data.frame(matrix(nrow=length+1, ncol=2*num_sim))
  for (i in 1:num_sim){
    ou_sim = simulate(ou_model, 
                      true.par=list(
                        mu11=mu11, mu12=mu12, mu21=mu21, mu22=mu22, 
                        sigma11=sigma11, sigma12=sigma12, sigma21=sigma21, sigma22=sigma22), 
                      xinit=xinit_vec[i], sampling=newsamp)
    original_data = ou_sim@data@original.data
    one_sim_ou = data.frame(original_data[,1], original_data[,2])
    colnames(one_sim_ou) = c('series1', 'series2')
    n_sim_ou_data[, (2*i-1):(2*i)] = one_sim_ou
  }
  return(n_sim_ou_data)
}
"""

# Compile the R source into an anonymous rpy2 package; the R function is then
# callable from Python as n_ou_sim.n_sim_ou(...).
n_ou_sim = SignatureTranslatedAnonymousPackage(n_ou_sim_string, "n_ou_sim")
def n_ou_simulation(random_seed, num_sim,
                    mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22,
                    xinit_vec, T0, T, length):
    """Simulate ``num_sim`` bivariate Ornstein-Uhlenbeck pairs through R.

    Every argument is forwarded by keyword, unchanged, to the R function
    ``n_sim_ou`` exposed by the module-level ``n_ou_sim`` package.

    ``length`` is the number of sampling steps of one series (the R routine
    allocates ``length + 1`` rows per series).

    Returns:
        The R result wrapped in ``pd.DataFrame`` and then transposed.
        Presumably this yields one row per time point and two adjacent
        columns per simulation -- TODO confirm against the rpy2 conversion.
    """
    r_result = n_ou_sim.n_sim_ou(
        random_seed=random_seed,
        num_sim=num_sim,
        mu11=mu11,
        mu12=mu12,
        mu21=mu21,
        mu22=mu22,
        sigma11=sigma11,
        sigma12=sigma12,
        sigma21=sigma21,
        sigma22=sigma22,
        xinit_vec=xinit_vec,
        T0=T0,
        T=T,
        length=length,
    )
    simulated = pd.DataFrame(r_result)
    return simulated.transpose()

def price_to_log_price(n_price):
    """Return the element-wise natural logarithm of ``n_price``."""
    log_price = np.log(n_price)
    return log_price

def log_price_to_price(n_log_price):
    """Return the element-wise exponential of ``n_log_price``."""
    price = np.exp(n_log_price)
    return price

def price_to_return(n_price):
    """Convert price series into percentage log returns, column by column.

    For every column the return at row ``t`` is
    ``100 * (log(price[t + 1]) - log(price[t]))``.

    Args:
        n_price: DataFrame whose columns are price series.

    Returns:
        DataFrame with one row fewer than ``n_price``, the same column labels,
        and the index labels of all rows but the last.
    """
    # A frame without columns keeps returning a completely empty frame.
    if n_price.shape[1] == 0:
        return pd.DataFrame()

    # One vectorised difference replaces the per-column pd.concat loop, which
    # re-copied the growing result on every iteration.
    log_price = np.log(n_price.to_numpy(dtype=float))
    n_return = pd.DataFrame(
        100 * (log_price[1:] - log_price[:-1]),
        index=n_price.index[:-1],
        columns=n_price.columns,
    )
    return n_return

def log_price_to_return(n_log_price):
    """Convert log-price series into percentage log returns, column by column.

    For every column the return at row ``t`` is
    ``100 * (log_price[t + 1] - log_price[t])``.

    Args:
        n_log_price: DataFrame whose columns are log-price series.

    Returns:
        DataFrame with one row fewer than ``n_log_price``, the same column
        labels, and the index labels of all rows but the last.
    """
    # A frame without columns keeps returning a completely empty frame.
    if n_log_price.shape[1] == 0:
        return pd.DataFrame()

    # One vectorised difference replaces the per-column pd.concat loop, which
    # re-copied the growing result on every iteration.
    log_price = n_log_price.to_numpy(dtype=float)
    n_real_return = pd.DataFrame(
        100 * (log_price[1:] - log_price[:-1]),
        index=n_log_price.index[:-1],
        columns=n_log_price.columns,
    )
    return n_real_return

# Define the function of transforming returns data into feature statistics (or moments).
def cal_stats(n_return, n_price=None):
    """Summarise each pair of return series by eight moments.

    Columns at even positions (0, 2, ...) of ``n_return`` are treated as the
    first series of each pair and columns at odd positions as the second.
    For both groups the per-column mean, standard deviation, skewness and
    kurtosis are computed with the pandas reductions of the same name.

    ``n_price`` is accepted but not used.

    Returns:
        DataFrame with one row per pair and the eight ``return_*`` columns.
    """
    first_leg = n_return.iloc[:, ::2]
    second_leg = n_return.iloc[:, 1::2]

    # Row order must match the column labels below: statistic by statistic,
    # first series before second series.
    moment_rows = [
        getattr(leg, reducer)(axis=0).values
        for reducer in ('mean', 'std', 'skew', 'kurtosis')
        for leg in (first_leg, second_leg)
    ]

    # Rows are statistics at this point; transpose to get one row per pair.
    stats_data = pd.DataFrame(moment_rows).transpose()
    stats_data.columns = [
        'return_mean1', 'return_mean2',
        'return_sd1', 'return_sd2',
        'return_skew1', 'return_skew2',
        'return_kurtosis1', 'return_kurtosis2']
    return stats_data

def loss_function(params):
    """Moment-matching loss between the real data and one fresh simulation.

    Simulates log prices with ``n_ou_simulation`` for the given parameters,
    turns them into returns and summary statistics, and sums the absolute
    differences to the real statistics over all pairs and all statistics.

    Reads the module-level globals ``n_real_stats``, ``num_sim``,
    ``xinit_vec``, ``T0``, ``T`` and ``length``, which must be set before the
    first call. Prints the parameters, the per-statistic loss and the total.

    Args:
        params: sequence of eight numbers in the order
            mu11, mu12, mu21, mu22, sigma11, sigma12, sigma21, sigma22.

    Returns:
        The total loss as a scalar.

    Raises:
        IndexError: if the simulated statistics cover fewer rows or columns
            than ``n_real_stats``.

    NOTE(review): a new simulation seed is drawn on every call, so the value
    is noisy for fixed ``params``; confirm this is intended for the optimiser.
    """
    params = FloatVector(params)
    print(params)

    # Indexing the one-element draw before int() consumes the same random
    # number while avoiding int() on an array, deprecated since NumPy 1.25.
    random_seed = int(np.random.randint(low=0, high=980608, size=(1,))[0])

    n_sim_log_price = n_ou_simulation(
        random_seed=random_seed, num_sim=num_sim,
        mu11=params[0], mu12=params[1], mu21=params[2], mu22=params[3],
        sigma11=params[4], sigma12=params[5], sigma21=params[6], sigma22=params[7],
        xinit_vec=xinit_vec, T0=T0, T=T, length=length)
    # Difference the simulated log prices directly; going through exp() and
    # log() again adds nothing and can overflow for large log prices.
    n_sim_return = log_price_to_return(n_sim_log_price)
    n_sim_stats = cal_stats(n_sim_return)

    # Compare position by position over the extent of the real statistics.
    n_rows, n_cols = n_real_stats.shape
    real_values = n_real_stats.to_numpy(dtype=float)
    sim_values = n_sim_stats.iloc[:n_rows, :n_cols].to_numpy(dtype=float)
    if sim_values.shape != real_values.shape:
        raise IndexError(
            'simulated statistics have shape %s, expected at least %s'
            % (n_sim_stats.shape, n_real_stats.shape))

    moment_loss = pd.DataFrame(
        np.abs(real_values - sim_values),
        index=n_real_stats.index,
        columns=n_real_stats.columns,
    )
    # Explicit axis: np.sum(DataFrame) relied on axis=None reducing over rows,
    # which pandas deprecates in favour of a full reduction.
    # NOTE(review): pandas skips NaN here (skipna=True), so statistics that
    # come out as NaN contribute nothing to the loss -- confirm this is wanted.
    sum_all = moment_loss.sum(axis=0)
    total_loss = sum_all.sum()
    print(sum_all)
    print(total_loss)
    print('---')

    return total_loss



# Observed data: prices are only used for the initial log prices and the
# series length, while the statistics are computed from the returns file.
real_price = pd.read_csv("sp500_20180101_20181231_pair_prices.csv", index_col=[0])
real_log_price = price_to_log_price(n_price=real_price)
real_return = pd.read_csv("sp500_20180101_20181231_pair_returns.csv", index_col=[0])
real_stats = cal_stats(n_return=real_return, n_price=None)

# One FloatVector per pair holding the first-row log prices of its two
# adjacent columns; used as starting values for the simulations.
xinit_vec = [
    FloatVector([real_log_price.iloc[0, 2 * k], real_log_price.iloc[0, 2 * k + 1]])
    for k in range(real_log_price.shape[1] // 2)
]

# Simulation settings read as globals by loss_function.
num_sim = real_stats.shape[0]
T0, T = 0, 1
length = real_price.shape[0]

n_real_stats = real_stats



# Starting point: the four mu parameters at 1, the four sigma parameters at -1.
initial0 = [1] * 4 + [-1] * 4
begin_time = datetime.datetime.now()
# The first four parameters are bounded below by 0; the last four are free.
res = minimize(
    loss_function,
    initial0,
    method='Powell',
    tol=1e-6,
    options={'disp': True},
    bounds=[(0., None)] * 4 + [(None, None)] * 4,
)
print(res.x)

# Wall-clock duration of the optimisation.
time = datetime.datetime.now() - begin_time
print(time)

# Evaluate the loss once more at the reported optimum.
params = res.x
loss = loss_function(params)
print(loss)

[1]  1  1 -1 -1

return_mean1    266.810737
return_mean2    254.309517
return_sd1      173.505310
return_sd2      175.074364
dtype: float64
869.6999289105129
---
[1]  1.115092  1.000000 -1.000000 -1.000000

return_mean1    285.747569
return_mean2    255.209460
return_sd1      176.684491
return_sd2      175.052979
dtype: float64
892.6944996746786
---
[1]  1.793574  1.000000 -1.000000 -1.000000

return_mean1    354.886664
return_mean2    254.597384
return_sd1      197.507617
return_sd2      174.782313
dtype: float64
981.7739770368207
---
[1]  0.7667255  1.0000000 -1.0000000 -1.0000000

return_mean1    223.992947
return_mean2    257.082894
return_sd1      170.298600
return_sd2      170.487935
dtype: float64
821.8623764817106
---
[1]  0.5272218  1.0000000 -1.0000000 -1.0000000

return_mean1    172.976626
return_mean2    254.862270
return_sd1      167.707300
return_sd2      173.028143
dtype: float64
768.5743393229665
---
[1]  0.3548754  1.0000000 -1.0000000 -1.0000000

return_mean1    127.2

return_mean1     37.513061
return_mean2     36.042286
return_sd1      170.003149
return_sd2      167.542550
dtype: float64
411.10104663699497
---
[1]  0.04872717  0.04560194 -1.00000000 -1.00000000

return_mean1     36.989437
return_mean2     33.099695
return_sd1      168.832296
return_sd2      165.485238
dtype: float64
404.4066662320307
---
[1]  0.04872717  0.04427446 -1.00000000 -1.00000000

return_mean1     37.111164
return_mean2     35.084592
return_sd1      167.728053
return_sd2      168.516216
dtype: float64
408.44002583598854
---
[1]  0.04872717  0.05188696 -1.00000000 -1.00000000

return_mean1     35.392170
return_mean2     34.706886
return_sd1      167.400661
return_sd2      170.637225
dtype: float64
408.13694233523813
---
[1]  0.04872717  0.04800727 -1.00000000 -1.00000000

return_mean1     33.683854
return_mean2     36.473149
return_sd1      168.240351
return_sd2      168.708766
dtype: float64
407.1061198591656
---
[1]  0.04872717  0.04652138 -1.00000000 -1.00000000

return_

return_mean1      31.047558
return_mean2      84.678844
return_sd1        85.589487
return_sd2      1129.185429
dtype: float64
1330.5013182438793
---
[1]  0.04872717  0.04560194 -1.30408100 -2.61803400

return_mean1     30.156972
return_mean2     20.280663
return_sd1       87.145198
return_sd2      329.395289
dtype: float64
466.9781220098183
---
[1]  0.04872717  0.04560194 -1.30408100 -1.00000000

return_mean1     29.733384
return_mean2     32.968188
return_sd1       86.637604
return_sd2      167.370490
dtype: float64
316.7096666399348
---
[1]  0.04872717  0.04560194 -1.30408100 -1.61803397

return_mean1     30.423673
return_mean2     23.643706
return_sd1       87.189478
return_sd2      137.870175
dtype: float64
279.1270314285018
---
[1]  0.04872717  0.04560194 -1.30408100 -1.99999998

return_mean1     31.277601
return_mean2     22.931560
return_sd1       86.658691
return_sd2      232.593680
dtype: float64
373.4615326184498
---
[1]  0.04872717  0.04560194 -1.30408100 -1.40780476

retur

return_mean1    32.060898
return_mean2    26.830304
return_sd1      88.692778
return_sd2      94.087175
dtype: float64
241.6711551581567
---
[1]  0.04521901  0.04560194 -1.30408100 -1.33087324

return_mean1    30.709796
return_mean2    27.981153
return_sd1      85.925202
return_sd2      93.598750
dtype: float64
238.21490043163652
---
[1]  0.04523459  0.04560194 -1.30408100 -1.33087324

return_mean1    30.656655
return_mean2    27.889268
return_sd1      86.226679
return_sd2      95.487157
dtype: float64
240.2597588419868
---
[1]  0.04520937  0.04560194 -1.30408100 -1.33087324

return_mean1    29.050795
return_mean2    29.608159
return_sd1      86.577077
return_sd2      94.894774
dtype: float64
240.13080485338094
---
[1]  0.04522496  0.04560194 -1.30408100 -1.33087324

return_mean1    30.575551
return_mean2    26.913453
return_sd1      86.564806
return_sd2      95.568971
dtype: float64
239.6227809462034
---
[1]  0.04521533  0.04560194 -1.30408100 -1.33087324

return_mean1    27.750413
re

return_mean1    31.816562
return_mean2    24.909592
return_sd1      85.948239
return_sd2      96.144091
dtype: float64
238.81848406047567
---
[1]  0.04521533  0.02109315 -1.92211497 -1.33087324

return_mean1     26.829083
return_mean2     27.296726
return_sd1      199.409672
return_sd2       93.768639
dtype: float64
347.3041205368977
---
[1]  0.04521533  0.02109315 -0.92211500 -1.33087324

return_mean1     37.073395
return_mean2     27.726900
return_sd1      211.086403
return_sd2       94.447114
dtype: float64
370.3338122027243
---
[1]  0.04521533  0.02109315 -1.44426552 -1.33087324

return_mean1    29.327990
return_mean2    25.695620
return_sd1      92.458161
return_sd2      94.850913
dtype: float64
242.33268334577576
---
[1]  0.04521533  0.02109315 -1.33436696 -1.33087324

return_mean1    29.684507
return_mean2    27.775063
return_sd1      85.168414
return_sd2      95.485738
dtype: float64
238.11372216642422
---
[1]  0.04521533  0.02109315 -1.34567660 -1.33087324

return_mean1    29.

return_mean1    30.722532
return_mean2    27.997176
return_sd1      85.823943
return_sd2      96.203932
dtype: float64
240.7475826101287
---
[1]  0.04521533  0.02109315 -1.34683880 -1.33808292

return_mean1    30.030504
return_mean2    26.881863
return_sd1      85.053904
return_sd2      94.948355
dtype: float64
236.91462617504789
---
[1]  0.04521533  0.02109315 -1.34683880 -1.33808619

return_mean1    33.277303
return_mean2    26.429866
return_sd1      87.063473
return_sd2      94.376272
dtype: float64
241.14691379170222
---
[1]  0.04521533  0.02109315 -1.34683880 -1.33808417

return_mean1    29.182461
return_mean2    29.177771
return_sd1      85.981361
return_sd2      95.044653
dtype: float64
239.38624615366575
---
[1]  0.041703482 -0.003415649 -1.389596598 -1.345296644

return_mean1    29.774765
return_mean2    28.484432
return_sd1      85.193859
return_sd2      94.247371
dtype: float64
237.70042776906172
---
[1]  0.68910144  0.02109315 -1.34683880 -1.33808494

return_mean1    208.03