# リッジ回帰を行う

## 準備

In [1]:
import pandas as pd
import numpy as np
import glob
from sklearn.linear_model import Ridge
from sklearn import preprocessing

In [2]:
stock_name_list = glob.glob("../output/*.csv")
stock_name_list

['../output\\LSIML_RV_mitsuifin_1sec.csv',
 '../output\\LSIML_RV_mitsuifin_5sec.csv']

In [3]:
df = pd.read_csv(stock_name_list[0])
df

Unnamed: 0,date,RV_sec1,LSIML_b=10,LSIML_b=50,LSIML_b=100,Num_jump_b=10,Num_jump_b=50,Num_jump_b=100,Size_jump_b=10,Size_jump_b=50,Size_jump_b=100
0,2018-09-03,0.000100,0.000034,0.000046,0.000046,1,4,6,0.000012,0.000024,0.000025
1,2018-09-04,0.000152,0.000040,0.000041,0.000047,1,5,9,0.000027,0.000028,0.000032
2,2018-09-05,0.000086,0.000025,0.000031,0.000036,1,4,8,0.000014,0.000017,0.000018
3,2018-09-06,0.000094,0.000038,0.000034,0.000039,0,4,6,0.000000,0.000011,0.000011
4,2018-09-07,0.000123,0.000039,0.000055,0.000044,2,3,14,0.000044,0.000023,0.000038
...,...,...,...,...,...,...,...,...,...,...,...
699,2021-07-26,0.000125,0.000025,0.000035,0.000046,1,4,5,0.000018,0.000016,0.000018
700,2021-07-27,0.000091,0.000023,0.000024,0.000031,1,4,5,0.000012,0.000013,0.000011
701,2021-07-28,0.000105,0.000031,0.000035,0.000044,1,6,7,0.000011,0.000019,0.000015
702,2021-07-29,0.000089,0.000027,0.000036,0.000035,1,4,9,0.000012,0.000019,0.000019


## データを読み込み、加工する

In [4]:
LSIML = df["LSIML_b=50"] + df["Size_jump_b=50"]
LSIML

0      0.000070
1      0.000069
2      0.000048
3      0.000045
4      0.000078
         ...   
699    0.000051
700    0.000037
701    0.000054
702    0.000055
703    0.000047
Length: 704, dtype: float64

### 移動平均, 一日ずれデータを計算する

In [5]:
week = LSIML.rolling(5).mean().shift()
month = LSIML.rolling(22).mean().shift()

In [6]:
month.dropna()

22     0.000075
23     0.000077
24     0.000083
25     0.000085
26     0.000089
         ...   
699    0.000053
700    0.000050
701    0.000050
702    0.000049
703    0.000050
Length: 682, dtype: float64

In [7]:
shifted_LSIML = LSIML.shift()
shifted_LSIML

0           NaN
1      0.000070
2      0.000069
3      0.000048
4      0.000045
         ...   
699    0.000057
700    0.000051
701    0.000037
702    0.000054
703    0.000055
Length: 704, dtype: float64

In [8]:
data = pd.concat([LSIML, shifted_LSIML, week, month], axis=1)
data.columns = ["tom", "tod", "week", "month"]
data

Unnamed: 0,tom,tod,week,month
0,0.000070,,,
1,0.000069,0.000070,,
2,0.000048,0.000069,,
3,0.000045,0.000048,,
4,0.000078,0.000045,,
...,...,...,...,...
699,0.000051,0.000057,0.000054,0.000053
700,0.000037,0.000051,0.000055,0.000050
701,0.000054,0.000037,0.000052,0.000050
702,0.000055,0.000054,0.000055,0.000049


### ジャンプサイズ

In [9]:
data["jump"] = df["Size_jump_b=50"].shift()

### 対数を取る

In [10]:
data["log_tom"] = np.log(data["tom"])
data["log_tod"] = np.log(data["tod"])
data["log_week"] = np.log(data["week"])
data["log_month"] = np.log(data["month"])
data

Unnamed: 0,tom,tod,week,month,jump,log_tom,log_tod,log_week,log_month
0,0.000070,,,,,-9.572020,,,
1,0.000069,0.000070,,,0.000024,-9.582794,-9.572020,,
2,0.000048,0.000069,,,0.000028,-9.953234,-9.582794,,
3,0.000045,0.000048,,,0.000017,-10.009332,-9.953234,,
4,0.000078,0.000045,,,0.000011,-9.454951,-10.009332,,
...,...,...,...,...,...,...,...,...,...
699,0.000051,0.000057,0.000054,0.000053,0.000024,-9.874341,-9.766005,-9.822059,-9.836261
700,0.000037,0.000051,0.000055,0.000050,0.000016,-10.203925,-9.874341,-9.812816,-9.895534
701,0.000054,0.000037,0.000052,0.000050,0.000013,-9.822236,-10.203925,-9.865343,-9.908713
702,0.000055,0.000054,0.000055,0.000049,0.000019,-9.805023,-9.822236,-9.815572,-9.923729


In [11]:
data["log_jump"] = np.log(1 + preprocessing.scale(data["jump"]))
data

Unnamed: 0,tom,tod,week,month,jump,log_tom,log_tod,log_week,log_month,log_jump
0,0.000070,,,,,-9.572020,,,,
1,0.000069,0.000070,,,0.000024,-9.582794,-9.572020,,,-0.373658
2,0.000048,0.000069,,,0.000028,-9.953234,-9.582794,,,-0.301884
3,0.000045,0.000048,,,0.000017,-10.009332,-9.953234,,,-0.514645
4,0.000078,0.000045,,,0.000011,-9.454951,-10.009332,,,-0.650463
...,...,...,...,...,...,...,...,...,...,...
699,0.000051,0.000057,0.000054,0.000053,0.000024,-9.874341,-9.766005,-9.822059,-9.836261,-0.374343
700,0.000037,0.000051,0.000055,0.000050,0.000016,-10.203925,-9.874341,-9.812816,-9.895534,-0.530175
701,0.000054,0.000037,0.000052,0.000050,0.000013,-9.822236,-10.203925,-9.865343,-9.908713,-0.603057
702,0.000055,0.000054,0.000055,0.000049,0.000019,-9.805023,-9.822236,-9.815572,-9.923729,-0.474161


### 日次データを読み込む

In [13]:
daily_data = pd.read_csv("../data/daily_data\Daily_Price_mitsuifin.csv").shift()
daily_data

Unnamed: 0,date,log_price,daily_return
0,,,
1,2018-09-03,8.381144,
2,2018-09-04,8.377701,-0.003443
3,2018-09-05,8.373092,-0.004609
4,2018-09-06,8.373092,0.000000
...,...,...,...
699,2021-07-21,8.209852,0.001361
700,2021-07-26,8.214194,0.004342
701,2021-07-27,8.226306,0.012112
702,2021-07-28,8.222016,-0.004290


In [14]:
asym_list = []
for i in range(len(daily_data)):
    asym_list.append(min([0, daily_data["daily_return"].iloc[i]]))

In [15]:
data["asym"] = asym_list

### 欠損値を削除する

In [16]:
data = data.dropna()
data

Unnamed: 0,tom,tod,week,month,jump,log_tom,log_tod,log_week,log_month,log_jump,asym
22,0.000105,0.000183,0.000099,0.000075,0.000133,-9.158795,-8.605646,-9.215656,-9.492234,0.727457,0.000000
23,0.000200,0.000105,0.000104,0.000077,0.000052,-8.515403,-9.158795,-9.169639,-9.470987,0.041230,0.000000
24,0.000100,0.000200,0.000130,0.000083,0.000070,-9.213129,-8.515403,-8.945842,-9.396302,0.245277,-0.012145
25,0.000129,0.000100,0.000139,0.000085,0.000027,-8.957312,-9.213129,-8.878672,-9.368154,-0.326463,0.000000
26,0.000103,0.000129,0.000143,0.000089,0.000048,-9.179210,-8.957312,-8.849551,-9.324511,-0.001372,-0.042254
...,...,...,...,...,...,...,...,...,...,...,...
699,0.000051,0.000057,0.000054,0.000053,0.000024,-9.874341,-9.766005,-9.822059,-9.836261,-0.374343,0.000000
700,0.000037,0.000051,0.000055,0.000050,0.000016,-10.203925,-9.874341,-9.812816,-9.895534,-0.530175,0.000000
701,0.000054,0.000037,0.000052,0.000050,0.000013,-9.822236,-10.203925,-9.865343,-9.908713,-0.603057,0.000000
702,0.000055,0.000054,0.000055,0.000049,0.000019,-9.805023,-9.822236,-9.815572,-9.923729,-0.474161,-0.004290


# リッジ回帰を行う

In [17]:
data

Unnamed: 0,tom,tod,week,month,jump,log_tom,log_tod,log_week,log_month,log_jump,asym
22,0.000105,0.000183,0.000099,0.000075,0.000133,-9.158795,-8.605646,-9.215656,-9.492234,0.727457,0.000000
23,0.000200,0.000105,0.000104,0.000077,0.000052,-8.515403,-9.158795,-9.169639,-9.470987,0.041230,0.000000
24,0.000100,0.000200,0.000130,0.000083,0.000070,-9.213129,-8.515403,-8.945842,-9.396302,0.245277,-0.012145
25,0.000129,0.000100,0.000139,0.000085,0.000027,-8.957312,-9.213129,-8.878672,-9.368154,-0.326463,0.000000
26,0.000103,0.000129,0.000143,0.000089,0.000048,-9.179210,-8.957312,-8.849551,-9.324511,-0.001372,-0.042254
...,...,...,...,...,...,...,...,...,...,...,...
699,0.000051,0.000057,0.000054,0.000053,0.000024,-9.874341,-9.766005,-9.822059,-9.836261,-0.374343,0.000000
700,0.000037,0.000051,0.000055,0.000050,0.000016,-10.203925,-9.874341,-9.812816,-9.895534,-0.530175,0.000000
701,0.000054,0.000037,0.000052,0.000050,0.000013,-9.822236,-10.203925,-9.865343,-9.908713,-0.603057,0.000000
702,0.000055,0.000054,0.000055,0.000049,0.000019,-9.805023,-9.822236,-9.815572,-9.923729,-0.474161,-0.004290


元系列に対して、非対称性が存在するか確かめる

In [18]:
x_names = ["tod", "week", "month", "asym", "jump"]
ridge = Ridge().fit(data[x_names], data["tom"])
# 係数を出力する
res_coef = pd.DataFrame(ridge.coef_).T 
res_coef.columns = x_names
res_coef

Unnamed: 0,tod,week,month,asym,jump
0,1.5e-05,1.2e-05,6e-06,-0.000388,5e-06


対数系列に対して、非対称性が存在するか確かめる

In [19]:
x_names = ["log_tod", "log_week", "log_month", "asym", "log_jump"]
ridge = Ridge().fit(data[x_names], data["log_tom"])
# 係数を出力する
res_coef = pd.DataFrame(ridge.coef_).T 
res_coef.columns = x_names
res_coef

Unnamed: 0,log_tod,log_week,log_month,asym,jump
0,0.460301,0.357776,0.098592,-0.5741,-0.000803
