# Test Base Input Data

ほっといてもなかなかデータ出来ないのでとりあえず叩き台のインプットデータ作成

とりあえず
* ドル円為替（終値)
* 日経平均（終値)
* S&P500（終値, Volume, 修正終値）
* JGB10Y
* VIX（終値)
だけ用意.

またこれらに対して欠損日は線形補間とし、
* 絶対額
* 前日比
* 30日移動平均
* 120日移動平均
を暫定的に追加.

正規化はしていないので、インプット前に実施する必要あり.

## package読み込み

In [24]:
import quandl as ql
import pandas as pd

In [25]:
ql.ApiConfig.api_key = 'CbzNMfBFSFEFsiNNAe97' # APIKeyを置かないと取得制限

## 各種データ読み込み
後から追加可能

In [26]:
# 日本国債金利(10年)
df_Interest_JP = ql.get('MOFJ/INTEREST_RATE_JAPAN_10Y', start_date='2001-01-01')
df_Interest_JP.columns = ["JGB10"]
df_Interest_JP.head(10)

Unnamed: 0_level_0,JGB10
Date,Unnamed: 1_level_1
2001-01-04,1.639
2001-01-05,1.63
2001-01-09,1.565
2001-01-10,1.581
2001-01-11,1.519
2001-01-12,1.48
2001-01-15,1.499
2001-01-16,1.538
2001-01-17,1.553
2001-01-18,1.5


In [27]:
# USD/JPY
df_JPYUSD = ql.get('FRED/DEXJPUS', start_date='2001-01-01')
df_JPYUSD.columns = ["JP/US"]
df_JPYUSD.head(10)

Unnamed: 0_level_0,JP/US
DATE,Unnamed: 1_level_1
2001-01-02,114.73
2001-01-03,114.26
2001-01-04,115.47
2001-01-05,116.19
2001-01-08,115.97
2001-01-09,116.64
2001-01-10,116.26
2001-01-11,117.56
2001-01-12,117.76
2001-01-16,117.28


In [28]:
# 日経平均株価
df_STOCKJP = ql.get('NIKKEI/INDEX'+".4", start_date='2001-01-01')
df_STOCKJP.columns = ["Nikkei/Close"]
df_STOCKJP.head(10)

Unnamed: 0_level_0,Nikkei/Close
Date,Unnamed: 1_level_1
2001-01-04,13691.49
2001-01-05,13867.61
2001-01-09,13610.51
2001-01-10,13432.65
2001-01-11,13201.07
2001-01-12,13347.74
2001-01-15,13506.23
2001-01-16,13584.45
2001-01-17,13667.63
2001-01-18,13873.92


In [29]:
# S&P株価　終値、ボリューム、修正後終値
df_STOCKSP = ql.get('YAHOO/INDEX_GSPC', start_date="2001-01-01")
df_STOCKSP = df_STOCKSP[["Close","Volume","Adjusted Close"]]
df_STOCKSP.columns = ["S&P/Close","S&P/Volume","S&P/AdjClose"]
df_STOCKSP.head(10)

Unnamed: 0_level_0,S&P/Close,S&P/Volume,S&P/AdjClose
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2001-01-02,1283.27002,1129400000.0,1283.27002
2001-01-03,1347.560059,1880700000.0,1347.560059
2001-01-04,1333.339966,2131000000.0,1333.339966
2001-01-05,1298.349976,1430800000.0,1298.349976
2001-01-08,1295.859985,1115500000.0,1295.859985
2001-01-09,1300.800049,1191300000.0,1300.800049
2001-01-10,1313.27002,1296500000.0,1313.27002
2001-01-11,1326.819946,1411200000.0,1326.819946
2001-01-12,1318.550049,1276000000.0,1318.550049
2001-01-16,1326.650024,1205700000.0,1326.650024


In [30]:
# VIX
df_VIX = ql.get('NIKKEI/INDEX'+".4", start_date='2001-01-01')
df_VIX.columns = ["VIX/Close"]
df_VIX.tail(10)

Unnamed: 0_level_0,VIX/Close
Date,Unnamed: 1_level_1
2016-06-24,14952.02
2016-06-27,15309.21
2016-06-28,15323.14
2016-06-29,15566.83
2016-06-30,15575.92
2016-07-01,15682.48
2016-07-04,15775.8
2016-07-05,15669.33
2016-07-06,15378.99
2016-07-07,15276.24


## Data前処理
* 結合
* 欠損処理
* 前日比データ追加
* 移動平均データ追加

In [31]:
#横方向への結合, 'inner'で存在する箇所のみ
merge_list = [df_JPYUSD, df_Interest_JP, df_STOCKJP, df_STOCKSP, df_VIX]
df_merge = pd.concat(merge_list, axis=1)
df_merge.tail(5)

Unnamed: 0,JP/US,JGB10,Nikkei/Close,S&P/Close,S&P/Volume,S&P/AdjClose,VIX/Close
2016-07-01,102.55,-0.262,15682.48,2102.949951,3458890000.0,2102.949951,15682.48
2016-07-04,,-0.257,15775.8,,,,15775.8
2016-07-05,,-0.257,15669.33,2088.550049,3658380000.0,2088.550049,15669.33
2016-07-06,,-0.278,15378.99,2099.72998,3909380000.0,2099.72998,15378.99
2016-07-07,,,15276.24,2097.899902,3604550000.0,2097.899902,15276.24


In [32]:
# 欠損処理など
Data=df_merge.to_period(freq="D") # Dayをタイムピリオドに設定
Data = Data.interpolate() # データの前後で線形補間(データの前後以外nanはなくなる)
Data = Data.dropna(axis=0) # nanの行を落とす
Data.tail(5)

Unnamed: 0,JP/US,JGB10,Nikkei/Close,S&P/Close,S&P/Volume,S&P/AdjClose,VIX/Close
2016-07-01,102.55,-0.262,15682.48,2102.949951,3458890000.0,2102.949951,15682.48
2016-07-04,102.55,-0.257,15775.8,2095.75,3558635000.0,2095.75,15775.8
2016-07-05,102.55,-0.257,15669.33,2088.550049,3658380000.0,2088.550049,15669.33
2016-07-06,102.55,-0.278,15378.99,2099.72998,3909380000.0,2099.72998,15378.99
2016-07-07,102.55,-0.278,15276.24,2097.899902,3604550000.0,2097.899902,15276.24


In [33]:
# 前期比、移動平均
DataLag = Data.pct_change() # 前期比
DataLag.columns = [
    "JP/US.Lag", "JGB10.Lag", "Nikkei/Close.Lag", "S&P/Close.Lag" , "S&P/Volume.Lag", "S&P/AdjClose.Lag", "VIX/Close.Lag"]

DataMoveAve30 = pd.rolling_mean(Data,30) # 30day移動平均
DataMoveAve30.columns = [
    "JP/US.MA30", "JGB10.MA30", "Nikkei/Close.MA30", "S&P/Close.MA30" , "S&P/Volume.MA30", "S&P/AdjClose.MA30", "VIX/Close.MA30"]

DataMoveAve120 = pd.rolling_mean(Data,120) # 120day移動平均
DataMoveAve120.columns = [
    "JP/US.MA120", "JGB10.MA120", "Nikkei/Close.MA120", "S&P/Close.MA120" , "S&P/Volume.MA120", "S&P/AdjClose.MA120", "VIX/Close.MA120"]

	DataFrame.rolling(window=30,center=False).mean()
	DataFrame.rolling(window=120,center=False).mean()


In [34]:
# 前期比と移動平均をマージ
InputList = [Data, DataLag, DataMoveAve30, DataMoveAve120 ]
InputData = pd.concat(InputList, axis=1)
InputData = InputData.dropna(axis=0) # nanの行を落とす
InputData.head(10)

Unnamed: 0,JP/US,JGB10,Nikkei/Close,S&P/Close,S&P/Volume,S&P/AdjClose,VIX/Close,JP/US.Lag,JGB10.Lag,Nikkei/Close.Lag,...,S&P/Volume.MA30,S&P/AdjClose.MA30,VIX/Close.MA30,JP/US.MA120,JGB10.MA120,Nikkei/Close.MA120,S&P/Close.MA120,S&P/Volume.MA120,S&P/AdjClose.MA120,VIX/Close.MA120
2001-06-20,123.3,1.201,12674.64,1223.140015,1350100000.0,1223.140015,12674.64,0.002684,0.029135,0.007983,...,1102100000.0,1261.507666,13461.619,120.279083,1.321554,13386.141,1254.843961,1200070000.0,1254.843961,13386.141
2001-06-21,124.4,1.182,12962.43,1237.040039,1546820000.0,1237.040039,12962.43,0.008921,-0.01582,0.022706,...,1118437000.0,1260.902999,13426.440333,120.3535,1.317746,13380.0655,1254.041461,1195202000.0,1254.041461,13380.0655
2001-06-22,124.25,1.171,13044.61,1225.349976,1189200000.0,1225.349976,13044.61,-0.001206,-0.009306,0.00634,...,1127871000.0,1260.225663,13393.13,120.420667,1.313921,13373.207167,1253.433128,1193188000.0,1253.433128,13373.207167
2001-06-25,124.02,1.144,12896.47,1218.599976,1050100000.0,1218.599976,12896.47,-0.001851,-0.023057,-0.011356,...,1134267000.0,1259.214994,13360.578333,120.48775,1.310142,13366.185583,1252.789294,1192644000.0,1252.789294,13366.185583
2001-06-26,123.82,1.14,12978.82,1216.76001,1198900000.0,1216.76001,12978.82,-0.001613,-0.003497,0.006385,...,1138504000.0,1258.125663,13324.738,120.547583,1.3066,13360.9215,1252.088961,1192707000.0,1252.088961,13360.9215
2001-06-27,124.25,1.133,12828.98,1211.069946,1162100000.0,1211.069946,12828.98,0.003473,-0.00614,-0.011545,...,1130397000.0,1255.661662,13295.895,120.614167,1.302867,13355.890917,1251.237293,1191587000.0,1251.237293,13355.890917
2001-06-28,124.63,1.125,12679.88,1226.199951,1327300000.0,1226.199951,12679.88,0.003058,-0.007061,-0.011622,...,1129454000.0,1253.585327,13254.868667,120.673083,1.299583,13351.547667,1250.398794,1190888000.0,1250.398794,13351.547667
2001-06-29,124.73,1.183,12969.05,1224.380005,1832360000.0,1224.380005,12969.05,0.000802,0.051556,0.022805,...,1152839000.0,1251.332662,13224.578,120.731167,1.297108,13348.391917,1249.614043,1195524000.0,1249.614043,13348.391917
2001-07-02,124.22,1.263,12751.18,1236.719971,1128300000.0,1236.719971,12751.18,-0.004089,0.067625,-0.016799,...,1151286000.0,1248.795663,13177.056333,120.787,1.295142,13342.099833,1248.898376,1194586000.0,1248.898376,13342.099833
2001-07-03,124.48,1.272,12817.41,1234.449951,622110000.0,1234.449951,12817.41,0.002093,0.007126,0.005194,...,1130010000.0,1246.297994,13134.597,120.847,1.292925,13335.707833,1248.130042,1189723000.0,1248.130042,13335.707833


## InputデータとOutput(予測)データを分割

ここでは日経平均の騰落率の水準を予測

In [35]:
# 予測系列は除外
OutList = ["Nikkei/Close", "Nikkei/Close.Lag", "Nikkei/Close.MA30", "Nikkei/Close.MA120"]
InputDF = InputData.drop(OutList, axis=1)
InputDF.head(10)

Unnamed: 0,JP/US,JGB10,S&P/Close,S&P/Volume,S&P/AdjClose,VIX/Close,JP/US.Lag,JGB10.Lag,S&P/Close.Lag,S&P/Volume.Lag,...,S&P/Close.MA30,S&P/Volume.MA30,S&P/AdjClose.MA30,VIX/Close.MA30,JP/US.MA120,JGB10.MA120,S&P/Close.MA120,S&P/Volume.MA120,S&P/AdjClose.MA120,VIX/Close.MA120
2001-06-20,123.3,1.201,1223.140015,1350100000.0,1223.140015,12674.64,0.002684,0.029135,0.008709,0.139421,...,1261.507666,1102100000.0,1261.507666,13461.619,120.279083,1.321554,1254.843961,1200070000.0,1254.843961,13386.141
2001-06-21,124.4,1.182,1237.040039,1546820000.0,1237.040039,12962.43,0.008921,-0.01582,0.011364,0.145708,...,1260.902999,1118437000.0,1260.902999,13426.440333,120.3535,1.317746,1254.041461,1195202000.0,1254.041461,13380.0655
2001-06-22,124.25,1.171,1225.349976,1189200000.0,1225.349976,13044.61,-0.001206,-0.009306,-0.00945,-0.231197,...,1260.225663,1127871000.0,1260.225663,13393.13,120.420667,1.313921,1253.433128,1193188000.0,1253.433128,13373.207167
2001-06-25,124.02,1.144,1218.599976,1050100000.0,1218.599976,12896.47,-0.001851,-0.023057,-0.005509,-0.116969,...,1259.214994,1134267000.0,1259.214994,13360.578333,120.48775,1.310142,1252.789294,1192644000.0,1252.789294,13366.185583
2001-06-26,123.82,1.14,1216.76001,1198900000.0,1216.76001,12978.82,-0.001613,-0.003497,-0.00151,0.141701,...,1258.125663,1138504000.0,1258.125663,13324.738,120.547583,1.3066,1252.088961,1192707000.0,1252.088961,13360.9215
2001-06-27,124.25,1.133,1211.069946,1162100000.0,1211.069946,12828.98,0.003473,-0.00614,-0.004676,-0.030695,...,1255.661662,1130397000.0,1255.661662,13295.895,120.614167,1.302867,1251.237293,1191587000.0,1251.237293,13355.890917
2001-06-28,124.63,1.125,1226.199951,1327300000.0,1226.199951,12679.88,0.003058,-0.007061,0.012493,0.142156,...,1253.585327,1129454000.0,1253.585327,13254.868667,120.673083,1.299583,1250.398794,1190888000.0,1250.398794,13351.547667
2001-06-29,124.73,1.183,1224.380005,1832360000.0,1224.380005,12969.05,0.000802,0.051556,-0.001484,0.380517,...,1251.332662,1152839000.0,1251.332662,13224.578,120.731167,1.297108,1249.614043,1195524000.0,1249.614043,13348.391917
2001-07-02,124.22,1.263,1236.719971,1128300000.0,1236.719971,12751.18,-0.004089,0.067625,0.010079,-0.384237,...,1248.795663,1151286000.0,1248.795663,13177.056333,120.787,1.295142,1248.898376,1194586000.0,1248.898376,13342.099833
2001-07-03,124.48,1.272,1234.449951,622110000.0,1234.449951,12817.41,0.002093,0.007126,-0.001836,-0.448631,...,1246.297994,1130010000.0,1246.297994,13134.597,120.847,1.292925,1248.130042,1189723000.0,1248.130042,13335.707833


In [36]:
# Nikkeiを当てに行くことにする
OutputDF = InputData[OutList]
OutputDF.head(10)

Unnamed: 0,Nikkei/Close,Nikkei/Close.Lag,Nikkei/Close.MA30,Nikkei/Close.MA120
2001-06-20,12674.64,0.007983,13461.619,13386.141
2001-06-21,12962.43,0.022706,13426.440333,13380.0655
2001-06-22,13044.61,0.00634,13393.13,13373.207167
2001-06-25,12896.47,-0.011356,13360.578333,13366.185583
2001-06-26,12978.82,0.006385,13324.738,13360.9215
2001-06-27,12828.98,-0.011545,13295.895,13355.890917
2001-06-28,12679.88,-0.011622,13254.868667,13351.547667
2001-06-29,12969.05,0.022805,13224.578,13348.391917
2001-07-02,12751.18,-0.016799,13177.056333,13342.099833
2001-07-03,12817.41,0.005194,13134.597,13335.707833


## 教師データの作成

次の日の騰落率を３区分に分類し、フラグを立てる

In [37]:
# 当てたいケースは次の日なのでずらす
OutputDF2 = OutputDF.join(OutputDF.shift(-1),rsuffix="_"+str(1))
OutputDF2.tail(10)

Unnamed: 0,Nikkei/Close,Nikkei/Close.Lag,Nikkei/Close.MA30,Nikkei/Close.MA120,Nikkei/Close_1,Nikkei/Close.Lag_1,Nikkei/Close.MA30_1,Nikkei/Close.MA120_1
2016-06-24,14952.02,-0.079216,16423.555,16580.480375,15309.21,0.023889,16384.982,16562.569958
2016-06-27,15309.21,0.023889,16384.982,16562.569958,15323.14,0.00091,16340.66,16546.771458
2016-06-28,15323.14,0.00091,16340.66,16546.771458,15566.83,0.015903,16304.731333,16528.864792
2016-06-29,15566.83,0.015903,16304.731333,16528.864792,15575.92,0.000584,16269.04,16514.989542
2016-06-30,15575.92,0.000584,16269.04,16514.989542,15682.48,0.006841,16233.911,16502.784292
2016-07-01,15682.48,0.006841,16233.911,16502.784292,15775.8,0.005951,16204.617667,16492.952875
2016-07-04,15775.8,0.005951,16204.617667,16492.952875,15669.33,-0.006749,16176.97,16481.460875
2016-07-05,15669.33,-0.006749,16176.97,16481.460875,15378.99,-0.018529,16131.024667,16472.817542
2016-07-06,15378.99,-0.018529,16131.024667,16472.817542,15276.24,-0.006681,16081.150667,16466.642375
2016-07-07,15276.24,-0.006681,16081.150667,16466.642375,,,,


フラグを立てる時、for文ではなく内包表記など早い記法で書くこと

思いつかなかったのでそれぞれでフラグ立てて、足すことにした

In [38]:
# Closeの変化率を区切る
# 1%以上なら2,-1%以下なら1,それ以外は0
OutputAns = OutputDF2.assign(
    D = [2 if OutputDF2["Nikkei/Close.Lag_1"][x]  >= 0.01 else 0 for x in range(OutputDF2.shape[0])]).assign(
    E = [1 if OutputDF2["Nikkei/Close.Lag_1"][x]  <= -0.01 else 0 for x in range(OutputDF2.shape[0])]).assign(
    F = [0 if OutputDF2["Nikkei/Close.Lag_1"][x]  >= -0.01 and OutputDF2["Nikkei/Close.Lag_1"][x]  <= 0.01  else 0 for x in range(OutputDF2.shape[0])])[
    ["Nikkei/Close.Lag_1","D","E","F"]]
OutputAns.tail(10)

Unnamed: 0,Nikkei/Close.Lag_1,D,E,F
2016-06-24,0.023889,2,0,0
2016-06-27,0.00091,0,0,0
2016-06-28,0.015903,2,0,0
2016-06-29,0.000584,0,0,0
2016-06-30,0.006841,0,0,0
2016-07-01,0.005951,0,0,0
2016-07-04,-0.006749,0,0,0
2016-07-05,-0.018529,0,1,0
2016-07-06,-0.006681,0,0,0
2016-07-07,,0,0,0


In [39]:
# 教師データを作成
OutputAns["Ans"] = OutputAns[["D","E","F"]].sum(axis=1)
OutputAns.tail(15)

Unnamed: 0,Nikkei/Close.Lag_1,D,E,F,Ans
2016-06-17,0.023439,2,0,0,2
2016-06-20,0.012766,2,0,0,2
2016-06-21,-0.006394,0,0,0,0
2016-06-22,0.010745,2,0,0,2
2016-06-23,-0.079216,0,1,0,1
2016-06-24,0.023889,2,0,0,2
2016-06-27,0.00091,0,0,0,0
2016-06-28,0.015903,2,0,0,2
2016-06-29,0.000584,0,0,0,0
2016-06-30,0.006841,0,0,0,0


## 学習用にInput Dataに教師データくっつける

別の学習用スクリプトの入力ファイルとする


In [40]:
InputCSV = pd.concat([InputDF,OutputAns], axis=1)
InputCSV = InputCSV.dropna(axis=0)
InputCSV = InputCSV.drop(["D","E","F"], axis=1)
InputCSV.tail(10)

Unnamed: 0,JP/US,JGB10,S&P/Close,S&P/Volume,S&P/AdjClose,VIX/Close,JP/US.Lag,JGB10.Lag,S&P/Close.Lag,S&P/Volume.Lag,...,S&P/AdjClose.MA30,VIX/Close.MA30,JP/US.MA120,JGB10.MA120,S&P/Close.MA120,S&P/Volume.MA120,S&P/AdjClose.MA120,VIX/Close.MA120,Nikkei/Close.Lag_1,Ans
2016-06-23,105.9,-0.146,2113.320068,3297940000.0,2113.320068,16238.35,0.012816,-0.026667,0.013364,0.040964,...,2083.372172,16472.228,111.618625,-0.035367,2012.685413,4149947000.0,2012.685413,16603.363208,-0.079216,1
2016-06-24,102.26,-0.201,2037.300049,7597450000.0,2037.300049,14952.02,-0.034372,0.376712,-0.035972,1.303696,...,2083.061841,16423.555,111.489625,-0.038958,2013.645997,4174385000.0,2013.645997,16580.480375,0.023889,2
2016-06-27,101.66,-0.202,2000.540039,5431220000.0,2000.540039,15309.21,-0.005867,0.004975,-0.018043,-0.285126,...,2080.857845,16384.982,111.357792,-0.042521,2014.28658,4181251000.0,2014.28658,16562.569958,0.00091,0
2016-06-28,102.71,-0.233,2036.089966,4385810000.0,2036.089966,15323.14,0.010329,0.153465,0.01777,-0.192482,...,2080.487179,16340.66,111.232208,-0.046304,2015.098329,4177072000.0,2015.098329,16546.771458,0.015903,2
2016-06-29,102.68,-0.242,2070.77002,4241740000.0,2070.77002,15566.83,-0.000292,0.038627,0.017033,-0.032849,...,2081.258512,16304.731333,111.104042,-0.050071,2016.602412,4170028000.0,2016.602412,16528.864792,0.000584,0
2016-06-30,102.77,-0.237,2098.860107,4622820000.0,2098.860107,15575.92,0.000877,-0.020661,0.013565,0.08984,...,2083.219181,16269.04,110.976875,-0.053971,2018.07758,4164875000.0,2018.07758,16514.989542,0.006841,0
2016-07-01,102.55,-0.262,2102.949951,3458890000.0,2102.949951,15682.48,-0.002141,0.105485,0.001949,-0.251779,...,2084.906844,16233.911,110.858292,-0.057996,2019.932747,4148129000.0,2019.932747,16502.784292,0.005951,0
2016-07-04,102.55,-0.257,2095.75,3558635000.0,2095.75,15775.8,0.0,-0.019084,-0.003424,0.028837,...,2086.497176,16204.617667,110.736042,-0.061854,2021.723747,4134464000.0,2021.723747,16492.952875,-0.006749,0
2016-07-05,102.55,-0.257,2088.550049,3658380000.0,2088.550049,15669.33,0.0,0.0,-0.003436,0.028029,...,2086.913509,16176.97,110.610125,-0.065796,2023.450581,4123881000.0,2023.450581,16481.460875,-0.018529,1
2016-07-06,102.55,-0.278,2099.72998,3909380000.0,2099.72998,15378.99,0.0,0.081712,0.005353,0.06861,...,2087.21984,16131.024667,110.494875,-0.069912,2025.453915,4102992000.0,2025.453915,16472.817542,-0.006681,0


### CSV出力する場合は次をコメントアウトしてね

In [41]:
#InputCSV.to_csv("InputDNNMarket.csv")