In [1]:
#できること一覧
#データ取得
#dataframeに格納
#時間可視化
#移動平均線
#ゴールデン・デッドクロス
#上下判定
#ボリンジャーバンド
#トレンドラインを作成（支持線・抵抗線・直近）
#機械学習（ランダムフォレスト）
#gfs
#グラフ



In [2]:
#python_bitbankccのパッケージをインポート
#cloud9で起動するときのコマンド
#jupyter notebook --ip $IP --port $PORT --no-browser
import python_bitbankcc 
import datetime
import os 
import time
import numpy as np
import pandas as pd
import sys
from dateutil.relativedelta import relativedelta
#トレンドラインを引くため
from scipy.stats import linregress
#正規化
from sklearn.preprocessing import MinMaxScaler

In [3]:
#機械学習用のモジュール
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
from sklearn import linear_model
% matplotlib inline
from __future__ import print_function
import copy
import matplotlib
matplotlib.style.use('ggplot')

from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [4]:
# public API classのオブジェクトを取得
pub = python_bitbankcc.public()

In [5]:
#APIから基本データの取得、dataframeへ挿入
def get_candle(trade_name,span,back_day):
    # ロウソク足データを取得
    pub = python_bitbankcc.public()
    value = pub.get_candlestick( trade_name,span, back_day )
    candle = value['candlestick'][0]
    #print(candle['ohlcv'][0])
    dataframe = pd.DataFrame(candle['ohlcv'],columns=["Open", "High","Low","Close","Volume","Timestamp"])
    return dataframe


In [6]:
#指定した日から今日までの基本データをdataframeにして取得
#back_day,todayはdatetime
# span = ['1min', '5min', '15min', '30min', '1hour', '4hour', '8hour', '12hour', '1day', '1week']
#trade_name = ['btc_jpy', 'xrp_jpy', 'ltc_btc', 'eth_btc', 'mona_jpy', 'mona_btc', 'bcc_jpy', 'bcc_btc']

def make_df(trade_name,span,back_day,today):
    i = 1
    if (span =='1min')or(span== '5min')or(span== '15min')or(span== '30min')or(span== '1hour'):
        #基準が９時なので、そこを合わせてあげる
        if 0 <= today.hour < 9:
            k = 1
        elif 9 <= today.hour <= 23:
            k = 0

        while back_day <= today - datetime.timedelta(days=k):
            if i == 1:
                df1 = get_candle(trade_name,span,datetime.datetime.strftime(back_day, '%Y%m%d'))
                back_day = back_day + datetime.timedelta(days=1)
                #print(back_day,len(df1))
                i += 1
            else:
                df2 = get_candle(trade_name,span,datetime.datetime.strftime(back_day, '%Y%m%d'))
                df1 = pd.concat([df1, df2])
                back_day = back_day + datetime.timedelta(days=1)
                #print(back_day,len(df1))
                i += 1
    else:
        
        today = datetime.date(today.year, today.month, today.day)
        back_day = datetime.date(back_day.year, back_day.month, back_day.day)


        while back_day <= today:
            if i == 1:
                df1 = get_candle(trade_name,span,datetime.datetime.strftime(back_day, '%Y'))
                back_day = back_day + relativedelta(years=1)
                i += 1
            else:
                df2 = get_candle(trade_name,span,datetime.datetime.strftime(back_day, '%Y'))
                df1 = pd.concat([df1, df2])
                back_day = back_day +  relativedelta(years=1)
                i += 1
            

    return df1



In [7]:
def read_date(x):
    return datetime.datetime.fromtimestamp(x/1000)


In [8]:

# 単純移動平均（SMA）を取得する関数
#上記のdfデータフレーム
#numいくつの平均を求めるか
def getMA( df,num ):
 
    tmp = []
    avg = np.array([])
    for i in range(len(df) - num + 1):
        for j in range(num):
            #print(df['Close'][i+j])
            tmp.append( df['Close'][i+j])
            
         # 平均値計算
        value = np.average(tmp)
        avg = np.append(avg,value)
        tmp = []
    
    return avg

In [9]:
def getSTD( df,num ):
 
    tmp = []
    std = np.array([])
    for i in range(len(df) - num + 1):
        for j in range(num):
            #print(df['Close'][i+j])
            tmp.append( df['Close'][i+j])
            
         # 平均値計算
        value = np.std(tmp)
        std = np.append(std,value)
        tmp = []
    
    return std

In [10]:
#ゴールデン・デッドクロス判定
def golden_dead(data_df1):
    golden=np.zeros(len(data_df1['Cross']),dtype=int)
    dead=np.zeros(len(data_df1['Cross']),dtype=int)
    for i in range(len(data_df1['Cross'])-1):
        x = data_df1['Cross'][i]
        y = data_df1['Cross'][i+1]
        if ((x <= 0) & (y<=0)) |((x >= 0) & (y>=0)):
            pass
        elif ((x <= 0) & (y>=0)) :
            golden=np.insert(golden,i+1,1)
            golden=np.delete(golden,i+2)
        elif ((x >= 0) & (y<=0)) :
            dead=np.insert(dead,i+1,1)
            dead=np.delete(dead,i+2)

    return golden,dead
    

In [11]:
#highトレンドラインを作成する関数
#df データフレーム
#num どれだけ前から
def make_high_trend_line(df,num):
    slope= np.zeros(num-1,dtype=float)
    intercept = np.zeros(num-1,dtype=float)
    for i in range(len(df)-num+1):        
        df_fin = df.copy()
        df_fin = df_fin[i:i+num]
        df_high = df.copy()
        df_high = df_high[i:i+num]
    
        # 高値のトレンドライン
        while len(df_high)>3:
            reg_1 = linregress(x=df_high['index'],y=df_high['High'])
            df_high  =  df_high.loc[df_high['High']>reg_1[0]*df_high['index']+reg_1[1]]

        if len(df_high)<=1:
            pass
        else:
            reg_1 = linregress(x=df_high['index'],y=df_high['High'])
        
        slope = np.append(slope,reg_1[0])
        intercept  = np.append(intercept ,reg_1[1])
    return slope,intercept    

In [12]:
#lowトレンドラインを作成する関数
#df データフレーム
#num どれだけ前から
def make_low_trend_line(df,num):
    slope= np.zeros(num-1,dtype=float)
    intercept = np.zeros(num-1,dtype=float)
    for i in range(len(df)-num+1):        
        df_fin = df.copy()
        df_fin = df_fin[i:i+num]
        df_low = df.copy()
        df_low = df_low[i:i+num]

    # 安値のトレンドライン
        while len(df_low)>3: 
            reg_2 = linregress( x = df_low['index'], y = df_low['Low']   )
            df_low = df_low.loc[df_low['Low'] < reg_2[0] * df_low['index'] + reg_2[1]]

        if len(df_low)<=1:
            pass
        else:
            reg_2 = linregress(x = df_low['index'],y = df_low['Low'])
            
        slope = np.append(slope,reg_2[0])
        intercept  = np.append(intercept ,reg_2[1])
    return slope,intercept    

In [13]:
#slope,傾き　intercept,切片　df、データフレーム
def make_trend_value(slope,intercept,df):
    index = np.array(df['index'])
    trend_value = slope*index + intercept
    return trend_value

In [14]:
#関数の使い方
today = datetime.datetime.today() 
back_day = today - datetime.timedelta(days=90)
trade_name = 'xrp_jpy'
span = '30min'
num = 5

In [15]:
#基本データの取得
old = time.time()
data_df1 =  make_df(trade_name,span,back_day,today)
data_df1['Timestamp'] = data_df1['Timestamp'].apply(read_date)
#indexの割り振り
data_df1 = data_df1.reset_index()
#df1の値は全てstrなのでTimestamp以外floatにキャストする
#Timestampは文字列のままでいいので削除することを指定
execlude = ['Timestamp']
cast = [col for col in data_df1.columns if col not in execlude]
for item in cast:
    data_df1 = data_df1.astype({item: float})

print(time.time()-old)
print(data_df1.tail())

29.37573480606079
      index    Open    High     Low   Close        Volume           Timestamp
4342   22.0  41.339  41.346  40.902  41.230  5.392301e+06 2018-12-02 20:00:00
4343   23.0  41.230  41.350  41.150  41.305  1.683759e+06 2018-12-02 20:30:00
4344   24.0  41.305  41.500  41.250  41.416  3.133224e+06 2018-12-02 21:00:00
4345   25.0  41.427  41.699  41.402  41.450  3.466664e+06 2018-12-02 21:30:00
4346   26.0  41.450  41.589  41.380  41.395  1.182986e+06 2018-12-02 22:00:00


In [16]:
#移動平均線np
MA5 = getMA( data_df1,num)
MA25 = getMA( data_df1,num+20)
zero5 = np.zeros(num-1,dtype = float )
zero25 = np.zeros(num+20-1,dtype = float )
MA5 = np.insert(MA5, 0, zero5)
MA25 = np.insert(MA25, 0, zero25)
print(MA25,MA5)
print(MA25.shape,type(MA25),MA5.shape,type(MA5))

[ 0.       0.       0.      ... 41.82796 41.79888 41.77068] [ 0.      0.      0.     ... 41.3766 41.348  41.3592]
(4347,) <class 'numpy.ndarray'> (4347,) <class 'numpy.ndarray'>


In [17]:
#標準偏差計算
STD5 = getSTD( data_df1,num)
zero5 = np.zeros(num-1,dtype = float )
STD5 = np.insert(STD5, 0, zero5)
print(STD5,STD5.shape,type(STD5))

[0.         0.         0.         ... 0.12362298 0.07859008 0.08047708] (4347,) <class 'numpy.ndarray'>


In [18]:
#自作変数を作成する
data_df1['High-Low']=data_df1['High']-data_df1['Low']
data_df1['Close-Open']=data_df1['Close']-data_df1['Open']
data_df1['Similarity'] = data_df1['Close-Open'] / (data_df1['High-Low'] + 0.000001)
data_df1['(High-Low)*Volume '] = data_df1['High-Low']*data_df1['Volume']
data_df1['(Close-Open)*Volume '] = data_df1['Close-Open']*data_df1['Volume']
data_df1['High-Low']
data_df1['MA5'] = MA5
data_df1['MA25'] = MA25
data_df1['Cross'] = MA5 - MA25
data_df1['index'] = data_df1.index + 1
data_df1['bbd_p1']= MA5 + (STD5 * 1)
data_df1['bbd_p2']= MA5 + (STD5 * 2)
data_df1['bbd_p3']= MA5 + (STD5 * 3)
data_df1['bbd_m1']= MA5 - (STD5 * 1)
data_df1['bbd_m2']= MA5 - (STD5 * 2)
data_df1['bbd_m3']= MA5 - (STD5 * 3)

In [19]:
data_df1['golden'],data_df1['dead']  = golden_dead(data_df1)

In [20]:
old1 = time.time()
slope1,intercept1 = make_high_trend_line(df=data_df1,num=32)
print(time.time()-old1)

47.75989055633545


In [21]:
old1 = time.time()
slope2,intercept2 = make_low_trend_line(df=data_df1,num=32)
print(time.time()-old1)

50.378337383270264


In [22]:
tr_high = make_trend_value(slope1,intercept1,data_df1)
tr_low  = make_trend_value(slope2,intercept2,data_df1)

In [23]:
data_df1['high_slope'] = slope1
data_df1['low_slope'] = slope2
data_df1['tr_high'] = tr_high
data_df1['tr_low'] = tr_low
data_df1['tr_mid'] = (tr_high + tr_low)/2

In [24]:
#data作成までの時間
print(time.time()-old)
#作成変数の表示
data_df1.loc[len(data_df1)-9:]

134.19538760185242


Unnamed: 0,index,Open,High,Low,Close,Volume,Timestamp,High-Low,Close-Open,Similarity,...,bbd_m1,bbd_m2,bbd_m3,golden,dead,high_slope,low_slope,tr_high,tr_low,tr_mid
4338,4339,41.598,41.975,41.589,41.781,5186700.0,2018-12-02 18:00:00,0.386,0.183,0.474092,...,41.559869,41.480539,41.401208,0,0,-0.0835,-0.016941,41.5355,41.176471,41.355985
4339,4340,41.795,41.87,41.606,41.66,2114254.0,2018-12-02 18:30:00,0.264,-0.135,-0.511362,...,41.57134,41.493279,41.415219,0,0,-0.0835,-0.017284,41.452,41.151544,41.301772
4340,4341,41.661,41.68,41.37,41.593,3424479.0,2018-12-02 19:00:00,0.31,-0.068,-0.219354,...,41.590634,41.522868,41.455102,0,0,-0.0835,-0.017284,41.3685,41.13426,41.25138
4341,4342,41.55,41.689,41.125,41.339,5049899.0,2018-12-02 19:30:00,0.564,-0.211,-0.374113,...,41.449724,41.305248,41.160772,0,0,-0.0835,-0.017067,41.285,41.121479,41.203239
4342,4343,41.339,41.346,40.902,41.23,5392301.0,2018-12-02 20:00:00,0.444,-0.109,-0.245495,...,41.315705,41.110811,40.905916,0,0,-0.0835,-0.024069,41.2015,40.902,41.05175
4343,4344,41.23,41.35,41.15,41.305,1683759.0,2018-12-02 20:30:00,0.2,0.075,0.374998,...,41.256126,41.086851,40.917577,0,0,-0.0835,-0.024069,41.118,40.877931,40.997966
4344,4345,41.305,41.5,41.25,41.416,3133224.0,2018-12-02 21:00:00,0.25,0.111,0.443998,...,41.252977,41.129354,41.005731,0,0,-0.04,-0.024069,41.89,40.853862,41.371931
4345,4346,41.427,41.699,41.402,41.45,3466664.0,2018-12-02 21:30:00,0.297,0.023,0.077441,...,41.26941,41.19082,41.11223,0,0,-0.047353,-0.030179,41.699179,40.811464,41.255322
4346,4347,41.45,41.589,41.38,41.395,1182986.0,2018-12-02 22:00:00,0.209,-0.055,-0.263157,...,41.278723,41.198246,41.117769,0,0,-0.047353,-0.03323,41.651827,40.773098,41.212462


In [25]:
#次のCloseを予測する機械学習
#使わない変数を除外
exe_cols = ['index','Timestamp']
feature_cols = [col for col in data_df1.columns if col not in exe_cols]
print(feature_cols)
#機械学習用にnp配列に変換
data_np= np.array(data_df1[feature_cols])
print(data_np.shape)

['Open', 'High', 'Low', 'Close', 'Volume', 'High-Low', 'Close-Open', 'Similarity', '(High-Low)*Volume ', '(Close-Open)*Volume ', 'MA5', 'MA25', 'Cross', 'bbd_p1', 'bbd_p2', 'bbd_p3', 'bbd_m1', 'bbd_m2', 'bbd_m3', 'golden', 'dead', 'high_slope', 'low_slope', 'tr_high', 'tr_low', 'tr_mid']
(4347, 26)


In [26]:
Y = np.array(data_df1['Close'])#1次元
Y = np.delete(Y,0)
print(Y,Y.shape)
X = data_np[:len(Y),0:]
print(X[:],X.shape)

[37.9   37.646 37.787 ... 41.416 41.45  41.395] (4346,)
[[38.028      38.157      37.8        ...  0.          0.
   0.        ]
 [37.875      37.955      37.708      ...  0.          0.
   0.        ]
 [37.9        37.9        37.6        ...  0.          0.
   0.        ]
 ...
 [41.23       41.35       41.15       ... 41.118      40.87793103
  40.99796552]
 [41.305      41.5        41.25       ... 41.89       40.85386207
  41.37193103]
 [41.427      41.699      41.402      ... 41.69917933 40.81146429
  41.25532181]] (4346, 26)


In [27]:
# 学習では、29/30を使うものとします。これは情報の偏りを防ぐためのものであり、全体でも構いません
L = int(len(X)//(200/199))
print(L)
train_x = X[50:L,:]
train_y = Y[50:L]

4324


In [28]:
# 残りの全てをテストデータとします
test_x = X[L:len(X),:]
test_y = Y[L:len(Y)]

In [29]:
rf = RandomForestRegressor(random_state=1234)

In [30]:
params = {'n_estimators': [15,20,25], 'max_depth': [5,17,30]}
gscv = GridSearchCV(rf, param_grid=params, verbose=1,
                    cv=3, scoring='neg_mean_squared_error')
gscv.fit(train_x, train_y)

Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed:   31.2s finished


GridSearchCV(cv=3, error_score='raise',
       estimator=RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
           oob_score=False, random_state=1234, verbose=0, warm_start=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'n_estimators': [15, 20, 25], 'max_depth': [5, 17, 30]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='neg_mean_squared_error', verbose=1)

In [31]:
a = gscv.best_params_

In [32]:
print(a)
n_estimators = a.get('n_estimators')
max_depth = a.get('max_depth')

{'max_depth': 5, 'n_estimators': 15}


In [33]:
rf = RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth,random_state=1234)
rf.fit(train_x, train_y)
y_pred_rf = rf.predict(test_x)
rf_mse = mean_squared_error(test_y, y_pred_rf)
print('RandomForest MSE: ', rf_mse)

RandomForest MSE:  0.05811461106525313


In [34]:
# 予測結果を出力します。これはランダムフォレスト
result = pd.DataFrame(y_pred_rf)
result.columns = ['y_pred_rf']
result['test_y'] = test_y
#２乗平均誤差ではなく、ただの差
result['RMS'] = (result['test_y']-result['y_pred_rf'])
print(result.loc[:])

    y_pred_rf  test_y       RMS
0   42.565210  42.524 -0.041210
1   42.565210  42.100 -0.465210
2   42.388718  42.228 -0.160718
3   42.388718  42.203 -0.185718
4   42.388718  42.006 -0.382718
5   42.388718  41.620 -0.768718
6   41.636763  41.556 -0.080763
7   41.636763  41.577 -0.059763
8   41.636763  41.697  0.060237
9   41.636763  41.609 -0.027763
10  41.636763  41.548 -0.088763
11  41.636763  41.660  0.023237
12  41.636763  41.598 -0.038763
13  41.636763  41.781  0.144237
14  41.876926  41.660 -0.216926
15  41.636763  41.593 -0.043763
16  41.636763  41.339 -0.297763
17  41.352742  41.230 -0.122742
18  41.352742  41.305 -0.047742
19  41.352742  41.416  0.063258
20  41.455561  41.450 -0.005561
21  41.636763  41.395 -0.241763


In [35]:
# Plot拡大
plt.plot(range(0,len(result[0:15])), test_y[len(test_y)-15:], label='Actual price', color='blue', marker = 'o')
plt.plot(range(0,len(result[0:15])), y_pred_rf[len(test_y)-15:], label='Predicted price', color='red', marker ='x')
plt.xlabel(span)
plt.ylabel('Price (\)')
n3='{0} Price by RandomForest'.format(trade_name)
plt.title(n3)
plt.grid(True)
plt.legend()
n4='{0}2 by RandomForest.png'.format(trade_name)
plt.savefig(n4)
plt.close()
plt.show()

In [36]:
# 変数増加法を実行する関数
def get_gfs_feature_indices(X, y, features, clf):
    X_train_, X_test_, y_train_, y_test_ = \
        train_test_split(X, y, test_size=0.3, shuffle=False)
    feature_indices = {feature: idx for idx, feature in enumerate(features)}
    features = set(features)
    last_mse = np.inf
    chosen_features = set()
    while len(chosen_features) < len(features):
        mse_features = []
        for feature in (features - chosen_features):
            candidates = chosen_features.union(set([feature]))
            indices = [feature_indices[feature] for feature in candidates]
            clf.fit(X_train_[:, indices], y_train_)
            y_pred = clf.predict(X_test_[:, indices])
            mse = mean_squared_error(y_test_, y_pred)
            mse_features += [(mse, feature)]
        mse, feature = min(mse_features)
        if mse >= last_mse:
            break
        last_mse = mse
        print('Newly Added Feature: {},\tMSE Score: {}'.format(feature, mse))
        chosen_features.add(feature)
    return [feature_indices[feature] for feature in chosen_features]

In [37]:
# 上記関数を使用して変数増加法を実行し、MSEを算出
#feature_cols = list('abcdefghij')
feature_cols = list('abcdefghijklmnopqrstuv')
print(feature_cols,len(feature_cols))

selected_feature_index_by_RandomForestRegressor = get_gfs_feature_indices(X=train_x,y=train_y,features=feature_cols,clf= RandomForestRegressor())
print(selected_feature_index_by_RandomForestRegressor)

['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v'] 22
Newly Added Feature: d,	MSE Score: 1.5553813452879568
Newly Added Feature: a,	MSE Score: 0.5412490881761503
[3, 0]


In [38]:
#ランダムフォレスト
rf = RandomForestRegressor(random_state=1234)
rf.fit(train_x[:, selected_feature_index_by_RandomForestRegressor], train_y)
y_pred_rf = rf.predict(test_x[:, selected_feature_index_by_RandomForestRegressor])
rf_mse = mean_squared_error(test_y, y_pred_rf)
print('RandomForest MSE: ', rf_mse)

RandomForest MSE:  0.118984051363636


In [39]:
params = {'n_estimators': [15,20,25], 'max_depth': [6,10,20]}
gscv = GridSearchCV(rf, param_grid=params, verbose=1,
                    cv=3, scoring='neg_mean_squared_error')
gscv.fit(train_x[:, selected_feature_index_by_RandomForestRegressor], train_y)

Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed:    4.3s finished


GridSearchCV(cv=3, error_score='raise',
       estimator=RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
           oob_score=False, random_state=1234, verbose=0, warm_start=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'n_estimators': [15, 20, 25], 'max_depth': [6, 10, 20]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='neg_mean_squared_error', verbose=1)

In [40]:
a = gscv.best_params_

In [41]:
print(a)
n_estimators = a.get('n_estimators')
max_depth = a.get('max_depth')

{'max_depth': 6, 'n_estimators': 15}


In [42]:
rf = RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth,random_state=1234)
rf.fit(train_x[:, selected_feature_index_by_RandomForestRegressor], train_y)
y_pred_rf_gfs = rf.predict(test_x[:, selected_feature_index_by_RandomForestRegressor])
rf_mse = mean_squared_error(test_y, y_pred_rf_gfs)
print('RandomForest MSE: ', rf_mse)

RandomForest MSE:  0.04271782687653047


In [43]:
# 予測結果を出力します。これはランダムフォレスト
result = pd.DataFrame(y_pred_rf_gfs)
result.columns = ['y_pred_rf_gfs']
result['test_y'] = test_y
#２乗平均誤差ではなく、ただの差
result['RMS'] = (result['test_y']-result['y_pred_rf_gfs'])
print(result.loc[:])

    y_pred_rf_gfs  test_y       RMS
0       42.670411  42.524 -0.146411
1       42.670411  42.100 -0.570411
2       42.174744  42.228  0.053256
3       42.174744  42.203  0.028256
4       42.174744  42.006 -0.168744
5       42.174744  41.620 -0.554744
6       41.567135  41.556 -0.011135
7       41.618995  41.577 -0.041995
8       41.618995  41.697  0.078005
9       41.618995  41.609 -0.009995
10      41.618995  41.548 -0.070995
11      41.618995  41.660  0.041005
12      41.618995  41.598 -0.020995
13      41.618995  41.781  0.162005
14      41.883053  41.660 -0.223053
15      41.618995  41.593 -0.025995
16      41.618995  41.339 -0.279995
17      41.280645  41.230 -0.050645
18      41.252611  41.305  0.052389
19      41.252611  41.416  0.163389
20      41.475560  41.450 -0.025560
21      41.618995  41.395 -0.223995


In [44]:
# Plot拡大
plt.plot(range(0,len(result[0:15])), test_y[len(test_y)-15:], label='Actual price', color='blue', marker = 'o')
plt.plot(range(0,len(result[0:15])), y_pred_rf_gfs[len(test_y)-15:], label='Predicted price', color='red', marker ='x')
plt.xlabel(span)
plt.ylabel('Price (\)')
n3='{0} Price by RandomForest gfs'.format(trade_name)
plt.title(n3)
plt.grid(True)
plt.legend()
n4='{0}2 by RandomForest gfs.png'.format(trade_name)
plt.savefig(n4)
plt.close()
plt.show()