In [1]:
%cd ..

E:\システムトレード入門\predict_git_workspace


In [2]:
import pandas as pd
import numpy as np
import datetime
import time
from pytz import timezone

from pathlib import Path
import math

In [3]:
from bokeh.models import ColumnDataSource,BooleanFilter, CDSView, Range1d
from bokeh.models import DatetimeTickFormatter
from bokeh.io import curdoc

In [4]:
import bokeh.plotting
from bokeh.io import output_notebook, push_notebook
import bokeh.io
output_notebook()

In [5]:
from utils import get_df_freq, get_sec_from_freq, middle_sample_type_with_check
from utils import get_next_datetime, ConvertFreqOHLCV

In [6]:
from get_stock_price import StockDatabase

In [7]:
from transforms import movingaverage, SimpleMovingAverage

In [8]:
from visualization import BokehCandleStick, static_candlestick, StockDataSupplier, StockDataSupplierDB

### データベースの読み込み

In [9]:
db_path = Path("db/sample_db") / Path("sample.db")
stock_db = StockDatabase(db_path)

In [12]:
stock_name = "6502"


jst_timezone = timezone("Asia/Tokyo")
start_time = jst_timezone.localize(datetime.datetime(2020, 10, 30, 9, 0, 0))
#end_time = jst_timezone.localize(datetime.datetime(2020, 10, 30, 15, 0, 0))
end_time = jst_timezone.localize(datetime.datetime(2020, 10, 30, 12, 0, 0))

#start_time = jst_timezone.localize(datetime.datetime(2020, 11, 1, 9, 0, 0))
#end_time = jst_timezone.localize(datetime.datetime(2020, 11, 1, 15, 0, 0))


span_df = stock_db.search_span(stock_name, start_time, end_time, freq_str="T",to_tokyo=True)
span_df

Unnamed: 0_level_0,Open_6502,High_6502,Low_6502,Close_6502,Volume_6502
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-10-30 09:00:00+09:00,2695.0,2696.0,2686.0,2686.0,0.0
2020-10-30 09:01:00+09:00,2691.0,2691.0,2683.0,2684.0,7000.0
2020-10-30 09:02:00+09:00,2683.0,2687.0,2681.0,2683.0,3800.0
2020-10-30 09:03:00+09:00,2686.0,2687.0,2683.0,2683.0,1600.0
2020-10-30 09:04:00+09:00,2683.0,2683.0,2675.0,2676.0,8200.0
...,...,...,...,...,...
2020-10-30 11:55:00+09:00,,,,,
2020-10-30 11:56:00+09:00,,,,,
2020-10-30 11:57:00+09:00,,,,,
2020-10-30 11:58:00+09:00,,,,,


In [13]:
def static_movingaverage_chart(df, ohlc_dict, window_size, ohlc="Open", freq_str=None, figure=None, line_color="dodgerblue"):
    
    if freq_str is None:
        freq_str = get_df_freq(df)
    
    df = df.copy()  # index等を変更するため
    
    # 同じdatetmeを持つnaiveなdatetimeに変形
    if df.index.tzinfo is not None:  # awareな場合
        df.index = df.index.tz_localize(None)
        
    convert = ConvertFreqOHLCV(freq_str)
    df = convert(df)
        
    seconds = get_sec_from_freq(freq_str)
        
    if set(list(ohlc_dict.keys())) < set(["Open", "High", "Low", "Close"]):
           raise ValueError("keys of ohlc_dict must have 'Open', 'High', 'Low', 'Close'.")
    elif set(list(ohlc_dict.keys())) > set(["Open", "High", "Low", "Close", "Volume"]):  #Volumeは別にあってもよい
        raise ValueError("keys of ohlc_dict is too many.")
        
    convert_movingaverage = SimpleMovingAverage(window_size, use_nan=True)
    ma_df = convert_movingaverage(df)
    line_ma_series = ma_df[ohlc_dict[ohlc]+convert_movingaverage.column_suffix].copy()  # Series
    
    if figure is None:
        p = bokeh.plotting.figure(x_axis_type="datetime", plot_width=1000)
    else:
        p = figure
        
    p.line(x=line_ma_series.index,#X軸
           y=line_ma_series,      #Y軸
           line_width=2,          #線の幅
           line_color=line_color,    #線の色
           line_dash="solid",   #線の種類（実線：solid、点線：dashed・dotted・dotdash・dashdot）
           legend_label=convert_movingaverage.column_suffix          #凡例の表示名
          )
    
    return p

In [14]:
ohlc_dict = {"Open":"Open_6502", "High":"High_6502", "Low":"Low_6502", "Close":"Close_6502"}
p = static_movingaverage_chart(span_df, ohlc_dict, window_size=10,freq_str="5T")
bokeh.io.show(p)

### 組み合わせて表示

In [15]:
p = static_candlestick(span_df, ohlc_dict, freq_str="5T")
p = static_movingaverage_chart(span_df, ohlc_dict, window_size=10, ohlc="Open", freq_str="5T", figure=p)
bokeh.plotting.show(p)

In [16]:
class BokehMovingAverageChart:
    """
    移動平均
    """
    def __init__(self, 
                 stock_data_supplier,  
                 ohlc_dict, 
                 window_size,
                 initial_start_datetime, 
                 initial_end_datetime, 
                 ohlc="Open",
                 freq_str="T", 
                 figure=None,
                 y_axis_margin=50, 
                 use_x_range=True,
                 use_y_range=True,
                 data_left_times=1,
                 is_notebook=True,
                 use_formatter=True,
                 line_color="dodgerblue"
                ):
        """
        stock_supplier: StockDataSupplier or any
            株価データを供給するためのオブジェクト
        ohlc_dict: dict of str
            {"Open":カラム名,"Close":カラム名}のような辞書，stock_dbの出力に依存する
        window_size: int
            平均する窓関数のサイズ
        initial_start_date: datetime
            開始時のx_rangeの下限のdatetime
        initial_end_date: datetime
            開始じのx_rangeの上限のdatetime
        ohlc: str
            平均するOHLCのいずれか
        freq_str: str
            サンプリング周期
        figure: bokeh.plotting.Figure
            複数描画の場合
        y_axis_margin: int
            yの表示領域のマージン
        use_x_range: bool
            このクラスにx_rangeの変更を任せるかどうか
        color_positive: str
            始値＜終値の場合の色，終値＜始値の場合の色
        """    
        self.stock_data_supplier = stock_data_supplier
        self.ohlc_dict = ohlc_dict
        self.window_size = window_size
        self.ohlc = ohlc
        self.y_axis_margin = y_axis_margin
        self.is_notebook = is_notebook
        self.use_x_range = use_x_range
        self.use_y_range = use_y_range
        self.use_formatter = use_formatter
        self.line_color = line_color
        
        # ymax, yminを整えるのに使う
        self.last_ymax = self.y_axis_margin
        self.last_ymin = - self.y_axis_margin
        
        self.freq_str = middle_sample_type_with_check(freq_str)

        seconds = get_sec_from_freq(self.freq_str)

        # ohlc_dictのチェック
        if set(list(ohlc_dict.keys())) < set(["Open", "High", "Low", "Close"]):
            raise ValueError("keys of ohlc_dict must have 'Open', 'High', 'Low', 'Close'.")
        elif set(list(ohlc_dict.keys())) > set(["Open", "High", "Low", "Close", "Volume"]):  #Volumeは別にあってもよい
            raise ValueError("keys of ohlc_dict is too many.")
            
        # 最初のDataFrame
        start_df = self.stock_data_supplier.initial_data(start_datetime=initial_start_datetime, end_datetime=initial_end_datetime)  # Series
                
        #start_dfから，移動平均を算出
        self.ohlc_column_list = [self.ohlc_dict["Open"], self.ohlc_dict["High"], self.ohlc_dict["Low"], self.ohlc_dict["Close"]]
        sub_start_df = start_df.loc[:,self.ohlc_column_list]
                                    
        self.initial_length = len(sub_start_df.index)
        self.source_length = self.initial_length * data_left_times
        
        # 更新用に，windowsizeのarrayを取得
        self.use_column_name = self.ohlc_dict[self.ohlc]
        #from IPython.core.debugger import Pdb; Pdb().set_trace()
        self.window_array = sub_start_df[self.use_column_name].values[-window_size:].copy()  # クローンしておく
        print(self.window_array)
        
        # 移動平均のdf
        convert_movingaverage = SimpleMovingAverage(window_size, use_nan=True)
        start_df_ma = convert_movingaverage(sub_start_df)
        
        # ソースとなるdfを抽出
        #　利用するカラム名
        self.use_ma_column_name = self.ohlc_dict[self.ohlc] + convert_movingaverage.column_suffix
        use_column_start_df_ma = start_df_ma.loc[:,[self.use_ma_column_name,]].copy()  # 一つのカラムを持つdf
        #print("use_column_start_df_ma:",use_column_start_df_ma)
        use_column_start_df_ma_as_series = use_column_start_df_ma[self.use_ma_column_name]  # シリーズとして保持
        
        # 同じdatetimeを持つnaiveなdatetimeに変形
        if start_df_ma.index.tzinfo is not None:  # awareな場合
            start_df_ma.index = start_df_ma.index.tz_localize(None)
        #print("sub_start_df:",sub_start_df)

        # ソースを追加
        self.source = ColumnDataSource(use_column_start_df_ma)
        
        # ソースのビュー
        self.temp_isnot_nan = ~use_column_start_df_ma_as_series.isnull()  # 更新時に利用，否定演算子に注意
        #print("temp_isnot_nan:",self.temp_isnot_nan)
        self.temp_isnot_nan = self.temp_isnot_nan.copy()  # copyをとっておく
        isnot_filter = BooleanFilter(self.temp_isnot_nan)
        #print("start_df_ma:",start_df_ma)
        #print("temp_is_nan:",self.temp_is_nan)
        
        self.view_isnot_nan = CDSView(source=self.source, filters=[isnot_filter,])
        
        
        y_max, y_min = self._make_y_range(use_column_start_df_ma, margin=self.y_axis_margin)
        
        if figure is None:  # コンストラクタにbokehのfigureが与えられない場合
            if not self.use_x_range or not self.use_y_range:
                raise ValueError("set the use_x_range: True, use_y_range: True")
            source_df = self.source.to_df()
            timestamp_series = source_df.loc[:,"timestamp"]
            self.x_range = Range1d(timestamp_series.iloc[-self.initial_length], timestamp_series.iloc[-1])  # 最後からinitial_length分だけ表示させるためのx_range
            #print("x_range:",self.x_range.start, self.x_range.end)
            self.y_range = Range1d(y_min, y_max)
            self.dp = bokeh.plotting.figure(x_axis_type="datetime", plot_width=1000, x_range=self.x_range, y_range=self.y_range)
        else:
            self.dp = figure
            self.y_range = figure.y_range
            self.x_range = figure.x_range
        
        self.dp.line(x="timestamp",#X軸
                     y=self.use_ma_column_name,      #Y軸
                     line_width=2,          #線の幅
                     line_color=self.line_color,    #線の色
                     line_dash="solid",   #線の種類（実線：solid、点線：dashed・dotted・dotdash・dashdot）
                     legend_label=convert_movingaverage.column_suffix,      #凡例の表示名
                     source=self.source,
                     view=self.view_isnot_nan
                    )
        
        # formatter 機能しない
        if self.use_formatter:
            x_format = "%m-%d-%H-%M"
            self.dp.xaxis.formatter = DatetimeTickFormatter(
                minutes=[x_format],
                hours=[x_format],
                days=[x_format],
                months=[x_format],
                years=[x_format]
            )
            self.dp.xaxis.major_label_orientation = math.radians(45)

        
        # データ供給用ジェネレータ
        self.stock_data_supplier_gen = self.stock_data_supplier.iter_data(start_datetime=initial_end_datetime)
    
    def update(self):
        
        # ソースに加える長さ1のDataFrame
        one_df = next(self.stock_data_supplier_gen)  # ジェネレーターから取り出す
        print("one_df:",one_df)
        
        #one_df = self._fill_nan_zero(one_df)  # Noneをなくしておく(bokehが認識できるようにするため
                
        # 同じdatetimeの値をもつnaiveなdatetimeを取得：
        if len(one_df.index) > 0:
            one_df.index = one_df.index.tz_localize(None)
         
        one_item = one_df.loc[one_df.index[0],self.use_column_name]  # 一つ取り出した値
        
        # filterの調整
        if one_item is not None:
            one_bool = pd.Series([True])
            new_window_array = np.append(self.window_array,[one_item])  # itemを追加
            self.window_array = np.delete(new_window_array, 0)  # インデックス0のものを削除
            window_array_without_nan = self.window_array[~np.isnan(self.window_array)]  # nanを除いたwindow_array
            one_item_ma = window_array_without_nan.mean()  # nanを除いて平均を計算
        else:
            new_window_array = np.append(self.window_array,[np.nan])  # nanを追加
            self.window_array = np.delete(new_window_array, 0)  # インデックス0のものを削除
            one_bool = pd.Series([False])
            one_item_ma = 0  # これは描画されないはず(filterによって)　Noneはソースに加えられないため
        
        print("window_array:",self.window_array)
        
        #new_dict = {i:[one_df.loc[one_df.index[0],i]] for i in self.source_column_list}  # timestampを除く
        new_dict = {self.use_ma_column_name:[one_item_ma,]}
        
        print("new_dict:",new_dict)
        new_dict["timestamp"] = np.array([one_df.index[0].to_datetime64()])

        new_isnot_nan_booleans = pd.concat([self.temp_isnot_nan, one_bool])  # 後ろに追加
        if len(new_isnot_nan_booleans.index) > self.source_length:  # ソースの長さを超えた場合
            new_isnot_nan_booleans = new_isnot_nan_booleans.drop(new_isnot_nan_booleans.index[0])  # 最初を削除
        self.temp_isnot_nan = new_isnot_nan_booleans
        
        # filterの変更
        self.view_isnot_nan.filters = [BooleanFilter(self.temp_isnot_nan),]

        # sourceの変更
        self.source.stream(new_data=new_dict, rollover=self.source_length)
        
        # 範囲選択
        source_df = self.source.to_df()
        # yの範囲
        if self.use_y_range:
            y_max, y_min = self._make_y_range(source_df, self.y_axis_margin)
            self.y_range.start = y_min
            self.y_range.end = y_max
        #print("y_range:", self.y_range.start, self.y_range.end)
        # xの範囲
        if self.use_x_range:
            timestamp_series = source_df.loc[:,"timestamp"]
            self.x_range.start = timestamp_series.iloc[-self.initial_length]
            self.x_range.end = timestamp_series.iloc[-1]
        #print("x_range:",self.dp.x_range.start, self.dp.x_range.end)
            
    def _make_y_range(self, df, margin=50):
        new_df = df.replace(0, None)  # Noneに変更してhigh, lowを計算しやすくこれでも0になることがあるらしい．
     
        y_max = new_df.loc[:,self.use_ma_column_name].max() + margin
        y_min = new_df.loc[:,self.use_ma_column_name].min() - margin
        print(y_max)
        print(y_min)
        
        if y_max == margin:  # Highが0の場合
            y_max = self.last_ymax
        else:
            self.last_ymax = y_max
        
        if y_min == -margin:  # Lowが0の場合
            y_min = self.last_ymin
        else:
            self.last_ymin = y_min
        
        return y_max, y_min

In [17]:
stock_data_supplier = StockDataSupplierDB(stock_db, stock_name, freq_str="5T", to_tokyo=True)


bokeh_ma_chart = BokehMovingAverageChart(stock_data_supplier,  
                                         ohlc_dict=ohlc_dict, 
                                         window_size=10,
                                         initial_start_datetime=start_time,
                                         initial_end_datetime=end_time,
                                         freq_str="5T",
                                         y_axis_margin=10,
                                         data_left_times=5,
                                         use_formatter=True,
                                         is_notebook=True
                                         )

[2649. 2648. 2646. 2646. 2648. 2648.   nan   nan   nan   nan]
2682.9
2636.5


In [18]:
t = bokeh.io.show(bokeh_ma_chart.dp, notebook_handle=True)
for i in range(1000):
    time.sleep(1)
    bokeh_ma_chart.update()
    push_notebook(handle=t)

ERROR:bokeh.core.validation.check:E-1024 (CDSVIEW_FILTERS_WITH_CONNECTED): CDSView filters are not compatible with glyphs with connected topology such as Line or Patch: GlyphRenderer(id=1487, glyph=Line(id='1485', ...), ...)


one_df:                           Open_6502  High_6502  Low_6502 Close_6502  \
timestamp                                                             
2020-10-30 12:00:00+09:00      None        NaN       NaN       None   

                           Volume_6502  
timestamp                               
2020-10-30 12:00:00+09:00            0  
window_array: [2648. 2646. 2646. 2648. 2648.   nan   nan   nan   nan   nan]
new_dict: {'Open_6502_ma10': [0]}
2682.9
2636.5
one_df:                           Open_6502  High_6502  Low_6502 Close_6502  \
timestamp                                                             
2020-10-30 12:05:00+09:00      None        NaN       NaN       None   

                           Volume_6502  
timestamp                               
2020-10-30 12:05:00+09:00            0  
window_array: [2646. 2646. 2648. 2648.   nan   nan   nan   nan   nan   nan]
new_dict: {'Open_6502_ma10': [0]}
2682.9
2636.5
one_df:                           Open_6502  High_6502  Low_65

KeyboardInterrupt: 

one_df:                           Close_6502  High_6502  Low_6502 Open_6502  \
timestamp                                                             
2020-10-30 12:00:00+09:00       None        NaN       NaN      None   

                           Volume_6502  
timestamp                               
2020-10-30 12:00:00+09:00            0  
new_dict: {'Open_6502_ma10': [0]}
2682.9
2636.5
one_df:                           Close_6502  High_6502  Low_6502 Open_6502  \
timestamp                                                             
2020-10-30 12:05:00+09:00       None        NaN       NaN      None   

                           Volume_6502  
timestamp                               
2020-10-30 12:05:00+09:00            0  
new_dict: {'Open_6502_ma10': [0]}
2682.9
2636.5
one_df:                           Close_6502  High_6502  Low_6502 Open_6502  \
timestamp                                                             
2020-10-30 12:10:00+09:00       None        NaN       NaN      N

KeyboardInterrupt: 