In [1]:
import pandas as pd
import numpy as np
import requests
import math
from bs4 import BeautifulSoup
from datetime import date, datetime
from pandas_datareader.stooq import StooqDailyReader

In [2]:
data_dir = '../data/'
url_minkabu = 'https://minkabu.jp/stock/'
get_dates = 3
title = ['Date', 'Open', 'High', 'Low', 'Close', 'aClose', 'Volume']

## 株価取得＆更新スクリプト

In [3]:
def get_master_code():
    path = data_dir + '/master/get_code.csv'
    codemaster_df = pd.read_csv(path)
    
    return codemaster_df

In [4]:
def get_update_price(url):
    # 値の取得
    response = requests.get('https://minkabu.jp/stock/9984/daily_bar')
    soup = BeautifulSoup(response.text, 'html.parser')

    # テーブルを指定
    table = soup.findAll('table', {'id' : 'fourvalue_timeline'})[0]
    rows = table.findAll('tr')

    # trの解析
    trs = table.find_all('tr')
    tr = trs[0]

    # ボディの解析
    matrix = []
    for tr in trs[1:]:
        r = []
        for td in tr.find_all('td'):
            r.append(td.text)
        matrix.append(r)
        
    df = pd.DataFrame(data=matrix, columns=title)
    
    return df

In [55]:
def update_price(code, df_price):
    # 日時をdatetimeに変換
    df_price['Date'] = pd.to_datetime(df_price['Date'], format='%Y-%m-%d')
    df_price = df_price.replace('(,|\.)', r'', regex=True)
    df_price[['Open', 'High', 'Low', 'Close', 'aClose']] = df_price[['Open', 'High', 'Low', 'Close', 'aClose']].astype('int') / 10
    
    # 元データを整形
    data = pd.read_csv(data_dir + str(code) + '/data.csv', encoding = 'utf8', parse_dates=['Date'])
    data = data.sort_values('Date')
    data = data.reset_index(drop=True)
    
    # 指定日の最新データを投入
    data = data.append(df_price.iloc[0:get_dates,:], ignore_index=True)
    
    # 型変換＆重複削除
    data = data.astype({'Open' : 'int', 'High' : 'int', 'Low' : 'int', 'Close' : 'int', 'aClose' : 'int', 'Volume' : 'int'})
    data = data.drop_duplicates(subset=['Date'])
    
    # CSVファイルに出力
    data.to_csv(data_dir + str(code) + '/data.csv', index = False)
    
    return data

In [84]:
def calc_backtest(data, code):
    print('---------------------------------------------')
    print("Stock Code : " + str(code))
    
    data = data.sort_values('Date')
    # Sharpe Ratio
    sr = get_shaperatio(data[-256:-1])
    print('Sharpe Ratio : ' + str(sr))
    
    # HHI
    ret = data["Close"].pct_change()
    hhiPos = get_hhi(ret[ret >= 0])
    hhiNeg = get_hhi(ret[ret < 0])
    print('hhiPos : ' + str(hhiPos))
    print('hhiNeg : ' + str(hhiNeg))
    
    # updateMax and Min
    updateMax, updateMin = get_updateMax_and_Min(data)
    print('updateMax : ' + str(updateMax))
    print('updateMin : ' + str(updateMin))
    
    # updateMax and Min All Past
    updateMaxAll, updateMinAll = get_updateMax_and_Min_All(data)
    print('updateMaxAllPast : ' + str(updateMaxAll))
    print('updateMinAllPast : ' + str(updateMinAll))
    
    # get_n225_corr
    corr = get_n225_corr(data)
    print('n225_corr : ' + str(corr))
    
    # The day before ratio
    compare_price, compare_percentage = get_lastRatio(data)
    print('前日との終値差異 ： ' + str(compare_price))
    print('前日との終値比率差異 ： ' + str(compare_percentage))
    
    print('---------------------------------------------')

## バックテスト用メソッド

In [85]:
def get_shaperatio(data):
    df = data.copy()
    
    df["daily_ret"] = data["Close"].pct_change() #株価終値の前日との変化率を計算する。
    ret_ave = np.mean(df["daily_ret"])
    vol_sp = df["daily_ret"].std()
    
    return  math.sqrt(256) * ret_ave / vol_sp

In [86]:
def get_hhi(ret):
    if ret.shape[0] <= 2: return np.nan
    wght = ret / ret.sum()
    hhi = (wght**2).sum()
    hhi = (hhi - ret.shape[0] ** -1) / (1. - ret.shape[0]**-1)

    return hhi

In [None]:
def get_n225_corr(data):
    #株価取得範囲を設定
    start = data.iloc[-2, 0]
    end = data.iloc[-1, 0]
    stock = '^NKX'

    # 元データを整形
    df_master = pd.read_csv(data_dir + 'n225/data.csv', encoding = 'utf8', parse_dates=['Date'])
    df_master = df_master.sort_values('Date')
    df_master = df_master.reset_index(drop=True)

    #株価取得
    df_update = StooqDailyReader(stock, start=start, end=end)
    df_update = df_update.read().reset_index().sort_values('Date').reset_index(drop=True)

    # 指定日の最新データを投入
    df_master = df_master.append(df_update.iloc[0:get_dates,:], ignore_index=True)
    
    # 型変換＆重複削除
    df_master = df_master.astype({'Open' : 'int', 'High' : 'int', 'Low' : 'int', 'Close' : 'int'})
    df_master = df_master.drop_duplicates(subset=['Date'])
    df_master.to_csv(data_dir + 'n225/data.csv', index=False)
    
    # 相関係数の計算
    _df = pd.DataFrame()
    _df["Stock"] = data.Close
    _df["N225"] = df_master.Close

    # 不要データ削除して相関係数を計算
    corr = _df.dropna().corr(method='pearson')['Stock']['N225']
    
    return corr

## 株価監視メソッド

In [87]:
def get_updateMax_and_Min(data):
    updateMax = False
    updateMin = False
    thisMonth = data.iloc[-1, 0].month
    if not thisMonth in [1,2,3]:
        # 昨年来高値・安値
        startDate = date(data.iloc[-1, 0].year - 1, 1, 1).isoformat()
        endDate = date(data.iloc[-1, 0].year - 1, 12, 31).isoformat()

    else:
        # 年初来高値・安値
        startDate = date(data.iloc[-1, 0].year, 1, 1).isoformat()
        endDate = date(data.iloc[-1, 0].year, 12, 31).isoformat()

    _df = data[(data['Date'] >= startDate) & (data['Date'] <= endDate)]

    pastMax = _df['Close'].max()
    pastMin = _df['Close'].min()
    thisClose = data.iloc[-1, 4]
    if pastMax < thisClose:
        updateMax = True
    if pastMin > thisClose:
        updateMin = True
    
    return updateMax, updateMin

In [88]:
def get_updateMax_and_Min_All(data):
    updateMax = False
    updateMin = False

    pastMax = data['Close'].max()
    pastMin = data['Close'].min()
    thisClose = data.iloc[-1, 4]
    if pastMax < thisClose:
        updateMax = True
    if pastMin > thisClose:
        updateMin = True
    
    return updateMax, updateMin

In [89]:
def get_lastRatio(data):
    thisClose = data.iloc[-1, 4]
    lastClose = data.iloc[-2, 4]
    compare_price = (thisClose - lastClose)
    compare_percentage = (thisClose / lastClose)

    return compare_price, compare_percentage

In [91]:
## main
codemaster_df = get_master_code()
for idx, row in codemaster_df.iterrows():
    df_price = get_update_price(row['time_url'])
    data = update_price(row["code"], df_price)
    calc_backtest(data, row["code"])

---------------------------------------------
Stock Code : 9984
Sharpe Ratio : -0.376943403468255
hhiPos : 0.005089920706469432
hhiNeg : 0.0040816075190960735
updateMax : False
updateMin : False
updateMaxAllPast : False
updateMinAllPast : False
n225_corr : 0.7732173873104088
前日との終値差異 ： -33
前日との終値比率差異 ： 0.9940551252026661
---------------------------------------------


In [53]:
data.iloc[:, 4].max()

10635