In [2]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
from sqlalchemy import create_engine
import matplotlib.pyplot as plt
import tensorflow as tf
%matplotlib

Using matplotlib backend: MacOSX


In [3]:
def pickup_data(year, JyoCD=None, race_type=0):
    """
    race_type:
        0: 芝とダート
        1: ダート
        2: 芝
    """
    # x分xx秒の文字列を秒のfloatに変換
    def convert_time_string(s):
        min = float(s[0])
        sec = float(s[1:3])
        sec10 = float(s[3])
        return min * 60 + sec + (sec10/10)

    engine = create_engine('mysql+pymysql://uma:UmaUma123!@localhost/everydb2_2?charset=utf8')

    # get all uma_race record
    sql = "select a.Year,a.MonthDay,a.JyoCD,a.RaceNum,a.KettoNum,a.Bamei,a.KakuteiJyuni,a.Time,a.TimeDiff, a.Odds, a.Ninki, \
        b.Kyori,b.TrackCD, \
        c.PayTansyoPay1, c.PayTansyoNinki1 \
        from n_uma_race as a, n_race as b, n_harai as c \
        where a.Year = %s and \
        a.Year = b.Year and a.MonthDay = b.MonthDay and a.JyoCD = b.JyoCD and a.RaceNum = b.RaceNum and \
        a.Year = c.Year and a.MonthDay = c.MonthDay and a.JyoCD = c.JyoCD and a.RaceNum = c.RaceNum" % year
    
    all_uma_race = pd.read_sql_query(sql, engine)

    #　文字列を数字に変換
    all_uma_race[["Kyori"]]=all_uma_race[["Kyori"]].astype(float)
    all_uma_race[["TrackCD"]]=all_uma_race[["TrackCD"]].astype(int)
    all_uma_race['Seconds'] = all_uma_race['Time'].map(convert_time_string)
    all_uma_race[["PayTansyoPay1"]]=all_uma_race[["PayTansyoPay1"]].astype(float)
    all_uma_race[["PayTansyoNinki1"]]=all_uma_race[["PayTansyoNinki1"]].astype(int)
    all_uma_race[["TimeDiff"]]=all_uma_race[["TimeDiff"]].astype(float)
    all_uma_race[["KakuteiJyuni"]]=all_uma_race[["KakuteiJyuni"]].astype(int)
    all_uma_race[["Odds"]]=all_uma_race[["Odds"]].astype(float)
    all_uma_race[["Ninki"]]=all_uma_race[["Ninki"]].astype(int)


    # Time 0のデータを除外
    all_uma_race = all_uma_race[all_uma_race['Seconds'] > 0]
    # 障害レースを除外
    all_uma_race = all_uma_race[all_uma_race['TrackCD'] < 50]
    
    # ダートレースに限定
    if race_type == 1:
        all_uma_race = all_uma_race[all_uma_race['TrackCD'] >= 23]
    # 芝レースに限定
    elif race_type == 2:
        all_uma_race = all_uma_race[all_uma_race['TrackCD'] <= 22]
        all_uma_race = all_uma_race[all_uma_race['TrackCD'] >= 10]

    # KakuteiJyuni=01のみ対象とする
    # all_uma_race = all_uma_race[all_uma_race['KakuteiJyuni'] == '01']

    # 競馬場を限定する
    if JyoCD is not None:
        all_uma_race = all_uma_race[all_uma_race['JyoCD'] == JyoCD]

    #単勝払戻金を確定順位１以外の馬は0に設定する
    all_uma_race.loc[all_uma_race['KakuteiJyuni'] != 1,'PayTansyoPay1'] = 0
    #単勝人気を確定順位１以外の馬はNoneに設定する
    all_uma_race.loc[all_uma_race['KakuteiJyuni'] != 1,'PayTansyoNinki1'] = None
    return all_uma_race

In [4]:
def buiuld_zenso_data(year):
    data = pickup_data(year=year)
    # 前走の確定順位と時間差をカラムに追加する
    sorted = data.sort_values(by=['KettoNum','Year','MonthDay'])
    for i in range(0, sorted.shape[0]-1):
        kt1 = sorted.iloc[i]['KettoNum']
        kt2 = sorted.iloc[i+1]['KettoNum']
        if kt1 == kt2:
            sorted.loc[sorted.index[i],'ZensoKakuteiJyuni']  = sorted.iloc[i+1]['KakuteiJyuni']
            sorted.loc[sorted.index[i],'ZensoTimeDiff']  = sorted.iloc[i+1]['TimeDiff']
        else:
            sorted.loc[sorted.index[i],'ZensoKakuteiJyuni']  = None
            sorted.loc[sorted.index[i],'ZensoTimeDiff']  = None
    return sorted.sort_index()

In [5]:
#データをDBから読み込み
data_from_db = {}
year = 2017
data_from_db[str(year)] = buiuld_zenso_data(year)

In [6]:
# Tensor Flowによる回帰分析
# 各種パラメータと回収率の関係
data = data_from_db[str(year)].copy()

#data['log_tansho_pay'] = data.PayTansyoPay1.apply(lambda x: np.log(x+1))
                             
dim = 4
x = tf.placeholder(tf.float32, [None, dim+1])
w = tf.Variable(tf.zeros([dim+1,1]))
y = tf.matmul(x,w)
t = tf.placeholder(tf.float32, [None, 1])
loss = tf.reduce_sum(tf.square(y - t))
train_step = tf.train.AdamOptimizer().minimize(loss)
sess = tf.Session()
sess.run(tf.global_variables_initializer())

#  前走データがないデータは除く
data = data[np.isfinite(data.ZensoKakuteiJyuni)]

# 前走着差が負のデータは0に設定する 
data.loc[data['ZensoTimeDiff'] < 0.0, 'ZensoTimeDiff'] = 0.0

# 払い戻し(np array)

#train_t = np.array(data.log_tansho_pay)
train_t = np.array(data.PayTansyoPay1)

# 前走確定順位
x_array = np.array(data.ZensoKakuteiJyuni)

#前走着差
x2_array = np.array(data.ZensoTimeDiff)

# 単勝人気
# x_array = np.array(data.Ninki)

# 単勝Odds
#x_array = np.array(data.Odds)


data_count = len(x_array)

train_t = train_t.reshape([data_count, 1])
train_x = np.zeros([data_count,dim+1])

for row in range(0,data_count):
    for col in range(0, dim):
        train_x [row][col] = x_array[row] ** col
    train_x[row][dim] = x2_array[row]


i = 0
for _ in range(100000):
    i += 1
    sess.run(train_step, feed_dict={x:train_x, t: train_t})
    if i % 10000 == 0:
        loss_val = sess.run(loss, feed_dict={x:train_x, t: train_t})
        print ('Step : %d, Loss: %f' % (i, loss_val))

Step : 10000, Loss: 12395298816.000000
Step : 20000, Loss: 12381039616.000000
Step : 30000, Loss: 12378504192.000000
Step : 40000, Loss: 12377245696.000000
Step : 50000, Loss: 12376379392.000000
Step : 60000, Loss: 12375841792.000000
Step : 70000, Loss: 12375489536.000000
Step : 80000, Loss: 12375566336.000000
Step : 90000, Loss: 12375508992.000000
Step : 100000, Loss: 12375504896.000000


In [7]:
w_val = sess.run(w)
print (w_val)

[[  6.96963196e+01]
 [ -1.11528265e+00]
 [  4.77208197e-01]
 [ -2.55047437e-02]
 [  1.99921981e-01]]


In [8]:
train_x

array([[  1.00000000e+00,   5.00000000e+00,   2.50000000e+01,
          1.25000000e+02,   7.00000000e+00],
       [  1.00000000e+00,   1.40000000e+01,   1.96000000e+02,
          2.74400000e+03,   5.00000000e+01],
       [  1.00000000e+00,   8.00000000e+00,   6.40000000e+01,
          5.12000000e+02,   1.30000000e+01],
       ..., 
       [  1.00000000e+00,   1.30000000e+01,   1.69000000e+02,
          2.19700000e+03,   2.40000000e+01],
       [  1.00000000e+00,   7.00000000e+00,   4.90000000e+01,
          3.43000000e+02,   7.00000000e+00],
       [  1.00000000e+00,   1.20000000e+01,   1.44000000e+02,
          1.72800000e+03,   2.50000000e+01]])

In [9]:
w_val = sess.run(w)

def predict(x,y):
    result = 0.0
    for n in range(0, dim):
        result += w_val[n][0] * x**n
    result += w_val[dim][0] * y
    return result

fig = plt.figure()
subplot = fig.add_subplot(1,1,1)
#subplot.set_xlim(1, 18)
#subplot.set_ylim(0, 300)
#subplot.scatter(x_array, train_t)
linex = np.linspace(1, 18, 100)
liney = predict(linex,10)
subplot.plot(linex, liney)

[<matplotlib.lines.Line2D at 0x132710f28>]

In [None]:
# 条件に従って単勝払い戻し金を抽出する        
table = []
for year in range(2010,2016):
    data = buiuld_zenso_data(year)
    for jyuni_limit in range(5,8):
        for timediff_limit in range(10,20,1):
            #print("Juni:",jyuni_limit,"   Time:", timediff_limit)
            sorted_2 = data[(data['ZensoKakuteiJyuni'] >= jyuni_limit) & (data['ZensoTimeDiff'] <= timediff_limit)]['PayTansyoPay1']
            #print(sorted_2.sum())
            table += [[year,jyuni_limit,timediff_limit,sorted_2.sum()]]
df = DataFrame(table)
df

In [None]:
df.to_csv('tansho_pay.csv')

In [None]:
df = pd.read_csv('tansho_pay.csv',names=['No.','Year','JyuniLimit','TimeLimit','Return'])
for jyuni_limit in range(5,8):
    df2010 = df[(df['Year'] == 2014) & (df['JyuniLimit'] == jyuni_limit)]
    df2010.plot()

In [None]:
# 馬券購入シミュレーション
def simulate_buy(year):
    print("**********")
    print("Year: %d" % year)
    data = buiuld_zenso_data(year)
    #data = data_from_db[str(year)].copy()
    print("ticket count:", len(data))

    #  前走データがないデータは除く
    data = data[np.isfinite(data.ZensoKakuteiJyuni)]

    # 前走着差が負のデータは0に設定する 
    data.loc[data['ZensoTimeDiff'] < 0, 'ZensoTimeDiff'] = 0

    ###### 条件に従って単勝払い戻し金を抽出する        
    data = data[(data['ZensoKakuteiJyuni'] >= 10)]
    data = data[(data['ZensoTimeDiff'] <= 10.0)]

    # 購入金額を引く
    data['payback'] = data.PayTansyoPay1 - 100
    accumulation = data.payback.cumsum(skipna=False)
    print("count:", len(data))
    accumulation.plot()
    print ("min:", accumulation.min())
    print ("year end:", accumulation.iloc[-1]) 

In [None]:
for y in range(2010,2018):
    simulate_buy (year=y)