In [10]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
from sqlalchemy import create_engine
import matplotlib.pyplot as plt
import tensorflow as tf
%matplotlib

Using matplotlib backend: MacOSX


In [11]:
def pickup_data(year, JyoCD=None, race_type=0):
    """
    race_type:
        0: 芝とダート
        1: ダート
        2: 芝
    """
    # x分xx秒の文字列を秒のfloatに変換
    def convert_time_string(s):
        min = float(s[0])
        sec = float(s[1:3])
        sec10 = float(s[3])
        return min * 60 + sec + (sec10/10)

    engine = create_engine('mysql+pymysql://uma:UmaUma123!@localhost/everydb2_2?charset=utf8')

    # get all uma_race record
    sql = "select a.Year,a.MonthDay,a.JyoCD,a.RaceNum,a.KettoNum,a.Bamei,a.KakuteiJyuni,a.Time,a.TimeDiff, a.Odds, a.Ninki, \
        b.Kyori,b.TrackCD, \
        c.PayTansyoPay1, c.PayTansyoNinki1 \
        from n_uma_race as a, n_race as b, n_harai as c \
        where a.Year = %s and \
        a.Year = b.Year and a.MonthDay = b.MonthDay and a.JyoCD = b.JyoCD and a.RaceNum = b.RaceNum and \
        a.Year = c.Year and a.MonthDay = c.MonthDay and a.JyoCD = c.JyoCD and a.RaceNum = c.RaceNum" % year
    
    all_uma_race = pd.read_sql_query(sql, engine)

    #　文字列を数字に変換
    all_uma_race[["Kyori"]]=all_uma_race[["Kyori"]].astype(float)
    all_uma_race[["TrackCD"]]=all_uma_race[["TrackCD"]].astype(int)
    all_uma_race['Seconds'] = all_uma_race['Time'].map(convert_time_string)
    all_uma_race[["PayTansyoPay1"]]=all_uma_race[["PayTansyoPay1"]].astype(float)
    all_uma_race[["PayTansyoNinki1"]]=all_uma_race[["PayTansyoNinki1"]].astype(int)
    all_uma_race[["TimeDiff"]]=all_uma_race[["TimeDiff"]].astype(float)
    all_uma_race[["KakuteiJyuni"]]=all_uma_race[["KakuteiJyuni"]].astype(int)
    all_uma_race[["Odds"]]=all_uma_race[["Odds"]].astype(float)
    all_uma_race[["Ninki"]]=all_uma_race[["Ninki"]].astype(int)


    # Time 0のデータを除外
    all_uma_race = all_uma_race[all_uma_race['Seconds'] > 0]
    # 障害レースを除外
    all_uma_race = all_uma_race[all_uma_race['TrackCD'] < 50]
    
    # ダートレースに限定
    if race_type == 1:
        all_uma_race = all_uma_race[all_uma_race['TrackCD'] >= 23]
    # 芝レースに限定
    elif race_type == 2:
        all_uma_race = all_uma_race[all_uma_race['TrackCD'] <= 22]
        all_uma_race = all_uma_race[all_uma_race['TrackCD'] >= 10]

    # KakuteiJyuni=01のみ対象とする
    # all_uma_race = all_uma_race[all_uma_race['KakuteiJyuni'] == '01']

    # 競馬場を限定する
    if JyoCD is not None:
        all_uma_race = all_uma_race[all_uma_race['JyoCD'] == JyoCD]

    #単勝払戻金を確定順位１以外の馬は0に設定する
    all_uma_race.loc[all_uma_race['KakuteiJyuni'] != 1,'PayTansyoPay1'] = 0
    #単勝人気を確定順位１以外の馬はNoneに設定する
    all_uma_race.loc[all_uma_race['KakuteiJyuni'] != 1,'PayTansyoNinki1'] = None
    return all_uma_race

In [12]:
def buiuld_zenso_data(year):
    data = pickup_data(year=year)
    # 前走の確定順位と時間差をカラムに追加する
    sorted = data.sort_values(by=['KettoNum','Year','MonthDay'])
    for i in range(0, sorted.shape[0]-1):
        kt1 = sorted.iloc[i]['KettoNum']
        kt2 = sorted.iloc[i+1]['KettoNum']
        if kt1 == kt2:
            sorted.loc[sorted.index[i],'ZensoKakuteiJyuni']  = sorted.iloc[i+1]['KakuteiJyuni']
            sorted.loc[sorted.index[i],'ZensoTimeDiff']  = sorted.iloc[i+1]['TimeDiff']
        else:
            sorted.loc[sorted.index[i],'ZensoKakuteiJyuni']  = None
            sorted.loc[sorted.index[i],'ZensoTimeDiff']  = None
    return sorted.sort_index()

In [13]:
#データをDBから読み込み
data_from_db = {}
year = 2017
data_from_db[str(year)] = buiuld_zenso_data(year)

In [14]:
# Tensor Flowによる回帰分析
# 各種パラメータと回収率の関係
data = data_from_db[str(year)].copy()

#data['log_tansho_pay'] = data.PayTansyoPay1.apply(lambda x: np.log(x+1))

# 回帰式の次数
dim = 4
x = tf.placeholder(tf.float32, [None, dim+1])
w = tf.Variable(tf.zeros([dim+1,1]))
y = tf.matmul(x,w)
t = tf.placeholder(tf.float32, [None, 1])

#誤差関数（最小二乗法)
loss = tf.reduce_sum(tf.square(y - t))

#勾配下降方
train_step = tf.train.AdamOptimizer().minimize(loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

#  前走データがないデータは除く
data = data[np.isfinite(data.ZensoKakuteiJyuni)]

# 前走着差が負のデータは0に設定する 
data.loc[data['ZensoTimeDiff'] < 0.0, 'ZensoTimeDiff'] = 0.0

# 予測値は払い戻し額(np array)

#train_t = np.array(data.log_tansho_pay)
train_t = np.array(data.PayTansyoPay1)

#　説明変数の設定

# 前走確定順位
x_array = np.array(data.ZensoKakuteiJyuni)

#前走着差
#x_array = np.array(data.ZensoTimeDiff)

# 単勝人気
# x_array = np.array(data.Ninki)

# 単勝Odds
#x_array = np.array(data.Odds)

data_count = len(x_array)

train_t = train_t.reshape([data_count, 1])
train_x = np.zeros([data_count,dim+1])

for row in range(0,data_count):
    for col in range(0, dim):
        train_x [row][col] = x_array[row] ** col

i = 0
for _ in range(200000):
    i += 1
    sess.run(train_step, feed_dict={x:train_x, t: train_t})
    if i % 10000 == 0:
        loss_val = sess.run(loss, feed_dict={x:train_x, t: train_t})
        print ('Step : %8d, Loss: %12.2f' % (i, loss_val))

Step :    10000, Loss: 12396089344.00
Step :    20000, Loss: 12381460480.00
Step :    30000, Loss: 12378497024.00
Step :    40000, Loss: 12377271296.00
Step :    50000, Loss: 12376395776.00
Step :    60000, Loss: 12375853056.00
Step :    70000, Loss: 12375476224.00
Step :    80000, Loss: 12375738368.00
Step :    90000, Loss: 12375799808.00
Step :   100000, Loss: 12375820288.00
Step :   110000, Loss: 12375820288.00
Step :   120000, Loss: 12375794688.00
Step :   130000, Loss: 12375824384.00
Step :   140000, Loss: 12375729152.00
Step :   150000, Loss: 12375742464.00
Step :   160000, Loss: 12375820288.00
Step :   170000, Loss: 12375803904.00
Step :   180000, Loss: 12375817216.00
Step :   190000, Loss: 12375820288.00
Step :   200000, Loss: 12375865344.00


In [10]:
w_val = sess.run(w)
print (w_val)

[[  9.19660568e+01]
 [ -1.97686684e+00]
 [  2.90925968e-02]
 [  1.11132940e-04]
 [  0.00000000e+00]]


In [15]:
w_val = sess.run(w)

def predict(x,y):
    result = 0.0
    for n in range(0, dim):
        result += w_val[n][0] * x**n
    result += w_val[dim][0] * y
    return result

XLIMIT=18
fig = plt.figure()
subplot = fig.add_subplot(1,1,1)
subplot.set_xlim(0, XLIMIT)
#subplot.set_ylim(0, 300)

# 散布図の描画
#subplot.scatter(x_array, train_t)

#回帰直線の描画
linex = np.linspace(1, XLIMIT, 100)
liney = predict(linex,10)
subplot.plot(linex, liney, color="red")

#ラベルの設定
#subplot.set_title("Previous race time diff and return(%s)" % (year))
#subplot.set_xlabel('Previous race time diff[unit:0.1 sec]')
subplot.set_title("Previous race order and return(%s)" % (year))
subplot.set_xlabel('Previous race order')

subplot.set_ylabel('Return')

<matplotlib.text.Text at 0x1329688d0>