In [29]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
from tqdm.notebook import tqdm
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import lightgbm
from sklearn.metrics import accuracy_score
from imblearn.under_sampling import RandomUnderSampler
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import re
import pickle

In [30]:
# Load the pre-built tables this notebook works from (paths are relative to the
# current working directory).
results_addinfo = pd.read_pickle("results_addinfo.pickle")
horse_history = pd.read_pickle("horse_history.pickle")
jockey_history = pd.read_pickle("jockey_history.pickle")
# Per-label lookup into horse_history, built once so later cells can index by key.
# Note: .loc[idx] yields a Series rather than a DataFrame when a label matches a
# single row, which is why the feature functions re-wrap non-DataFrame values.
dict_horse_history = {idx: horse_history.loc[idx] for idx in horse_history.index.unique()}

# SECURITY: unpickling executes arbitrary code embedded in the file -- only load
# pickle files that you produced yourself or otherwise fully trust.
with open("pay_dict.pickle", "rb") as f:
    pay_dict = pickle.load(f)
with open("horse_ped_dict.pickle", "rb") as f:
    horse_ped_dict = pickle.load(f)

In [None]:
def siba(x):
    """Count the horse's prize-earning turf ("芝") races run before the row's date.

    Args:
        x: Mapping-like row providing "horse_id" (a key of the module-level
            ``dict_horse_history``) and "date" (the cut-off date).

    Returns:
        int: Number of history rows whose "日付" is strictly earlier than
        ``x["date"]``, whose "賞金" is greater than 0 and whose "距離" text
        starts with "芝".
    """
    history = dict_horse_history[x["horse_id"]]
    if not isinstance(history, pd.DataFrame):
        # A non-DataFrame entry (e.g. a single-row Series) becomes a one-row frame.
        history = pd.DataFrame(history).T
    # Build boolean masks instead of writing helper columns: the cached history
    # object is shared by every feature function and must not be mutated here.
    # pd.Timedelta stands in for the bare `timedelta`, which is never imported.
    is_past = x["date"] - history["日付"] > pd.Timedelta(0)
    won_prize = history["賞金"] > 0
    on_surface = history["距離"].str[0] == "芝"
    return int((is_past & won_prize & on_surface).sum())

def function1(x):
    """Return the first character of ``x["距離"]`` after converting it to text."""
    distance_text = str(x["距離"])
    return distance_text[0]
def function2(x):
    """Return ``x["距離"]`` converted to text with its first character removed."""
    distance_text = str(x["距離"])
    return distance_text[1:]

def siba_ave_order(x):
    """Average finishing position over the horse's turf ("芝") races before the row's date.

    Args:
        x: Mapping-like row providing "horse_id" (a key of the module-level
            ``dict_horse_history``) and "date" (the cut-off date).

    Returns:
        Mean of "着順" over history rows dated strictly before ``x["date"]`` whose
        "距離" text starts with "芝", using only values that are ``int``/``float``
        instances; 0 when no such row exists.
    """
    history = dict_horse_history[x["horse_id"]]
    if not isinstance(history, pd.DataFrame):
        # A non-DataFrame entry (e.g. a single-row Series) becomes a one-row frame.
        history = pd.DataFrame(history).T
    # Masks are built locally so the shared cached history is never mutated.
    # pd.Timedelta stands in for the bare `timedelta`, which is never imported.
    # The date filter used to be computed and then ignored, so races run after
    # the cut-off leaked into the average; it is applied here.
    is_past = x["date"] - history["日付"] > pd.Timedelta(0)
    on_surface = history["距離"].str[0] == "芝"
    finishes = history.loc[is_past & on_surface, "着順"]
    if finishes.empty:
        return 0
    # Keep numeric placings only; other entries (e.g. text codes) are skipped.
    numeric = finishes[finishes.apply(lambda v: isinstance(v, (int, float)))]
    if len(numeric) == 0:
        return 0
    return numeric.mean()


def dart(x):
    """Count the horse's prize-earning dirt ("ダ") races run before the row's date.

    Args:
        x: Mapping-like row providing "horse_id" (a key of the module-level
            ``dict_horse_history``) and "date" (the cut-off date).

    Returns:
        int: Number of history rows whose "日付" is strictly earlier than
        ``x["date"]``, whose "賞金" is greater than 0 and whose "距離" text
        starts with "ダ".
    """
    history = dict_horse_history[x["horse_id"]]
    if not isinstance(history, pd.DataFrame):
        # A non-DataFrame entry (e.g. a single-row Series) becomes a one-row frame.
        history = pd.DataFrame(history).T
    # Build boolean masks instead of writing helper columns: the cached history
    # object is shared by every feature function and must not be mutated here.
    # pd.Timedelta stands in for the bare `timedelta`, which is never imported.
    is_past = x["date"] - history["日付"] > pd.Timedelta(0)
    won_prize = history["賞金"] > 0
    on_surface = history["距離"].str[0] == "ダ"
    return int((is_past & won_prize & on_surface).sum())

def dart_ave_order(x):
    """Average finishing position over the horse's dirt ("ダ") races before the row's date.

    Args:
        x: Mapping-like row providing "horse_id" (a key of the module-level
            ``dict_horse_history``) and "date" (the cut-off date).

    Returns:
        Mean of "着順" over history rows dated strictly before ``x["date"]`` whose
        "距離" text starts with "ダ", using only values that are ``int``/``float``
        instances; 0 when no such row exists.
    """
    history = dict_horse_history[x["horse_id"]]
    if not isinstance(history, pd.DataFrame):
        # A non-DataFrame entry (e.g. a single-row Series) becomes a one-row frame.
        history = pd.DataFrame(history).T
    # Masks are built locally so the shared cached history is never mutated.
    # pd.Timedelta stands in for the bare `timedelta`, which is never imported.
    # The date filter used to be computed and then ignored, so races run after
    # the cut-off leaked into the average; it is applied here.
    is_past = x["date"] - history["日付"] > pd.Timedelta(0)
    on_surface = history["距離"].str[0] == "ダ"
    finishes = history.loc[is_past & on_surface, "着順"]
    if finishes.empty:
        return 0
    # Keep numeric placings only; other entries (e.g. text codes) are skipped.
    numeric = finishes[finishes.apply(lambda v: isinstance(v, (int, float)))]
    if len(numeric) == 0:
        return 0
    return numeric.mean()

def shougai(x):
    """Count the horse's prize-earning jump ("障") races run before the row's date.

    Args:
        x: Mapping-like row providing "horse_id" (a key of the module-level
            ``dict_horse_history``) and "date" (the cut-off date).

    Returns:
        int: Number of history rows whose "日付" is strictly earlier than
        ``x["date"]``, whose "賞金" is greater than 0 and whose "距離" text
        starts with "障".
    """
    history = dict_horse_history[x["horse_id"]]
    if not isinstance(history, pd.DataFrame):
        # A non-DataFrame entry (e.g. a single-row Series) becomes a one-row frame.
        history = pd.DataFrame(history).T
    # Build boolean masks instead of writing helper columns: the cached history
    # object is shared by every feature function and must not be mutated here.
    # pd.Timedelta stands in for the bare `timedelta`, which is never imported.
    is_past = x["date"] - history["日付"] > pd.Timedelta(0)
    won_prize = history["賞金"] > 0
    on_surface = history["距離"].str[0] == "障"
    return int((is_past & won_prize & on_surface).sum())

def shougai_ave_order(x):
    """Average finishing position over the horse's jump ("障") races before the row's date.

    Args:
        x: Mapping-like row providing "horse_id" (a key of the module-level
            ``dict_horse_history``) and "date" (the cut-off date).

    Returns:
        Mean of "着順" over history rows dated strictly before ``x["date"]`` whose
        "距離" text starts with "障", using only values that are ``int``/``float``
        instances; 0 when no such row exists.
    """
    history = dict_horse_history[x["horse_id"]]
    if not isinstance(history, pd.DataFrame):
        # A non-DataFrame entry (e.g. a single-row Series) becomes a one-row frame.
        history = pd.DataFrame(history).T
    # Masks are built locally so the shared cached history is never mutated.
    # pd.Timedelta stands in for the bare `timedelta`, which is never imported.
    # The date filter used to be computed and then ignored, so races run after
    # the cut-off leaked into the average; it is applied here.
    is_past = x["date"] - history["日付"] > pd.Timedelta(0)
    on_surface = history["距離"].str[0] == "障"
    finishes = history.loc[is_past & on_surface, "着順"]
    if finishes.empty:
        return 0
    # Keep numeric placings only; other entries (e.g. text codes) are skipped.
    numeric = finishes[finishes.apply(lambda v: isinstance(v, (int, float)))]
    if len(numeric) == 0:
        return 0
    return numeric.mean()

def short_distance(x):
    """Count the horse's prize-earning races under 1300 run before the row's date.

    Args:
        x: Mapping-like row providing "horse_id" (a key of the module-level
            ``dict_horse_history``) and "date" (the cut-off date).

    Returns:
        int: Number of history rows whose "日付" is strictly earlier than
        ``x["date"]``, whose "賞金" is greater than 0 and whose distance (the
        number following the first character of "距離") is below 1300.
    """
    history = dict_horse_history[x["horse_id"]]
    if not isinstance(history, pd.DataFrame):
        # A non-DataFrame entry (e.g. a single-row Series) becomes a one-row frame.
        history = pd.DataFrame(history).T
    # Build boolean masks instead of writing helper columns: the cached history
    # object is shared by every feature function and must not be mutated here.
    # pd.Timedelta stands in for the bare `timedelta`, which is never imported.
    is_past = x["date"] - history["日付"] > pd.Timedelta(0)
    won_prize = history["賞金"] > 0
    # Unparseable distances become NaN and therefore never match the band.
    distance = pd.to_numeric(history["距離"].str[1:], errors="coerce")
    in_band = distance < 1300
    return int((is_past & won_prize & in_band).sum())

def short_ave_order(x):
    """Average finishing position over the horse's races under 1300 before the row's date.

    Args:
        x: Mapping-like row providing "horse_id" (a key of the module-level
            ``dict_horse_history``) and "date" (the cut-off date).

    Returns:
        Mean of "着順" over history rows dated strictly before ``x["date"]`` whose
        distance (the number following the first character of "距離") is below
        1300, using only values that are ``int``/``float`` instances; 0 when no
        such row exists.
    """
    history = dict_horse_history[x["horse_id"]]
    if not isinstance(history, pd.DataFrame):
        # A non-DataFrame entry (e.g. a single-row Series) becomes a one-row frame.
        history = pd.DataFrame(history).T
    # Masks are built locally so the shared cached history is never mutated.
    # pd.Timedelta stands in for the bare `timedelta`, which is never imported.
    # The date filter used to be computed and then ignored, so races run after
    # the cut-off leaked into the average; it is applied here.
    is_past = x["date"] - history["日付"] > pd.Timedelta(0)
    # Unparseable distances become NaN and therefore never match the band.
    distance = pd.to_numeric(history["距離"].str[1:], errors="coerce")
    in_band = distance < 1300
    finishes = history.loc[is_past & in_band, "着順"]
    if finishes.empty:
        return 0
    # Keep numeric placings only; other entries (e.g. text codes) are skipped.
    numeric = finishes[finishes.apply(lambda v: isinstance(v, (int, float)))]
    if len(numeric) == 0:
        return 0
    return numeric.mean()

def mile_distance(x):
    """Count the horse's prize-earning races in the 1300-1899 band before the row's date.

    Args:
        x: Mapping-like row providing "horse_id" (a key of the module-level
            ``dict_horse_history``) and "date" (the cut-off date).

    Returns:
        int: Number of history rows whose "日付" is strictly earlier than
        ``x["date"]``, whose "賞金" is greater than 0 and whose distance (the
        number following the first character of "距離") satisfies
        ``1300 <= distance < 1900``.
    """
    history = dict_horse_history[x["horse_id"]]
    if not isinstance(history, pd.DataFrame):
        # A non-DataFrame entry (e.g. a single-row Series) becomes a one-row frame.
        history = pd.DataFrame(history).T
    # Build boolean masks instead of writing helper columns: the cached history
    # object is shared by every feature function and must not be mutated here.
    # pd.Timedelta stands in for the bare `timedelta`, which is never imported.
    is_past = x["date"] - history["日付"] > pd.Timedelta(0)
    won_prize = history["賞金"] > 0
    # Unparseable distances become NaN and therefore never match the band.
    distance = pd.to_numeric(history["距離"].str[1:], errors="coerce")
    in_band = (1300 <= distance) & (distance < 1900)
    return int((is_past & won_prize & in_band).sum())

def mile_ave_order(x):
    """Average finishing position over the horse's 1300-1899 races before the row's date.

    Args:
        x: Mapping-like row providing "horse_id" (a key of the module-level
            ``dict_horse_history``) and "date" (the cut-off date).

    Returns:
        Mean of "着順" over history rows dated strictly before ``x["date"]`` whose
        distance (the number following the first character of "距離") satisfies
        ``1300 <= distance < 1900``, using only values that are ``int``/``float``
        instances; 0 when no such row exists.
    """
    history = dict_horse_history[x["horse_id"]]
    if not isinstance(history, pd.DataFrame):
        # A non-DataFrame entry (e.g. a single-row Series) becomes a one-row frame.
        history = pd.DataFrame(history).T
    # Masks are built locally so the shared cached history is never mutated.
    # pd.Timedelta stands in for the bare `timedelta`, which is never imported.
    # The date filter used to be computed and then ignored, so races run after
    # the cut-off leaked into the average; it is applied here.
    is_past = x["date"] - history["日付"] > pd.Timedelta(0)
    # Unparseable distances become NaN and therefore never match the band.
    distance = pd.to_numeric(history["距離"].str[1:], errors="coerce")
    in_band = (1300 <= distance) & (distance < 1900)
    finishes = history.loc[is_past & in_band, "着順"]
    if finishes.empty:
        return 0
    # Keep numeric placings only; other entries (e.g. text codes) are skipped.
    numeric = finishes[finishes.apply(lambda v: isinstance(v, (int, float)))]
    if len(numeric) == 0:
        return 0
    return numeric.mean()

def middle_distance(x):
    """Count the horse's prize-earning races in the 1900-2749 band before the row's date.

    Args:
        x: Mapping-like row providing "horse_id" (a key of the module-level
            ``dict_horse_history``) and "date" (the cut-off date).

    Returns:
        int: Number of history rows whose "日付" is strictly earlier than
        ``x["date"]``, whose "賞金" is greater than 0 and whose distance (the
        number following the first character of "距離") satisfies
        ``1900 <= distance < 2750``.
    """
    history = dict_horse_history[x["horse_id"]]
    if not isinstance(history, pd.DataFrame):
        # A non-DataFrame entry (e.g. a single-row Series) becomes a one-row frame.
        history = pd.DataFrame(history).T
    # Build boolean masks instead of writing helper columns: the cached history
    # object is shared by every feature function and must not be mutated here.
    # pd.Timedelta stands in for the bare `timedelta`, which is never imported.
    is_past = x["date"] - history["日付"] > pd.Timedelta(0)
    won_prize = history["賞金"] > 0
    # Unparseable distances become NaN and therefore never match the band.
    distance = pd.to_numeric(history["距離"].str[1:], errors="coerce")
    in_band = (1900 <= distance) & (distance < 2750)
    return int((is_past & won_prize & in_band).sum())

def middle_ave_order(x):
    """Average finishing position over the horse's prize-earning 1900-2749 races before the row's date.

    Args:
        x: Mapping-like row providing "horse_id" (a key of the module-level
            ``dict_horse_history``) and "date" (the cut-off date).

    Returns:
        Mean of "着順" over history rows dated strictly before ``x["date"]`` with
        "賞金" greater than 0 whose distance (the number following the first
        character of "距離") satisfies ``1900 <= distance < 2750``, using only
        values that are ``int``/``float`` instances; 0 when no such row exists.
    """
    history = dict_horse_history[x["horse_id"]]
    if not isinstance(history, pd.DataFrame):
        # A non-DataFrame entry (e.g. a single-row Series) becomes a one-row frame.
        history = pd.DataFrame(history).T
    # Masks are built locally so the shared cached history is never mutated.
    # pd.Timedelta stands in for the bare `timedelta`, which is never imported.
    is_past = x["date"] - history["日付"] > pd.Timedelta(0)
    # NOTE(review): the prize filter is kept as found, but the turf/dirt/jump/
    # short/mile averages in this notebook do not apply one -- confirm which
    # behaviour is intended.
    won_prize = history["賞金"] > 0
    # Unparseable distances become NaN and therefore never match the band.
    distance = pd.to_numeric(history["距離"].str[1:], errors="coerce")
    in_band = (1900 <= distance) & (distance < 2750)
    finishes = history.loc[is_past & won_prize & in_band, "着順"]
    if finishes.empty:
        return 0
    # Keep numeric placings only; other entries (e.g. text codes) are skipped.
    numeric = finishes[finishes.apply(lambda v: isinstance(v, (int, float)))]
    if len(numeric) == 0:
        return 0
    return numeric.mean()



def long_distance(x):
    """Count the horse's prize-earning races of 2750 or more run before the row's date.

    Args:
        x: Mapping-like row providing "horse_id" (a key of the module-level
            ``dict_horse_history``) and "date" (the cut-off date).

    Returns:
        int: Number of history rows whose "日付" is strictly earlier than
        ``x["date"]``, whose "賞金" is greater than 0 and whose distance (the
        number following the first character of "距離") is at least 2750.
    """
    history = dict_horse_history[x["horse_id"]]
    if not isinstance(history, pd.DataFrame):
        # A non-DataFrame entry (e.g. a single-row Series) becomes a one-row frame.
        history = pd.DataFrame(history).T
    # Build boolean masks instead of writing helper columns: the cached history
    # object is shared by every feature function and must not be mutated here.
    # pd.Timedelta stands in for the bare `timedelta`, which is never imported.
    is_past = x["date"] - history["日付"] > pd.Timedelta(0)
    won_prize = history["賞金"] > 0
    # Unparseable distances become NaN and therefore never match the band.
    distance = pd.to_numeric(history["距離"].str[1:], errors="coerce")
    in_band = 2750 <= distance
    return int((is_past & won_prize & in_band).sum())

def long_ave_order(x):
    """Average finishing position over the horse's prize-earning races of 2750+ before the row's date.

    Args:
        x: Mapping-like row providing "horse_id" (a key of the module-level
            ``dict_horse_history``) and "date" (the cut-off date).

    Returns:
        Mean of "着順" over history rows dated strictly before ``x["date"]`` with
        "賞金" greater than 0 whose distance (the number following the first
        character of "距離") is at least 2750, using only values that are
        ``int``/``float`` instances; 0 when no such row exists.
    """
    history = dict_horse_history[x["horse_id"]]
    if not isinstance(history, pd.DataFrame):
        # A non-DataFrame entry (e.g. a single-row Series) becomes a one-row frame.
        history = pd.DataFrame(history).T
    # Masks are built locally so the shared cached history is never mutated.
    # pd.Timedelta stands in for the bare `timedelta`, which is never imported.
    is_past = x["date"] - history["日付"] > pd.Timedelta(0)
    # NOTE(review): the prize filter is kept as found, but the turf/dirt/jump/
    # short/mile averages in this notebook do not apply one -- confirm which
    # behaviour is intended.
    won_prize = history["賞金"] > 0
    # Unparseable distances become NaN and therefore never match the band.
    distance = pd.to_numeric(history["距離"].str[1:], errors="coerce")
    in_band = 2750 <= distance
    finishes = history.loc[is_past & won_prize & in_band, "着順"]
    if finishes.empty:
        return 0
    # Keep numeric placings only; other entries (e.g. text codes) are skipped.
    numeric = finishes[finishes.apply(lambda v: isinstance(v, (int, float)))]
    if len(numeric) == 0:
        return 0
    return numeric.mean()
    

In [None]:
def _ancestor_inverse_finish_ratio(ancestor_id):
    """Return 1 / mean("着順" / "頭数") for one ancestor, or None if it cannot be scored.

    The ancestor's race table is taken from ``dict_horse_history``; when missing
    it is downloaded from netkeiba, normalised and stored in that dict so each
    page is fetched at most once per session.
    """
    if ancestor_id not in dict_horse_history:
        horse_url = "https://db.netkeiba.com/horse/" + ancestor_id
        # Parse the page once and pick tables from the result (it used to be
        # downloaded a second time just to read table 4).
        tables = pd.read_html(horse_url)
        tmp = tables[3]
        if "通算産駒成績 詳しく見る" in tmp.columns:
            # Presumably a page whose fourth table is not a race record -- skip.
            return None
        if "受賞歴" in tmp.columns:
            tmp = tables[4]
        if "着順" not in tmp:
            # Diagnostic output for unexpected page layouts.
            print(horse_url)
            print(tmp)
        # NOTE(review): `fun` is not defined in the visible part of this notebook;
        # it must already exist in the kernel -- confirm where it comes from.
        tmp["着順"] = tmp.apply(fun, axis=1)
        tmp["着順"] = tmp["着順"].astype(int)
        tmp["賞金"] = tmp["賞金"].astype(float)
        tmp["頭数"] = tmp["頭数"].fillna(0)
        tmp["頭数"] = tmp["頭数"].astype(int)
        dict_horse_history[ancestor_id] = tmp

    history = dict_horse_history[ancestor_id]
    if not isinstance(history, pd.DataFrame):
        # A single-row entry is a Series; boolean row filtering needs a frame.
        history = pd.DataFrame(history).T
    # Coerce to numbers so text placings / object-dtype rows become NaN and are
    # ignored by mean() instead of raising.
    field_size = pd.to_numeric(history["頭数"], errors="coerce")
    finish = pd.to_numeric(history["着順"], errors="coerce")
    valid = field_size > 0
    ratio = (finish[valid] / field_size[valid]).mean()
    if pd.isna(ratio) or ratio <= 0:
        # No usable race: skip this ancestor rather than letting NaN/inf
        # propagate into the combined score.
        return None
    return 1 / ratio


def horse_ped_score(x):
    """Score a horse's pedigree from its parents' and grandparents' race results.

    Args:
        x: Mapping-like row providing "horse_id", a key of the module-level
            ``horse_ped_dict`` whose value holds "1_parent" (first two entries
            are read) and "2_parent" (first four entries are read). Element
            ``[0]`` of each entry is used as an ancestor id when it contains no
            non-digit character.

    Returns:
        Mean parent score plus half the mean grandparent score when both groups
        have scores; the mean of whichever group has scores otherwise; 0 when
        neither does. A parent score is 1 / mean("着順" / "頭数"); a grandparent
        score is half of that. Ancestors without a usable race are skipped.

    Side effects:
        May download ancestor pages from netkeiba and add them to
        ``dict_horse_history``.
    """
    horse_parents = horse_ped_dict[x["horse_id"]]
    parent_1 = horse_parents["1_parent"]
    parent_2 = horse_parents["2_parent"]
    # Raw-string pattern: "\D" in a plain literal is an invalid escape sequence.
    parent_1_id = [
        parent_1[i][0] for i in range(2)
        if re.search(r"\D", parent_1[i][0]) is None
    ]
    parent_2_id = [
        parent_2[i][0] for i in range(4)
        if re.search(r"\D", parent_2[i][0]) is None
    ]

    score1_list = []
    for ancestor_id in parent_1_id:
        score = _ancestor_inverse_finish_ratio(ancestor_id)
        if score is not None:
            score1_list.append(score)

    score2_list = []
    for ancestor_id in parent_2_id:
        score = _ancestor_inverse_finish_ratio(ancestor_id)
        if score is not None:
            score2_list.append(score * 0.5)

    # NOTE(review): grandparent scores are already halved above and are halved
    # again only when parent scores exist, so their effective weight is 0.25 in
    # the combined case but 0.5 when they stand alone -- confirm this is intended.
    if score1_list and score2_list:
        ped_score = (
            sum(score1_list) / len(score1_list)
            + (sum(score2_list) / len(score2_list)) * 0.5
        )
    elif score1_list:
        ped_score = sum(score1_list) / len(score1_list)
    elif score2_list:
        ped_score = sum(score2_list) / len(score2_list)
    else:
        ped_score = 0
    return ped_score
