In [1]:
import os
import sys
from bs4 import BeautifulSoup
import pymysql
import requests
import re
import time

from Utils.bulk_insert import BulkInsert

In [2]:
# Connection settings for the MySQL instance. Every value can be overridden
# through an environment variable; the literals are only the fallbacks that keep
# the notebook running unchanged on the original machine.
# FIXME(security): the password fallback is a credential committed to source.
# Set NETKEIBA_DB_PASSWORD, rotate the password, and delete the literal.
db_params = {
    'host': os.environ.get('NETKEIBA_DB_HOST', '127.0.0.1'),
    'user': os.environ.get('NETKEIBA_DB_USER', 'root'),
    'password': os.environ.get('NETKEIBA_DB_PASSWORD', 'daigo1123'),
    'database': os.environ.get('NETKEIBA_DB_NAME', 'dev_netkeiba'),
    'port': int(os.environ.get('NETKEIBA_DB_PORT', '3306')),
    'charset': 'utf8'
}
# Module-level connection shared by the query and bulk-insert helpers below.
con = pymysql.connect(**db_params)

# Notebook-wide configuration: scraping URL templates and, per database table,
# the column names passed to _bulk_insert.
parameters = {

    # URL templates for scraping. RACE_TABLE gets the race id appended;
    # the other two are filled with str.format(RACE_ID=...).
    'URL_ABOUT_NETKEIBA': {
        'RACE_TABLE': 'https://race.netkeiba.com/?pid=race_old&id=c',
        'RACE_RESULT': 'https://race.netkeiba.com/?pid=race&id=c{RACE_ID}&mode=result',
        'RACE_PAST5_RESULT': 'https://race.netkeiba.com/?pid=race&id=c{RACE_ID}&mode=shutuba'
    },

    # parameters about model training (none defined yet)

    # Column names per database table, in the order the extractors build each row.
    'TABLE_COL_NAMES': {
        'race_master': [
            'race_id',
            'race_title',
            'race_course',
            'race_weather',
            'race_condition',
            'race_year',
            'race_month',
            'race_date',
            'race_dow',
            'starting_time',
            'race_info_1',
            'race_info_2',
            'race_info_3'
        ],
        'race_table_info': [
            'race_id',
            'bracket_num',
            'horse_num',
            'horse_name',
            'horse_age',
            'horse_sex',
            'weight_penalty',
            'jockey_name',
            'href_to_jockey',
            'owner_name',
            'href_to_owner',
            'horse_weight',
            'horse_weight_increment',
            'win_odds',
            'popularity_order'
        ],
        # NOTE(review): 'arrrival_order' has three r's here but is spelled
        # 'arrival_order' in race_past_5_result_info below; confirm which
        # spelling the actual table columns use before changing either.
        'race_result_info': [
            'race_id',
            'bracket_num',
            'horse_num',
            'arrival_time',
            'arrival_diff',
            'arrrival_order'
        ],
        'race_refund_info': [
            'race_id',
            'refund_type',
            'groupby_index',
            'bracket_num',
            'refund_yen',
            'popularity_order'
        ],
        # NOTE(review): five columns are listed, but _extract_past_5_race_result
        # builds seven-element rows (it also carries the past race title and id);
        # confirm the table schema and align one side with the other.
        'race_past_5_result_info':[
            'race_id',
            'bracket_num',
            'horse_num',
            'past_x',
            'arrival_order'
        ]
    },

    # Column names for dataframes (none defined yet)
    'DATAFRAME_COL_NAMES': {

    }
}

In [3]:
def _fetchall_and_make_list_by(query):
    try:
        cursor = con.cursor()
        cursor.execute(query)
        fetch_result = cursor.fetchall()
        fetch_result_list = [item for item in fetch_result]
        cursor.close()
        return fetch_result_list
    except Exception as e:
        print(e)

# def _execute_query(query):
#     try:
#         cursor = con.cursor()
#         cursor.execute(query)
#         cursor.close()
#         con.commit()
#     except Exception as e:
#         print(e)

# def _truncate_target_rows(race_id):
#     queries = [
#         'TRUNCATE TABLE race_master WHERE race_id = "{RACE_ID}";'.format(RACE_ID=race_id),
#         'TRUNCATE TABLE race_table_info WHERE race_id = "{RACE_ID}";'.format(RACE_ID=race_id)
#     ]
#     for query in queries:
#         print(query)
#         _execute_query(query)

def _bulk_insert(insert_list, target_table_name, insert_col_names):
    """Insert rows into a table through ``BulkInsert`` on the shared connection.

    Args:
        insert_list: Rows to insert, passed as ``insert_data``.
        target_table_name: Destination table, passed as ``target_table``.
        insert_col_names: Column names, passed as ``col_names``.

    Raises:
        TypeError: Re-raised after being printed when ``BulkInsert`` raises it.
    """
    try:
        bi = BulkInsert(con)
        bi.execute(insert_data=insert_list, target_table=target_table_name, col_names=insert_col_names)
    except TypeError as e:
        print(e)
        # A bare raise keeps the original message and traceback; raising a
        # fresh TypeError would discard both.
        raise

## Get info about the pre-race entry (shutuba) table

In [4]:
def _get_num_str(num):
    num_str = str(num) if num >= 10 else '0' + str(num)
    return num_str

In [5]:
# def make_race_id_and_target_url(event_year, event_place, event_month, event_time, event_race):
#     race_id = str(event_year) + _get_num_str(event_place) + _get_num_str(event_month) + _get_num_str(event_time) + _get_num_str(event_race)
#     target_url = parameters['URL_ABOUT_NETKEIBA']['RACE_TABLE'] + race_id
    
#     return race_id, target_url

def _make_race_ids_list():
    """Fetch every row of ``race_calender_master`` via the shared query helper."""
    return _fetchall_and_make_list_by('SELECT * FROM race_calender_master;')

def _make_race_id_and_target_url(race_calender):
    """Build the race id and its entry-table URL from one calendar row.

    Each field of ``race_calender`` is rendered with ``_get_num_str`` and the
    pieces are concatenated into the race id, which is then appended to the
    RACE_TABLE base URL.

    Returns:
        A ``(race_id, target_url)`` tuple.
    """
    id_parts = [_get_num_str(field) for field in race_calender]
    race_id = ''.join(id_parts)
    url_prefix = parameters['URL_ABOUT_NETKEIBA']['RACE_TABLE']
    return race_id, url_prefix + race_id

In [6]:
def _is_the_race_id_existing_in_master(race_id):
    """Report whether ``race_id`` is already recorded.

    A race counts as recorded when it appears in either ``race_master`` or
    ``race_master_not_existing``.

    Returns:
        ``True`` if either table returned at least one row, else ``False``.
    """
    rows_in_master = _fetchall_and_make_list_by("""
        SELECT race_id FROM race_master WHERE race_id = '{RACE_ID}';
    """.format(RACE_ID=race_id))
    rows_in_not_existing = _fetchall_and_make_list_by("""
        SELECT race_id FROM race_master_not_existing WHERE race_id = '{RACE_ID}';
    """.format(RACE_ID=race_id))

    known_rows = rows_in_master + rows_in_not_existing
    return len(known_rows) > 0

In [75]:
def _extract_common_info(soup, race_id):
    race_title = soup.find('div', class_='RaceList_Item02').find('div', class_='RaceName').text.replace(u'\n',u'')
    
    starting_time = soup.find('div', class_='RaceList_Item02').find('div', class_='RaceData01').text.replace(u'\n',u'').split('/')[0]
    starting_time = re.search('(.*)発走', starting_time).group(1)
    
    race_coure = soup.find('div', class_='RaceList_Item02').find('div', class_='RaceData01').text.replace(u'\n',u'').split('/')[1]

    race_weather = soup.find('div', class_='RaceList_Item02').find('div', class_='RaceData01').text.replace(u'\n',u'').split('/')[2]
    race_weather = re.search('天候:(.*)', race_weather).group(1)

    race_condition = soup.find('div', class_='RaceList_Item02').find('div', class_='RaceData01').text.replace(u'\n',u'').split('/')[3]
    race_condition = re.search('馬場:(.*)', race_condition).group(1)

    race_date_info = soup.find('dl', id='RaceList_DateList').find('dd', class_='Active').text
    race_year = race_id[:4]
    race_month = re.split('月|日|\(|\)', race_date_info)[0]
    race_date = re.split('月|日|\(|\)', race_date_info)[1]
    race_dow = re.split('月|日|\(|\)', race_date_info)[3]
    
    race_info_1 = ' '.join([i.text for i in soup.find('div', class_='RaceData02').find_all('span')])
    race_info_2 = ''
    race_info_3 = ''
    
    return [race_id, race_title, race_coure, race_weather, race_condition, race_year, race_month, race_date, race_dow, starting_time, race_info_1, race_info_2, race_info_3]

In [99]:
# Scratch check: look at the <span> inside the 11th <td> (the popularity cell)
# of the row at index 3 of the entry table.
# NOTE(review): this reads `soup`, which is only assigned in the fetch cell
# further down, so the cell fails on a fresh top-to-bottom run.
table_element = soup.find('table', class_='Shutuba_Table').find_all('tr')
row = 3
table_element[row].find_all('td')[10].find('span')

<span id="ninki-1_02">**</span>

In [90]:
def _extract_race_table(soup, race_id):
    this_race_table_info = []
    
    table_element = soup.find('table', class_='Shutuba_Table').find_all('tr')
    table_length = len(table_element)
    for row in range(3, table_length):
        bracket_num = int(table_element[row].find_all('td')[0].text)
        horse_num = int(table_element[row].find_all('td')[1].text)
        horse_name = table_element[row].find_all('td')[3].find('a').text
        href_to_horse = table_element[row].find_all('td')[3].find('a').attrs['href']
        sex_and_age = table_element[row].find_all('td')[4].text
        horse_sex = int(re.sub("\\D", "", sex_and_age))
        horse_age = re.match('[0-9a-zA-Zあ-んア-ン一-鿐]', sex_and_age).group()

        weight_penalty = table_element[row].find_all('td')[5].text
        if weight_penalty != '':
            weight_penalty = float(weight_penalty)
        else:
            weight_penalty = ''

        jockey_name = table_element[row].find_all('td')[6].text.replace(u'\n',u'')
        href_to_jockey = table_element[row].find_all('td')[6].find('a').attrs['href']
        owner_name = table_element[row].find_all('td')[7].text.replace(u'\n',u'')
        href_to_owner = table_element[row].find_all('td')[7].find('a').attrs['href']

        horse_weight_info = table_element[row].find_all('td')[8].text
        if horse_weight_info != '':
            horse_weight = int(re.split('\(|\)', horse_weight_info)[0])
            horse_weight_increment = re.split('\(|\)', horse_weight_info)[1]
        else:
            horse_weight = ''
            horse_weight_increment = ''

        win_odds = table_element[row].find_all('td')[9].text
        popularity_order = table_element[row].find_all('td')[10].text
        
        this_race_table_info.append([
            race_id,
            bracket_num,
            horse_num,
            horse_name,
            horse_age,
            horse_sex,
            weight_penalty,
            jockey_name,
            href_to_jockey,
            owner_name,
            href_to_owner,
            horse_weight,
            horse_weight_increment,
            win_odds,
            popularity_order
        ])
        
    return this_race_table_info

In [13]:
def get_race_master_and_table_info():
    """Scrape and store master and entry-table info for every unrecorded race.

    For each row of the race calendar the race id and URL are built, races
    already recorded are skipped, the page is fetched and parsed, and the
    extracted rows are bulk-inserted into ``race_master`` and
    ``race_table_info``. A one-second pause follows every request, including
    requests whose page turns out to be unusable.

    Raises:
        TypeError: Propagated from ``_bulk_insert``.
    """
    for race_calender in _make_race_ids_list():
        race_id, target_url = _make_race_id_and_target_url(race_calender)

        if _is_the_race_id_existing_in_master(race_id):
            print('Info about', target_url, 'is already existing in master')
            continue

        try:
            try:
                # A timeout keeps one stalled connection from hanging the run.
                html = requests.get(target_url, timeout=10)
            except requests.RequestException as e:
                # Nothing was stored, so this race is retried on the next run.
                print('Request to', target_url, 'failed:', e)
                continue
            html.encoding = 'EUC-JP'
            soup = BeautifulSoup(html.text, 'html.parser')

            # attrs must be a dict; the previous set literal only worked because
            # BeautifulSoup treats a non-dict attrs value as a class search.
            # NOTE(review): this guard looks for 'race_table_old nk_tb_common'
            # while the extractors read 'Shutuba_Table'; confirm both exist on
            # the same page.
            if not soup.find_all('table', attrs={'class': 'race_table_old nk_tb_common'}):
                print('Target URL to requests ', target_url, 'does not exist')
                continue

            print('Target URL to requests: ', target_url)
            try:
                race_master_list = _extract_common_info(soup, race_id)
                race_table_info_list = _extract_race_table(soup, race_id)
                if ' ' in race_master_list:
                    continue
                # `parameters` is a module-level dict; there is no `self` here.
                _bulk_insert([race_master_list], 'race_master', parameters['TABLE_COL_NAMES']['race_master'])
                _bulk_insert(race_table_info_list, 'race_table_info', parameters['TABLE_COL_NAMES']['race_table_info'])
            except (AttributeError, ValueError):
                print('\t This URL has no common info')
        finally:
            # Runs on every path above so skipped pages are rate-limited too.
            time.sleep(1)

In [None]:
# race_master_list, race_table_info_list = get_and_insert_race_master_and_table_info()
# get_and_insert_race_master_and_table_info()

In [108]:
# Manual smoke test: fetch one hard-coded race page and leave `race_id`,
# `target_url` and `soup` as notebook globals for the extraction cells below.
race_calender_master_list = _make_race_ids_list()
# race_calender = race_calender_master_list[0]
# Fixed calendar tuple; _make_race_id_and_target_url pads each field to at least
# two digits and joins them into the race id.
race_calender = (2019, 9, 5, 9, 1)
race_id, target_url = _make_race_id_and_target_url(race_calender)

print('Target URL to requests: ', target_url)

html = requests.get(target_url, timeout=5)
# Decode the response as EUC-JP before parsing.
html.encoding = 'EUC-JP'
soup = BeautifulSoup(html.text, 'html.parser')

Target URL to requests:  https://race.netkeiba.com/?pid=race_old&id=c201909050901


In [109]:
# Manual check: run the race-level extractor on the page fetched in the cell
# above and display the resulting list.
race_master_list = _extract_common_info(soup, race_id)
race_master_list

['201909050901',
 '2歳未勝利',
 ' ダ1800m (右)',
 '晴',
 '稍',
 '2019',
 '12',
 '28',
 '土',
 '10:05',
 '5回 阪神 9日目 サラ系２歳 未勝利 [指] 馬齢 16頭 本賞金:500,200,130,75,50万円',
 '',
 '']

In [110]:
# Manual check: run the per-horse extractor on the same page and display the
# rows it produced.
race_table_info_list = _extract_race_table(soup, race_id)
race_table_info_list

[['201909050901',
  1,
  2,
  'リトルクレバー',
  '牡',
  2,
  55.0,
  '北村友',
  'https://db.netkeiba.com/jockey/01102/',
  '栗東佐々木',
  'https://db.netkeiba.com/trainer/00429/',
  444,
  '-2',
  '---.-',
  '\n**\n'],
 ['201909050901',
  2,
  3,
  'カシノシャイニング',
  '牡',
  2,
  54.0,
  '森裕',
  'https://db.netkeiba.com/jockey/01165/',
  '栗東藤沢則',
  'https://db.netkeiba.com/trainer/01041/',
  436,
  '+2',
  '---.-',
  '\n**\n'],
 ['201909050901',
  2,
  4,
  'メイケイカガヤキ',
  '牡',
  2,
  55.0,
  '鮫島駿',
  'https://db.netkeiba.com/jockey/01157/',
  '栗東中竹',
  'https://db.netkeiba.com/trainer/01039/',
  488,
  '+8',
  '---.-',
  '\n**\n'],
 ['201909050901',
  3,
  5,
  'ホシムスメ',
  '牝',
  2,
  54.0,
  '藤井勘',
  'https://db.netkeiba.com/jockey/05525/',
  '栗東清水久',
  'https://db.netkeiba.com/trainer/01110/',
  472,
  '0',
  '---.-',
  '\n**\n'],
 ['201909050901',
  3,
  6,
  'ブルベアマイル',
  '牡',
  2,
  55.0,
  '酒井',
  'https://db.netkeiba.com/jockey/01034/',
  '栗東武英',
  'https://db.netkeiba.com/trainer/01161/',
  444,
 

In [None]:
# _bulk_insert(race_id, race_master_list, 'race_master', parameters['TABLE_COL_NAMES']['race_master'])

In [None]:
# _bulk_insert(race_id, race_table_info_list, 'race_table_info', parameters['TABLE_COL_NAMES']['race_table_info'])                        

## Get info about race result

In [None]:
def _fetchall_and_make_list_by(query, con):
    try:
        cursor = con.cursor()
        cursor.execute(query)
        fetch_result = cursor.fetchall()
        fetch_result_list = [item for item in fetch_result]
        cursor.close()
        return fetch_result_list
    except Exception as e:
        print(e)

In [None]:
def _make_target_url_about_race_result(race_id):
    """Fill the RACE_RESULT URL template with ``race_id`` and return the URL."""
    url_template = parameters['URL_ABOUT_NETKEIBA']['RACE_RESULT']
    return url_template.format(RACE_ID=race_id)

In [None]:
def _extract_race_ids_in_master_not_exist_in_race_result():
    """Return the distinct ``race_master`` ids that have no row in ``race_result_info``."""
    return _fetchall_and_make_list_by("""
        SELECT DISTINCT race_id 
        FROM race_master
        WHERE race_id NOT IN (SELECT DISTINCT race_id FROM race_result_info);
    """, con)

In [None]:
def _extract_race_result_info(soup, race_id):
    this_race_result_info = []
    table_length = len(soup.find('table', class_='race_table_01 nk_tb_common').find_all('tr'))
    
    for row in range(1, table_length):
        arrrival_order  = soup.find('table', class_='race_table_01 nk_tb_common').find_all('tr')[row].find_all('td')[0].text
        bracket_num = soup.find('table', class_='race_table_01 nk_tb_common').find_all('tr')[row].find_all('td')[1].text
        horse_num = soup.find('table', class_='race_table_01 nk_tb_common').find_all('tr')[row].find_all('td')[2].text
        arrival_time = soup.find('table', class_='race_table_01 nk_tb_common').find_all('tr')[row].find_all('td')[7].text
        arrival_diff = soup.find('table', class_='race_table_01 nk_tb_common').find_all('tr')[row].find_all('td')[8].text
        
        this_race_result_info.append([
            race_id,
            bracket_num,
            horse_num,
            arrival_time,
            arrival_diff,
            arrrival_order
        ])

    return this_race_result_info

In [None]:
def _extract_race_refund_info(soup, race_id):
    empty_refund_list = []
    refund_table_list = soup.find('dd', class_='fc').find_all('tr')
    for i in range(len(refund_table_list)):
        refund_table = refund_table_list[i]
        refund_type = refund_table.find('th').text

        if refund_type == '単勝':
            empty_refund_list.append(
                [race_id, '単勝', 1,  
                 int(refund_table.find_all('td')[0].text),
                 int(refund_table.find_all('td')[1].text.split('円')[0].replace(',', '')),
                 int(refund_table.find_all('td')[2].text.split('人気')[0].replace(',', ''))]
            )

        elif refund_type == '複勝':
            empty_refund_list.append(
                [race_id, '複勝', 1,  
                 int(refund_table.find_all('td')[0].text[:2]),
                 int(refund_table.find_all('td')[1].text.split('円')[0].replace(',', '')),
                 int(refund_table.find_all('td')[2].text.split('人気')[0].replace(',', ''))]
            )
            empty_refund_list.append(
                [race_id, '複勝', 2,  
                 int(refund_table.find_all('td')[0].text[2:4]),
                 int(refund_table.find_all('td')[1].text.split('円')[1].replace(',', '')),
                 int(refund_table.find_all('td')[2].text.split('人気')[1].replace(',', ''))]
            )
            try:
                empty_refund_list.append(
                    [race_id, '複勝', 3, 
                     int(refund_table.find_all('td')[0].text[4:6]),
                     int(refund_table.find_all('td')[1].text.split('円')[2].replace(',', '')),
                     int(refund_table.find_all('td')[2].text.split('人気')[2].replace(',', ''))]
                )
            except ValueError:
                pass

        elif refund_type == '枠連':
            empty_refund_list.append(
                [race_id, '枠連', 1,  
                 int(refund_table.find_all('td')[0].text.split('-')[0]),
                 int(refund_table.find_all('td')[1].text.split('円')[0].replace(',', '')),
                 int(refund_table.find_all('td')[2].text.split('人気')[0].replace(',', ''))],
            )
            empty_refund_list.append(
                [race_id, '枠連', 1,  
                 int(refund_table.find_all('td')[0].text.split('-')[1]),
                 int(refund_table.find_all('td')[1].text.split('円')[0].replace(',', '')),
                 int(refund_table.find_all('td')[2].text.split('人気')[0].replace(',', ''))]
            )

        elif refund_type == '馬連':
            empty_refund_list.append(
                [race_id, '馬連', 1,  
                int(refund_table.find_all('td')[0].text.split('-')[0]),
                int(refund_table.find_all('td')[1].text.split('円')[0].replace(',', '')),
                int(refund_table.find_all('td')[2].text.split('人気')[0].replace(',', ''))],
            )
            empty_refund_list.append(
                [race_id, '馬連', 1,  
                int(refund_table.find_all('td')[0].text.split('-')[1]),
                int(refund_table.find_all('td')[1].text.split('円')[0].replace(',', '')),
                int(refund_table.find_all('td')[2].text.split('人気')[0].replace(',', ''))]
            )   
            
        elif refund_type == 'ワイド':
            empty_refund_list.append(
                [race_id, 'ワイド', 1,  
                int(refund_table.find_all('td')[0].text[:2]),
                int(refund_table.find_all('td')[1].text.split('円')[0].replace(',', '')),
                int(refund_table.find_all('td')[2].text.split('人気')[0].replace(',', ''))]
            )
            empty_refund_list.append(
                [race_id, 'ワイド', 1,  
                int(refund_table.find_all('td')[0].text[5:7]),
                int(refund_table.find_all('td')[1].text.split('円')[0].replace(',', '')),
                int(refund_table.find_all('td')[2].text.split('人気')[0].replace(',', ''))]
            )
            empty_refund_list.append(
                [race_id, 'ワイド', 2,  
                int(refund_table.find_all('td')[0].text[7:9]),
                int(refund_table.find_all('td')[1].text.split('円')[1].replace(',', '')),
                int(refund_table.find_all('td')[2].text.split('人気')[1].replace(',', ''))]
            )
            empty_refund_list.append(
                [race_id, 'ワイド', 2,  
                int(refund_table.find_all('td')[0].text[12:14]),
                int(refund_table.find_all('td')[1].text.split('円')[1].replace(',', '')),
                int(refund_table.find_all('td')[2].text.split('人気')[1])],            
            )
            empty_refund_list.append(
                [race_id, 'ワイド', 3,  
                int(refund_table.find_all('td')[0].text[14:16]),
                int(refund_table.find_all('td')[1].text.split('円')[2].replace(',', '')),
                int(refund_table.find_all('td')[2].text.split('人気')[2].replace(',', ''))]
            )
            empty_refund_list.append(
                [race_id, 'ワイド', 3,  
                int(refund_table.find_all('td')[0].text[19:22]),
                int(refund_table.find_all('td')[1].text.split('円')[2].replace(',', '')),
                int(refund_table.find_all('td')[2].text.split('人気')[2].replace(',', ''))]
            )   
            
        elif refund_type == '馬単':
            empty_refund_list.append(
                [race_id, '馬単', 1,
                int(refund_table.find_all('td')[0].text.split('→')[0]),
                int(refund_table.find_all('td')[1].text.split('円')[0].replace(',', '')),
                int(refund_table.find_all('td')[2].text.split('人気')[0].replace(',', ''))]
            )
            empty_refund_list.append(
                [race_id, '馬単', 1,
                int(refund_table.find_all('td')[0].text.split('→')[1]),
                int(refund_table.find_all('td')[1].text.split('円')[0].replace(',', '')),
                int(refund_table.find_all('td')[2].text.split('人気')[0])]             
            )
            
        elif refund_type == '三連複':
            empty_refund_list.append(
                [race_id, '三連複', 1,
                int(refund_table.find_all('td')[0].text.split('-')[0]),
                int(refund_table.find_all('td')[1].text.split('円')[0].replace(',', '')),
                int(refund_table.find_all('td')[2].text.split('人気')[0].replace(',', ''))]
            )
            empty_refund_list.append(
                [race_id, '三連複', 1,
                int(refund_table.find_all('td')[0].text.split('-')[1]),
                int(refund_table.find_all('td')[1].text.split('円')[0].replace(',', '')),
                int(refund_table.find_all('td')[2].text.split('人気')[0].replace(',', ''))]
            )        
            empty_refund_list.append(
                [race_id, '三連複', 1,
                int(refund_table.find_all('td')[0].text.split('-')[2]),
                int(refund_table.find_all('td')[1].text.split('円')[0].replace(',', '')),
                int(refund_table.find_all('td')[2].text.split('人気')[0].replace(',', ''))] 
            )       
            
        elif refund_type == '三連単':
            empty_refund_list.append(
                [race_id, '三連単', 1,
                int(refund_table.find_all('td')[0].text.split('→')[0]),
                int(refund_table.find_all('td')[1].text.split('円')[0].replace(',', '')),
                int(refund_table.find_all('td')[2].text.split('人気')[0].replace(',', ''))]
            )
            empty_refund_list.append(
                [race_id, '三連単', 1,
                int(refund_table.find_all('td')[0].text.split('→')[1]),
                int(refund_table.find_all('td')[1].text.split('円')[0].replace(',', '')),
                int(refund_table.find_all('td')[2].text.split('人気')[0].replace(',', ''))]
            )        
            empty_refund_list.append(
                [race_id, '三連単', 1,
                int(refund_table.find_all('td')[0].text.split('→')[2]),
                int(refund_table.find_all('td')[1].text.split('円')[0].replace(',', '')),
                int(refund_table.find_all('td')[2].text.split('人気')[0].replace(',', ''))]
            )                

    return empty_refund_list

In [None]:
def get_race_result_info():
    """Scrape and store result and refund info for races that lack results.

    The race ids come from ``race_master`` rows with no counterpart in
    ``race_result_info``. For each id the result page is fetched, both
    extractors are run, and the rows are bulk-inserted into
    ``race_result_info`` and ``race_refund_info``. A one-second pause follows
    every request.

    Raises:
        TypeError: Propagated from ``_bulk_insert``.
        AttributeError, IndexError, ValueError: Propagated from the extractors
            when a page does not have the expected layout.
    """
    # The previous call targeted `_extract_race_ids_in_master`, which is not
    # defined anywhere in this notebook.
    pending_race_ids = _extract_race_ids_in_master_not_exist_in_race_result()

    for pending_row in pending_race_ids:
        race_id = pending_row[0]
        target_url = _make_target_url_about_race_result(race_id)

        try:
            try:
                # A timeout keeps one stalled connection from hanging the run.
                html = requests.get(target_url, timeout=10)
            except requests.RequestException as e:
                # Nothing was stored, so this race is retried on the next run.
                print('Request to', target_url, 'failed:', e)
                continue
            html.encoding = 'EUC-JP'
            soup = BeautifulSoup(html.text, 'html.parser')

            # attrs must be a dict; the previous set literal only worked because
            # BeautifulSoup treats a non-dict attrs value as a class search.
            if not soup.find_all('table', attrs={'class': 'race_table_01 nk_tb_common'}):
                print('Target URL to requests ', target_url, 'does not exist.')
                # Skip only this race: stopping the loop here would leave every
                # later race unprocessed, on this run and on each rerun.
                continue

            print('Target URL to requests: ', target_url)
            race_result_info_list = _extract_race_result_info(soup, race_id)
            race_refund_info_list = _extract_race_refund_info(soup, race_id)

            _bulk_insert(race_result_info_list, 'race_result_info', parameters['TABLE_COL_NAMES']['race_result_info'])
            _bulk_insert(race_refund_info_list, 'race_refund_info', parameters['TABLE_COL_NAMES']['race_refund_info'])
        finally:
            # Runs on every path above so skipped pages are rate-limited too.
            time.sleep(1)

In [None]:
# get_race_result_info()

In [None]:
# race_id = '201506050811'
# target_url = 'https://race.netkeiba.com/?pid=race&id=p201506050811&mode=result'
# html = requests.get(target_url)
# html.encoding = 'EUC-JP'
# soup = BeautifulSoup(html.text, 'html.parser')

In [None]:
# refund_table_list = soup.find('dd', class_='fc').find_all('tr')
# refund_table_list[1].find_all('td')[1]

In [None]:
# _extract_race_refund_info(soup, race_id)

## Get info about past 5 race results

In [None]:
def _extract_race_ids_in_master_not_exist_in_race_past_5_result():
    """Return the distinct ``race_master`` ids that have no row in ``race_past_5_result_info``."""
    return _fetchall_and_make_list_by("""
        SELECT DISTINCT race_id 
        FROM race_master
        WHERE race_id NOT IN (SELECT DISTINCT race_id FROM race_past_5_result_info);
    """, con)

In [None]:
def _make_target_url_about_past_5_race_result(race_id):
    """Fill the RACE_PAST5_RESULT URL template with ``race_id`` and return the URL."""
    url_template = parameters['URL_ABOUT_NETKEIBA']['RACE_PAST5_RESULT']
    return url_template.format(RACE_ID=race_id)

In [None]:
def _extract_past_5_race_result(soup, race_id):
    this_race_past5_result_info = []
    table_element = soup.find('table', class_='race_table_01 nk_tb_common shutuba_table').find_all('tr')
    table_length = len(table_element)

    for row in range(1, table_length):
        bracket_num = table_element[row].find_all('td')[0].text
        horse_num = table_element[row].find_all('td')[1].text

        post_x = 0
        for col in range(1, 15):
            try:
                race_name_element = table_element[row].find_all('td')[col].find('span', class_='race_name')
                past_x_race_title = race_name_element.text
                past_x_race_id = int(re.sub('\\D', '', race_name_element.find('a').attrs['href']))
                order = table_element[row].find_all('td')[col].find('span', class_='order').text
                post_x += 1
                this_race_past5_result_info.append([race_id, bracket_num, horse_num, post_x, past_x_race_title,  past_x_race_id, order])
            except (IndexError, AttributeError):
                pass

    return this_race_past5_result_info

In [None]:
def get_past_5_race_result_info():
    """Scrape and store past-race results for races that lack them.

    The race ids come from ``race_master`` rows with no counterpart in
    ``race_past_5_result_info``. For each id the page is fetched, the past
    results are extracted and bulk-inserted into ``race_past_5_result_info``.
    A one-second pause follows every request.

    Raises:
        TypeError: Propagated from ``_bulk_insert``.
        AttributeError, IndexError: Propagated from the extractor when a page
            does not have the expected layout.
    """
    pending_race_ids = _extract_race_ids_in_master_not_exist_in_race_past_5_result()

    for pending_row in pending_race_ids:
        race_id = pending_row[0]
        target_url = _make_target_url_about_past_5_race_result(race_id)

        try:
            try:
                # A timeout keeps one stalled connection from hanging the run.
                html = requests.get(target_url, timeout=10)
            except requests.RequestException as e:
                # Nothing was stored, so this race is retried on the next run.
                print('Request to', target_url, 'failed:', e)
                continue
            html.encoding = 'EUC-JP'
            soup = BeautifulSoup(html.text, 'html.parser')

            # attrs must be a dict; the previous set literal only worked because
            # BeautifulSoup treats a non-dict attrs value as a class search.
            if not soup.find_all('table', attrs={'class': 'race_table_01 nk_tb_common shutuba_table'}):
                print('Target URL to requests ', target_url, 'does not exist.')
                # Skip only this race: stopping the loop here would leave every
                # later race unprocessed, on this run and on each rerun.
                continue

            print('Target URL to requests: ', target_url)
            race_past5_result_info_list = _extract_past_5_race_result(soup, race_id)
            _bulk_insert(race_past5_result_info_list, 'race_past_5_result_info',
                         parameters['TABLE_COL_NAMES']['race_past_5_result_info'])
        finally:
            # Runs on every path above so skipped pages are rate-limited too.
            time.sleep(1)

In [None]:
# race_id = '201910020609'
# target_url = _make_target_url_about_past_5_race_result(race_id)
# print(target_url)

In [None]:
# html = requests.get(target_url)
# html.encoding = 'EUC-JP'
# soup = BeautifulSoup(html.text, 'html.parser')

In [None]:
# _extract_past_5_race_result(soup, race_id)