In [191]:
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
import plotly.graph_objects as go

In [192]:
# Scrape the KBO player-record tables (one page per category) into DataFrames.
url_base = 'https://www.koreabaseball.com/Record/Player/'
category_list = ['HitterBasic/Basic1.aspx', 'PitcherBasic/Basic1.aspx', 'Defense/Basic.aspx', 'Runner/Basic.aspx']
data_list = []  # one DataFrame per entry of category_list, in the same order

for category in category_list:
    url = url_base + category
    req = requests.get(url, timeout=10)
    req.raise_for_status()  # fail loudly instead of parsing an error page

    html = req.text
    soup = bs(html, 'html.parser')
    temp_table = soup.find('div', {'class': 'record_result'})
    if temp_table is None:
        raise ValueError("no <div class='record_result'> found at {0}".format(url))

    # Collect the <td> cells once; they are used for both the header names and the rows.
    cell_tags = temp_table.find_all('td')

    # The first three columns carry no 'data-id'; the remaining column names are the
    # 'data-id' values of the cells, read until the first repeated value.
    col_list = ['Rank', 'Name', 'Team']
    for cell in cell_tags:
        if 'data-id' not in cell.attrs:
            continue
        temp_value = cell.attrs['data-id']
        if temp_value in col_list:
            break
        col_list.append(temp_value)

    # Cut the flat cell sequence into complete rows of len(col_list) cells each;
    # a trailing incomplete group of cells is dropped.
    col_len = len(col_list)
    cell_texts = [cell.text for cell in cell_tags]
    rows = [cell_texts[start:start + col_len]
            for start in range(0, len(cell_texts) - col_len + 1, col_len)]

    # Build the frame in one go rather than appending row by row.
    temp_data = pd.DataFrame(rows, columns=col_list)
    data_list.append(temp_data)

In [193]:
# data_list follows the order of category_list, so index 0 is the 'HitterBasic/Basic1.aspx' table.
hitter_df = data_list[0]
hitter_df

Unnamed: 0,Rank,Name,Team,HRA_RT,GAME_CN,PA_CN,AB_CN,RUN_CN,HIT_CN,H2_CN,H3_CN,HR_CN,TB_CN,RBI_CN,SH_CN,SF_CN
0,1,최형우,KIA,0.354,140,600,522,93,185,37,1,28,308,115,0,3
1,2,손아섭,롯데,0.352,141,611,540,98,190,43,0,11,266,85,1,7
2,3,로하스,KT,0.349,142,628,550,116,192,39,1,47,374,135,0,8
3,4,박민우,NC,0.345,126,530,467,82,161,27,5,8,222,63,2,10
4,5,페르난데스,두산,0.34,144,668,586,104,199,29,0,21,291,105,0,11
5,6,이정후,키움,0.333,140,617,544,85,181,49,5,15,285,101,2,8
6,7,허경민,두산,0.332,117,487,437,70,145,25,1,7,193,58,3,7
7,8,김현수,LG,0.331,142,619,547,98,181,35,2,22,286,119,0,7
8,9,강백호,KT,0.33,129,574,500,95,165,36,1,23,272,89,0,3
9,10,양의지,NC,0.328,130,528,461,86,151,26,1,33,278,124,1,6


In [194]:
# Bar chart of batting average (HRA_RT) per rank, with a horizontal line at the mean.
# The scraped values are strings, so convert once and reuse the numeric series.
batting_avg = hitter_df['HRA_RT'].astype(float)
mean_batting_avg = batting_avg.mean()

fig = go.Figure()

fig.add_trace(go.Bar(x=hitter_df['Rank'],
                     y=batting_avg))

# The annotation is derived from the data (row count and mean) so it cannot go stale
# when the scraped table changes.
fig.add_hline(y=mean_batting_avg,
              annotation_text='상위 {0}명의 평균 타율: {1:.3f}'.format(len(hitter_df), mean_batting_avg),
              annotation_font_size=15)
fig.update_layout(yaxis_range=[0.2, 0.36],
                  xaxis_title='타율순위',
                  yaxis_title='타율')

fig.show()

In [195]:
# Download the namu.wiki article listing Korean major leaguers and parse it.
url = 'https://namu.wiki/w/%ED%95%9C%EA%B5%AD%EC%9D%B8%20%EB%A9%94%EC%9D%B4%EC%A0%80%EB%A6%AC%EA%B1%B0'
req = requests.get(url, timeout=10)
req.raise_for_status()  # a blocked/failed request would otherwise yield an empty name list later
html = req.text
soup = bs(html, 'html.parser')

# Show only the beginning of the document as a sanity check; the full page is very long.
print(soup.prettify()[:2000])

<!DOCTYPE html>
<html data-n-head-ssr="">
 <head>
  <title>
   한국인 메이저리거 - 나무위키
  </title>
  <meta charset="utf-8" data-n-head="ssr"/>
  <meta content="user-scalable=no, initial-scale=1.0, maximum-scale=5.0, minimum-scale=1.0, width=device-width" data-n-head="ssr" name="viewport"/>
  <meta content="ie=edge" data-n-head="ssr" http-equv="x-ua-compatible"/>
  <meta content="the seed" data-n-head="ssr" name="generator"/>
  <meta content="yes" data-n-head="ssr" name="mobile-web-app-capable"/>
  <meta content="나무위키" data-n-head="ssr" name="application-name"/>
  <meta content="나무위키" data-n-head="ssr" name="msapplication-tooltip"/>
  <meta content="/w/%EB%82%98%EB%AC%B4%EC%9C%84%ED%82%A4:%EB%8C%80%EB%AC%B8" data-n-head="ssr" name="msapplication-starturl"/>
  <meta content="max-image-preview:large" data-n-head="ssr" name="robots"/>
  <meta content="#008275" data-n-head="ssr" name="theme-color"/>
  <meta content="noarchive" data-n-head="ssr" name="googlebot"/>
  <link data-n-head="ssr" href="htt

In [196]:
# Candidate player names: text of every internal wiki link that sits inside a table
# cell and is exactly three characters long. Duplicates are removed and the result
# is sorted.
name_candidates = {
    anchor.get_text()
    for cell in soup.find_all('td')
    for anchor in cell.find_all('a')
    if 'wiki-link-internal' in anchor.get('class', [])
    and len(anchor.get_text()) == 3
}

korean_majorleaguer = sorted(name_candidates)
korean_majorleaguer

['강정호',
 '구대성',
 '김광현',
 '김병현',
 '김선우',
 '김하성',
 '김현수',
 '류제국',
 '류현진',
 '박병호',
 '박찬호',
 '백차승',
 '봉중근',
 '서재응',
 '오승환',
 '이대호',
 '이상훈',
 '임창용',
 '조진호',
 '최지만',
 '최희섭',
 '추신수',
 '황재균']

In [197]:
# Manually collected metadata, keyed by player name: (birth year, position).
# Keying by name (rather than relying on list positions) keeps every value attached
# to the right player even if the scraped name list changes.
PLAYER_PROFILES = {
    '강정호': ('1987', '내야수'),
    '구대성': ('1969', '투수'),
    '김광현': ('1988', '투수'),
    '김병현': ('1979', '투수'),
    '김선우': ('1977', '투수'),
    '김하성': ('1995', '내야수'),
    '김현수': ('1988', '외야수'),
    '류제국': ('1983', '투수'),
    '류현진': ('1987', '투수'),
    '박병호': ('1986', '내야수'),
    '박찬호': ('1973', '투수'),
    '백차승': ('1980', '투수'),
    '봉중근': ('1980', '투수'),
    '서재응': ('1977', '투수'),
    '오승환': ('1982', '투수'),
    '이대호': ('1982', '내야수'),
    '이상훈': ('1971', '투수'),
    '임창용': ('1976', '투수'),
    '조진호': ('1975', '투수'),
    '최지만': ('1991', '내야수'),
    '최희섭': ('1979', '내야수'),
    '추신수': ('1982', '외야수'),
    '황재균': ('1987', '내야수'),
}

# Stop with a clear message when the scrape returned a name we have no metadata for,
# instead of silently pairing players with someone else's birth year/position.
unknown_names = [name for name in korean_majorleaguer if name not in PLAYER_PROFILES]
if unknown_names:
    raise ValueError('no birth year/position recorded for: {0}'.format(', '.join(unknown_names)))

player_birth_year = [PLAYER_PROFILES[name][0] for name in korean_majorleaguer]
player_position = [PLAYER_PROFILES[name][1] for name in korean_majorleaguer]
korean_majorleaguer_df = pd.DataFrame(data={'Name': korean_majorleaguer, 'Birth_year': player_birth_year,
                                            'Position': player_position})

korean_majorleaguer_df

Unnamed: 0,Name,Birth_year,Position
0,강정호,1987,내야수
1,구대성,1969,투수
2,김광현,1988,투수
3,김병현,1979,투수
4,김선우,1977,투수
5,김하성,1995,내야수
6,김현수,1988,외야수
7,류제국,1983,투수
8,류현진,1987,투수
9,박병호,1986,내야수


In [198]:
from selenium import webdriver
import time

In [199]:
# player_info_url = []

# # execute chrome web browser
# path = '/Users/dohee/Downloads/chromedriver'

# for player in korean_majorleaguer_df.loc[:]['Name'].to_list():
#     driver = webdriver.Chrome(path)
#     driver.get('https://www.koreabaseball.com/Player/Search.aspx')
    
#     # search player
#     element = driver.find_element_by_id("cphContents_cphContents_cphContents_txtSearchPlayerName")
#     element.send_keys(player)

#     # click the search button
#     driver.find_element_by_xpath('//*[@id="cphContents_cphContents_cphContents_btnSearch"]').click()
#     time.sleep(2)

#     # count number of person who has same name
#     count_search_result = driver.find_element_by_xpath('//*[@id="cphContents_cphContents_cphContents_udpRecord"]/div[2]/p/strong/span')
    
#     # if there's no information of the player
#     if count_search_result.text == '0':
#         player_info_url.append('NO_INFO_URL')
#     # if there's only one information of the player
#     elif count_search_result.text == '1':
#         driver.find_element_by_xpath('//*[@id="cphContents_cphContents_cphContents_udpRecord"]/div[2]/table/tbody/tr/td[2]/a').click()
#         player_info_url.append(driver.current_url)
#     # if there are multiple information of the player
#     else:
#         temp_ls = []
#         for count in range(int(count_search_result.text)):
#             driver.find_element_by_xpath('//*[@id="cphContents_cphContents_cphContents_udpRecord"]/div[2]/table/tbody/tr[{0}]/td[2]/a'.format(count+1)).click()
#             temp_ls.append(driver.current_url)
#             time.sleep(2)
#             driver.back()
#             time.sleep(2)
            
#             # search player
#             element = driver.find_element_by_id("cphContents_cphContents_cphContents_txtSearchPlayerName")
#             element.send_keys(player)

#             # click the search button
#             driver.find_element_by_xpath('//*[@id="cphContents_cphContents_cphContents_btnSearch"]').click()
#             time.sleep(2)
            
#         player_info_url.append(temp_ls)
#     driver.close()
    
# # add to dataframe
# korean_majorleaguer_df['info_url'] = player_info_url


In [200]:
# Display the frame again; the Selenium cell above is fully commented out, so it added nothing.
korean_majorleaguer_df

Unnamed: 0,Name,Birth_year,Position
0,강정호,1987,내야수
1,구대성,1969,투수
2,김광현,1988,투수
3,김병현,1979,투수
4,김선우,1977,투수
5,김하성,1995,내야수
6,김현수,1988,외야수
7,류제국,1983,투수
8,류현진,1987,투수
9,박병호,1986,내야수


In [201]:
# url = 'https://www.koreabaseball.com/Player/Search.aspx'
# headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36'}
# data = {'MIME Type': 'application/x-www-form-urlencoded; charset=utf-8',
# 'ctl00$ctl00$ctl00$cphContents$cphContents$ScriptManager1': 'ctl00$ctl00$ctl00$cphContents$cphContents$cphContents$udpRecord|ctl00$ctl00$ctl00$cphContents$cphContents$cphContents$btnSearch',
# 'ctl00$ctl00$ctl00$cphContents$cphContents$cphContents$hfPage': 1,
# 'ctl00$ctl00$ctl00$cphContents$cphContents$cphContents$txtSearchPlayerName': '이상훈',
# '__EVENTTARGET': 'ctl00$ctl00$ctl00$cphContents$cphContents$cphContents$btnSearch',
# '__VIEWSTATE': '/wEPDwUJMTMxNzk5NDM2D2QWAmYPZBYCZg9kFgJmD2QWAgIBD2QWAmYPZBYCAgEPZBYCAgUPZBYCAgEPZBYCZg9kFgpmDxAPFgYeDURhdGFUZXh0RmllbGQFCEZJUlNUX05NHg5EYXRhVmFsdWVGaWVsZAUEVF9JRB4LXyFEYXRhQm91bmRnZBAVCwrtjIAg7ISg7YOdAk5DBuuRkOyCsAJLVAJMRwbtgqTsm4ADS0lBBuuhr+uNsAbsgrzshLECU0sG7ZWc7ZmUFQsAAk5DAk9CAktUAkxHAldPAkhUAkxUAlNTAlNLAkhIFCsDC2dnZ2dnZ2dnZ2dnFgFmZAIBDxBkZBYBZmQCAg8PZBYCHgpvbmtleXByZXNzBXlpZihldmVudC5rZXlDb2RlID09IDEzKXtfX2RvUG9zdEJhY2soJ2N0bDAwJGN0bDAwJGN0bDAwJGNwaENvbnRlbnRzJGNwaENvbnRlbnRzJGNwaENvbnRlbnRzJGJ0blNlYXJjaCcsJycpO3JldHVybiBmYWxzZTt9ZAIEDxYCHgtfIUl0ZW1Db3VudAIBFgJmD2QWCmYPFQEBI2QCAQ8WAh4EVGV4dAVBPGEgaHJlZj0nL1JlY29yZC9SZXRpcmUvSGl0dGVyLmFzcHg/cGxheWVySWQ9NzYzMjUnPuqwleygle2YuDwvYT5kAgIPFQMG64Sl7IS8CeuCtOyVvOyImAoxOTg3LTA0LTA1ZAIDDxYCHwUFCzE4M2NtLCA5NmtnZAIEDxUBUe2ZlOygley0iC3rrLTrk7HspJEt6rSR7KO87KCc7J286rOgLSjrgqjrtoDrjIApLe2YhOuMgC3smrDrpqwt7Z6I7Ja066Gc7KaILeuEpeyEvGQCBQ8PFgYeCVBhZ2VJbmRleAUBMR4IUGFnZVNpemUFAjIwHg1Ub3RhbFJvd0NvdW50AgFkFhwCAQ8PFgIeB1Zpc2libGVoZGQCAw8PFgIfCWhkZAIFDw8WBh8FBQExHghDc3NDbGFzcwUCb24eBF8hU0ICAmRkAgcPDxYIHwUFATIfCmUfCwICHwloZGQCCQ8PFggfBQUBMx8KZR8LAgIfCWhkZAILDw8WCB8FBQE0HwplHwsCAh8JaGRkAg0PDxYIHwUFATUfCmUfCwICHwloZGQCDw8PFgYfCmUfCwICHwloZGQCEQ8PFgYfCmUfCwICHwloZGQCEw8PFgYfCmUfCwICHwloZGQCFQ8PFgYfCmUfCwICHwloZGQCFw8PFgYfCmUfCwICHwloZGQCGQ8PFgIfCWhkZAIbDw8WAh8JaGRkZImkjX7rxMslCyeok2MF9BQ8qu/+P5gJjUM8hUnKJJNG',
# '__VIEWSTATEGENERATOR': '6942A5F7',
# '__EVENTVALIDATION': '/wEdABW1u8CJ6iwHrfjPPWNlkkQ8Q2B4FJs0ospwpODzYcZnkwEb89/cjvkLespfzFW0TCj6RDwCo8sThGuE7pkUKp4Ers/Mpg0cmfWLHjUb0iE5C4lcepFRdFRmSUFma4Vamw10ID5u32Wu9DsGe3ew6dVgq4ym2DyQjvtJtTj77nmU2SxU8kSoyaew+8cH10gkal/mFdUWslBO5btpZyMqdhF4JY/ImRWG9e44Fizmqo6gKVVlRfF3+6ImEs0Rr71fGadixqwW37sV/ACzqDxrfptqmN8UN67Y0NzXekKR1wwa4YzUt0HUF0hKGI30fWyS9XgXUmB2NNGHmh4A3piDkzwQumasTdki0Nxrfp/T+TKcdAhwEXCOcAoDsKjQoDqWzQm67A1wtdv0d/eP2rgGfHTFIgSynfy/m1AkZNqRiApT0alCuvpkbHcYZzQgPlhttdroVfpp4oVsMEal7zCWex2myKoPjcjFSPSmpOUxFK8q3g==',
# '__ASYNCPOST': 'true'
# }

# html=requests.post(url, data=data, headers=headers)
# from bs4 import BeautifulSoup as bs
# soup = bs(html.content, 'html.parser')
# print(soup.prettify())

In [204]:
# player_info_url = []
# base_url = 'https://www.koreabaseball.com'

# for player_name in korean_majorleaguer_df.loc[:]['Name'].to_list():
#     temp_ls = []
#     url = 'https://www.koreabaseball.com/Player/Search.aspx'
#     _headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36'}
#     _data = {'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
#             'ctl00$ctl00$ctl00$cphContents$cphContents$ScriptManager1': 'ctl00$ctl00$ctl00$cphContents$cphContents$cphContents$udpRecord|ctl00$ctl00$ctl00$cphContents$cphContents$cphContents$btnSearch',
#     '__VIEWSTATE': '/wEPDwUJMTMxNzk5NDM2D2QWAmYPZBYCZg9kFgJmD2QWAgIBD2QWAmYPZBYCAgEPZBYCAgUPZBYCAgEPZBYCZg9kFghmDxAPFgYeDURhdGFUZXh0RmllbGQFCEZJUlNUX05NHg5EYXRhVmFsdWVGaWVsZAUEVF9JRB4LXyFEYXRhQm91bmRnZBAVCwrtjIAg7ISg7YOdAk5DBuuRkOyCsAJLVAJMRwbtgqTsm4ADS0lBBuuhr+uNsAbsgrzshLECU0sG7ZWc7ZmUFQsAAk5DAk9CAktUAkxHAldPAkhUAkxUAlNTAlNLAkhIFCsDC2dnZ2dnZ2dnZ2dnFgFmZAIBDxBkZBYBZmQCAg8PZBYCHgpvbmtleXByZXNzBXlpZihldmVudC5rZXlDb2RlID09IDEzKXtfX2RvUG9zdEJhY2soJ2N0bDAwJGN0bDAwJGN0bDAwJGNwaENvbnRlbnRzJGNwaENvbnRlbnRzJGNwaENvbnRlbnRzJGJ0blNlYXJjaCcsJycpO3JldHVybiBmYWxzZTt9ZAIFDw8WBh4JUGFnZUluZGV4BQExHghQYWdlU2l6ZQUCMjAeDVRvdGFsUm93Q291bnRmZBYcAgEPDxYCHgdWaXNpYmxlaGRkAgMPDxYCHwdoZGQCBQ8PFgYeBFRleHQFATEeCENzc0NsYXNzBQJvbh4EXyFTQgICZGQCBw8PFggfCAUBMh8JZR8KAgIfB2hkZAIJDw8WCB8IBQEzHwllHwoCAh8HaGRkAgsPDxYIHwgFATQfCWUfCgICHwdoZGQCDQ8PFggfCAUBNR8JZR8KAgIfB2hkZAIPDw8WBh8JZR8KAgIfB2hkZAIRDw8WBh8JZR8KAgIfB2hkZAITDw8WBh8JZR8KAgIfB2hkZAIVDw8WBh8JZR8KAgIfB2hkZAIXDw8WBh8JZR8KAgIfB2hkZAIZDw8WAh8HaGRkAhsPDxYCHwdoZGRk1A6xuCh0ccetMvmoHty6C+iZPJRR4TUTclPN2WV25KM=',
#     '__VIEWSTATEGENERATOR': '6942A5F7',
#     '__EVENTVALIDATION': '/wEdABV25zroy0y+ySxAb8qg0Lh+qUK6+mRsdxhnNCA+WG212kNgeBSbNKLKcKTg82HGZ5MBG/Pf3I75C3rKX8xVtEwo+kQ8AqPLE4RrhO6ZFCqeBK7PzKYNHJn1ix41G9IhOQuJXHqRUXRUZklBZmuFWpsNdCA+bt9lrvQ7Bnt3sOnVYKuMptg8kI77SbU4++55lNksVPJEqMmnsPvHB9dIJGpf5hXVFrJQTuW7aWcjKnYReCWPyJkVhvXuOBYs5qqOoClVZUXxd/uiJhLNEa+9XxmnYsasFt+7FfwAs6g8a36bapjfFDeu2NDc13pCkdcMGuGM1LdB1BdIShiN9H1skvV4F1JgdjTRh5oeAN6Yg5M8ELpmrE3ZItDca36f0/kynHQIcBFwjnAKA7Co0KA6ls0JuuwNcLXb9Hf3j9q4Bnx0xSIEsp38v5tQJGTakYgKU9EJBIfUBLFxAijQLbL4FCib7tDqjkD5YMtH7xvNaxZaMg==',
#     'ctl00$ctl00$ctl00$cphContents$cphContents$cphContents$hfPage': 1,
#     'ctl00$ctl00$ctl00$cphContents$cphContents$cphContents$txtSearchPlayerName': '{0}'.format(player_name),
#     '__ASYNCPOST': 'true',
#     'ctl00$ctl00$ctl00$cphContents$cphContents$cphContents$btnSearch': '검색'}

#     html = requests.post(url, data=_data, headers=_headers)
#     soup = bs(html.text, 'html.parser')
    
#     for link in soup.find_all('a'):
#         temp_ls.append(base_url + link.get('href'))
#     temp_ls.pop()
    
#     player_info_url.append(temp_ls)
# player_info_url

In [205]:
import re  # not needed by this cell any more; kept in case other cells rely on this import

# For every player, query the KBO player search and collect the profile URL of each
# search hit whose birth date starts with the player's known birth year.
base_url = 'https://www.koreabaseball.com'
url = 'https://www.koreabaseball.com/Player/Search.aspx'
NAME_FIELD = 'ctl00$ctl00$ctl00$cphContents$cphContents$cphContents$txtSearchPlayerName'

_headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36'}

# Form fields of the search postback. Only NAME_FIELD changes per player, so the
# template is built once; field order is the same as before.
# Note: 'Content-Type' is sent as a form field here (not as an HTTP header), unchanged.
# NOTE(review): __VIEWSTATE / __EVENTVALIDATION are hard-coded snapshots; presumably the
# server may stop accepting them at some point - confirm, and consider reading them
# from a fresh GET of the search page.
_form_template = {
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'ctl00$ctl00$ctl00$cphContents$cphContents$ScriptManager1': 'ctl00$ctl00$ctl00$cphContents$cphContents$cphContents$udpRecord|ctl00$ctl00$ctl00$cphContents$cphContents$cphContents$btnSearch',
    '__VIEWSTATE': '/wEPDwUJMTMxNzk5NDM2D2QWAmYPZBYCZg9kFgJmD2QWAgIBD2QWAmYPZBYCAgEPZBYCAgUPZBYCAgEPZBYCZg9kFghmDxAPFgYeDURhdGFUZXh0RmllbGQFCEZJUlNUX05NHg5EYXRhVmFsdWVGaWVsZAUEVF9JRB4LXyFEYXRhQm91bmRnZBAVCwrtjIAg7ISg7YOdAk5DBuuRkOyCsAJLVAJMRwbtgqTsm4ADS0lBBuuhr+uNsAbsgrzshLECU0sG7ZWc7ZmUFQsAAk5DAk9CAktUAkxHAldPAkhUAkxUAlNTAlNLAkhIFCsDC2dnZ2dnZ2dnZ2dnFgFmZAIBDxBkZBYBZmQCAg8PZBYCHgpvbmtleXByZXNzBXlpZihldmVudC5rZXlDb2RlID09IDEzKXtfX2RvUG9zdEJhY2soJ2N0bDAwJGN0bDAwJGN0bDAwJGNwaENvbnRlbnRzJGNwaENvbnRlbnRzJGNwaENvbnRlbnRzJGJ0blNlYXJjaCcsJycpO3JldHVybiBmYWxzZTt9ZAIFDw8WBh4JUGFnZUluZGV4BQExHghQYWdlU2l6ZQUCMjAeDVRvdGFsUm93Q291bnRmZBYcAgEPDxYCHgdWaXNpYmxlaGRkAgMPDxYCHwdoZGQCBQ8PFgYeBFRleHQFATEeCENzc0NsYXNzBQJvbh4EXyFTQgICZGQCBw8PFggfCAUBMh8JZR8KAgIfB2hkZAIJDw8WCB8IBQEzHwllHwoCAh8HaGRkAgsPDxYIHwgFATQfCWUfCgICHwdoZGQCDQ8PFggfCAUBNR8JZR8KAgIfB2hkZAIPDw8WBh8JZR8KAgIfB2hkZAIRDw8WBh8JZR8KAgIfB2hkZAITDw8WBh8JZR8KAgIfB2hkZAIVDw8WBh8JZR8KAgIfB2hkZAIXDw8WBh8JZR8KAgIfB2hkZAIZDw8WAh8HaGRkAhsPDxYCHwdoZGRk1A6xuCh0ccetMvmoHty6C+iZPJRR4TUTclPN2WV25KM=',
    '__VIEWSTATEGENERATOR': '6942A5F7',
    '__EVENTVALIDATION': '/wEdABV25zroy0y+ySxAb8qg0Lh+qUK6+mRsdxhnNCA+WG212kNgeBSbNKLKcKTg82HGZ5MBG/Pf3I75C3rKX8xVtEwo+kQ8AqPLE4RrhO6ZFCqeBK7PzKYNHJn1ix41G9IhOQuJXHqRUXRUZklBZmuFWpsNdCA+bt9lrvQ7Bnt3sOnVYKuMptg8kI77SbU4++55lNksVPJEqMmnsPvHB9dIJGpf5hXVFrJQTuW7aWcjKnYReCWPyJkVhvXuOBYs5qqOoClVZUXxd/uiJhLNEa+9XxmnYsasFt+7FfwAs6g8a36bapjfFDeu2NDc13pCkdcMGuGM1LdB1BdIShiN9H1skvV4F1JgdjTRh5oeAN6Yg5M8ELpmrE3ZItDca36f0/kynHQIcBFwjnAKA7Co0KA6ls0JuuwNcLXb9Hf3j9q4Bnx0xSIEsp38v5tQJGTakYgKU9EJBIfUBLFxAijQLbL4FCib7tDqjkD5YMtH7xvNaxZaMg==',
    'ctl00$ctl00$ctl00$cphContents$cphContents$cphContents$hfPage': 1,
    NAME_FIELD: '',  # filled in per player below
    '__ASYNCPOST': 'true',
    'ctl00$ctl00$ctl00$cphContents$cphContents$cphContents$btnSearch': '검색',
}

player_info_url = []  # one list of URLs per row of korean_majorleaguer_df

for player_name, birth_year in zip(korean_majorleaguer_df['Name'],
                                   korean_majorleaguer_df['Birth_year']):
    _data = dict(_form_template)
    _data[NAME_FIELD] = player_name

    response = requests.post(url, data=_data, headers=_headers, timeout=10)
    response.raise_for_status()  # an HTTP error must not be recorded as "no URL found"
    soup = bs(response.text, 'html.parser')

    cells = soup.find_all('td')
    url_ls = []

    for pos, cell in enumerate(cells):
        # A cell whose text starts with the birth year is taken to be the birth-date cell.
        if not cell.get_text(strip=True).startswith(birth_year):
            continue
        # The profile link is read from the cell three positions earlier.
        if pos < 3:
            continue
        link = cells[pos - 3].find('a')
        if link is not None and link.get('href'):
            url_ls.append(base_url + link['href'])

    player_info_url.append(url_ls)

korean_majorleaguer_df['info_url'] = player_info_url

In [211]:
# Report every player whose search produced two or more candidate URLs,
# together with the row label, so the ambiguity can be resolved by hand.
for row_label, player, links in zip(korean_majorleaguer_df.index,
                                    korean_majorleaguer_df['Name'],
                                    korean_majorleaguer_df['info_url']):
    if len(links) < 2:
        continue
    print("url 여러 개: ", player, ' ', row_label)

url 여러 개:  이상훈   16


In [224]:
# Two search hits matched this player's name and birth year. Checking both URLs by hand
# showed that the second one (playerId=93147) is the major leaguer.
AMBIGUOUS_PLAYER = '이상훈'
CONFIRMED_PLAYER_ID = '93147'

# Locate the row by name instead of a hard-coded row label.
row_label = korean_majorleaguer_df.index[korean_majorleaguer_df['Name'] == AMBIGUOUS_PLAYER][0]
candidate_urls = korean_majorleaguer_df.at[row_label, 'info_url']
print(candidate_urls[:])

# Select by player id rather than by list position, so re-running this cell is harmless.
confirmed_urls = [link for link in candidate_urls
                  if link.endswith('playerId=' + CONFIRMED_PLAYER_ID)]
if not confirmed_urls:
    raise ValueError('playerId={0} not found among: {1}'.format(CONFIRMED_PLAYER_ID, candidate_urls))
print(confirmed_urls)

# .at writes into the frame itself; chained df[col][row] = ... may only modify a temporary copy.
korean_majorleaguer_df.at[row_label, 'info_url'] = confirmed_urls

['https://www.koreabaseball.com/Record/Retire/Pitcher.aspx?playerId=90851', 'https://www.koreabaseball.com/Record/Retire/Pitcher.aspx?playerId=93147']
['https://www.koreabaseball.com/Record/Retire/Pitcher.aspx?playerId=93147']


In [226]:
# Display the frame, which now includes the scraped 'info_url' column.
korean_majorleaguer_df

Unnamed: 0,Name,Birth_year,Position,info_url
0,강정호,1987,내야수,[https://www.koreabaseball.com/Record/Retire/H...
1,구대성,1969,투수,[https://www.koreabaseball.com/Record/Retire/P...
2,김광현,1988,투수,[https://www.koreabaseball.com/Record/Retire/P...
3,김병현,1979,투수,[https://www.koreabaseball.com/Record/Retire/P...
4,김선우,1977,투수,[https://www.koreabaseball.com/Record/Retire/P...
5,김하성,1995,내야수,[https://www.koreabaseball.com/Record/Player/H...
6,김현수,1988,외야수,[https://www.koreabaseball.com/Record/Player/H...
7,류제국,1983,투수,[https://www.koreabaseball.com/Record/Retire/P...
8,류현진,1987,투수,[https://www.koreabaseball.com/Record/Retire/P...
9,박병호,1986,내야수,[https://www.koreabaseball.com/Record/Player/H...
