In [50]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

In [51]:
# Fetch the data: download the NPB 2024 batting-stats page (bat_c.html) and parse it.
url_c = 'https://npb.jp/bis/2024/stats/bat_c.html'
# A timeout keeps this cell from hanging indefinitely if the server stalls.
res = requests.get(url_c, timeout=10)
# Stop here on an HTTP error (4xx/5xx) instead of silently parsing an error page.
res.raise_for_status()
soup = BeautifulSoup(res.content, 'html.parser')

In [52]:
# Collect the data: stripped cell text of every data row in the first table on the page.
table = soup.find_all('table')[0]
# The first two <tr> elements are skipped (presumably title/header rows; verify against the page).
rows = table.find_all('tr')[2:]
data = [
    [cell.text.strip() for cell in cells]
    for cells in (row.find_all('td') for row in rows)
    if cells  # a row without <td> cells would otherwise become an all-empty record
]

In [53]:
# Shape the data: label the scraped rows, save them to CSV, and preview the result.
# The labels are assumed to match the cell order of each scraped row (verify against the page).
columns = [
    '順位', '選手', 'チーム', '打率', '試合',
    '打席数', '打数', '得点', '安打', '二塁打',
    '三塁打', '本塁打', '塁打', '打点', '盗塁',
    '盗塁刺', '犠打', '犠飛', '四球', '故意四',
    '死球', '三振', '併殺打', '長打率', '出塁率',
]

df = pd.DataFrame(data=data, columns=columns)
df.to_csv('baseball.csv', index=False)
df.head()

Unnamed: 0,順位,選手,チーム,打率,試合,打席数,打数,得点,安打,二塁打,...,盗塁刺,犠打,犠飛,四球,故意四,死球,三振,併殺打,長打率,出塁率
0,1,サンタナ,(ヤ),0.309,70,276,243,31,75,17,...,0,0,2,29,1,2,62,6,0.514,0.384
1,2,丸　佳浩,(巨),0.3,68,278,240,21,72,6,...,2,0,2,36,2,0,40,5,0.383,0.388
2,3,細川　成也,(中),0.295,71,297,261,24,77,18,...,1,0,1,32,1,3,72,10,0.467,0.377
3,4,小園　海斗,(広),0.292,67,278,257,23,75,4,...,7,2,2,15,3,2,23,3,0.342,0.333
4,5,野間　峻祥,(広),0.286,53,218,196,21,56,8,...,5,1,0,19,0,2,19,2,0.383,0.355


In [54]:
# Define the given data as a dict of column name -> list of per-player values
# (ten entries per column); it is turned into a DataFrame in a later cell.
# NOTE(review): this rebinds `data`, which earlier held the scraped list of rows,
# so after this cell runs the scraped rows are no longer reachable by that name.
data = {
    '打率順位': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    '選手': ['サンタナ', '丸 佳浩', '小園 海斗', '細川 成也', '野間 峻祥', '長岡 秀樹', '牧 秀悟', '秋山 翔吾', '宮﨑 敏郎', '吉川 尚輝'],
    'チーム': ['(ヤ)', '(巨)', '(広)', '(中)', '(広)', '(ヤ)', '(デ)', '(広)', '(デ)', '(巨)'],
    '打率': [0.313, 0.304, 0.298, 0.296, 0.286, 0.280, 0.275, 0.275, 0.269, 0.268],
    '打数': [233, 230, 248, 250, 185, 250, 207, 240, 182, 257],
    '得点': [31, 21, 23, 22, 20, 24, 31, 21, 21, 24],
    '安打': [73, 70, 74, 74, 53, 70, 57, 66, 49, 69],
    '二塁打': [17, 6, 4, 18, 8, 12, 11, 9, 13, 12],
    '三塁打': [0, 1, 3, 0, 1, 1, 0, 0, 0, 1],
    '本塁打': [11, 4, 1, 8, 1, 3, 8, 2, 5, 2],
    '塁打': [123, 90, 87, 116, 72, 93, 92, 81, 77, 89],
    '打点': [38, 22, 25, 30, 15, 23, 27, 15, 20, 20],
    '盗塁': [1, 4, 7, 0, 4, 1, 8, 2, 0, 5],
    '盗塁刺': [0, 2, 6, 1, 5, 0, 0, 3, 1, 3],
    '犠打': [0, 0, 0, 0, 1, 8, 0, 2, 0, 4],
    '犠飛': [2, 2, 1, 1, 0, 2, 1, 2, 0, 2],
    '四球': [28, 36, 15, 30, 18, 16, 21, 14, 23, 23],
    '故意四': [1, 2, 3, 1, 0, 1, 1, 0, 5, 1],
    '死球': [2, 0, 1, 1, 0, 0, 1, 2, 5, 1],
    # NOTE(review): the '三振' list below is identical to the '四球' list above;
    # this looks like a copy-paste slip. Confirm against the source table.
    '三振': [28, 36, 15, 30, 18, 16, 21, 14, 23, 23],
    # NOTE(review): the '併殺打' list below is identical to the '故意四' list above;
    # likely the same copy-paste slip. Confirm against the source table.
    '併殺打': [1, 2, 3, 1, 0, 1, 1, 0, 5, 1],
    '長打率': [0.528, 0.391, 0.351, 0.464, 0.389, 0.372, 0.444, 0.338, 0.423, 0.346],
    '出塁率': [0.389, 0.396, 0.342, 0.377, 0.356, 0.323, 0.343, 0.313, 0.354, 0.329]
}

In [55]:
# Create the DataFrame: one column per key of the `data` dict.
df = pd.DataFrame(data=data)

In [57]:
# OPS = on-base percentage + slugging percentage.
df['OPS'] = df['出塁率'] + df['長打率']

# IsoD (isolated discipline) = on-base percentage - batting average.
# Note: slugging percentage minus batting average would be IsoP (isolated power), not IsoD.
df['ISO_D'] = df['出塁率'] - df['打率']

# wOBA: linear-weighted offensive events divided by the plate appearances that count.
# Intentional walks are excluded from both the numerator and the denominator, and a
# hit-by-pitch is weighted exactly once (0.72), not also as part of the walk term.
unintentional_walks = df['四球'] - df['故意四']
singles = df['安打'] - df['二塁打'] - df['三塁打'] - df['本塁打']

# NOTE(review): the extra-base-hit weights (0.92 / 1.24 / 1.56) are kept as they were;
# they are lower than commonly published wOBA weights, so confirm their source.
wOBA_numerator = (
    0.69 * unintentional_walks
    + 0.72 * df['死球']
    + 0.89 * singles
    + 0.92 * df['二塁打']
    + 1.24 * df['三塁打']
    + 1.56 * df['本塁打']
)
wOBA_denominator = df['打数'] + unintentional_walks + df['死球'] + df['犠飛']

df['wOBA'] = wOBA_numerator / wOBA_denominator

In [58]:
# Helper that builds a ranking table for a single metric.
def create_ranking(df, column, ascending=False):
    """Return a copy of ``df`` sorted by ``column`` with a 1-based rank column added.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    column : str
        Name of the column to rank by.
    ascending : bool, default False
        Sort direction. The default ranks the largest value first.

    Returns
    -------
    pandas.DataFrame
        A new frame with a fresh 0..n-1 index and an extra column named
        ``'順位_' + column`` holding each row's position (1 = first).

    Notes
    -----
    A stable sort is used so that rows with equal values keep their original
    relative order; the default sort algorithm gives no such guarantee, which
    could make the ranks of tied rows vary. Tied rows still receive distinct,
    consecutive ranks.
    """
    ranked_df = df.sort_values(
        by=column, ascending=ascending, kind='mergesort'
    ).reset_index(drop=True)
    ranked_df['順位_' + column] = ranked_df.index + 1
    return ranked_df

In [59]:
# Build one ranking table per metric: OPS, ISO_D and wOBA.
ops_ranked1, iso_d_ranked1, woba_ranked1 = (
    create_ranking(df, metric) for metric in ('OPS', 'ISO_D', 'wOBA')
)

In [60]:
# Write each ranking to its own CSV file: identifying columns, the metric, and its rank.
ranking_exports = (
    (ops_ranked1, 'OPS', 'OPS_ranking.csv'),
    (iso_d_ranked1, 'ISO_D', 'ISO_D_ranking.csv'),
    (woba_ranked1, 'wOBA', 'wOBA_ranking.csv'),
)
for ranked, metric, csv_path in ranking_exports:
    export_columns = ['打率順位', '選手', 'チーム', metric, '順位_' + metric]
    ranked[export_columns].to_csv(csv_path, index=False, encoding='utf-8')