# CHAPTER2 データからグラフを作成してみよう

## 2-1 分析に必要な知識の習得

In [None]:
# !pip install pybaseball
from pybaseball import statcast

In [None]:
# Request Statcast records from pybaseball for the date window below and
# preview the first rows of the result.
season_window = {'start_dt': '2022-04-01', 'end_dt': '2022-09-30'}
data = statcast(**season_window)
data.head()

In [None]:
import os

# Keep only the rows whose player_name is 'Ohtani, Shohei' and store them in
# a CSV file, which the later sections reload with pd.read_csv.
# NOTE(review): to_csv also writes the DataFrame index by default, so the file
# gains an extra unnamed first column when read back - confirm this is wanted.
ohtani_mask = data['player_name'] == 'Ohtani, Shohei'
data_ohtani = data[ohtani_mask]
data_ohtani.to_csv('2022_Ohtani.csv')
data_ohtani.head()

## 2-2 投球の球種割合を表す円グラフ

In [None]:
# !pip install japanize_matplotlib
import japanize_matplotlib
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Read the saved pitch CSV into a DataFrame and display it.
ohtani_csv = '2022_Ohtani.csv'
df = pd.read_csv(ohtani_csv)
df

In [None]:
# Count the rows per pitch_type value and draw the counts as a pie chart,
# laid out clockwise (counterclock=False) with startangle=90.
counts = df['pitch_type'].value_counts()
pie_options = {
    'labels': counts.index,
    'counterclock': False,
    'startangle': 90,
}
plt.pie(counts, **pie_options)
plt.title('大谷投手の球種割合')
plt.show()

## 2-3 球種ごとの平均球速を表す棒グラフ

In [None]:
import japanize_matplotlib
import pandas as pd
import matplotlib.pyplot as plt

# Reload the saved pitch CSV for this section and display the table.
df = pd.read_csv(filepath_or_buffer='2022_Ohtani.csv')
df

In [None]:
# Add a speed_km column holding release_speed scaled by 1.61.
# NOTE(review): presumably release_speed is in mph and 1.61 is a rounded
# mph-to-km/h factor (exact value 1.609344) - confirm the rounding is intended.
KM_PER_MILE = 1.61
df['speed_km'] = df['release_speed'] * KM_PER_MILE
df.head()

In [None]:
# Split the pitches into one DataFrame per pitch_type code.
pitch_type = df['pitch_type']
Fastball = df[pitch_type == 'FF']  # fastball
Cutter = df[pitch_type == 'FC']  # cutter
Sweeper = df[pitch_type == 'ST']  # sweeper
Curve = df[pitch_type == 'CU']  # curveball
Splitter = df[pitch_type == 'FS']  # splitter

# Each bar label paired with the subset whose mean speed_km it shows.
groups = [
    ('ストレート', Fastball),
    ('カット', Cutter),
    ('スイーパー', Sweeper),
    ('カーブ', Curve),
    ('スプリット', Splitter),
]
x = [label for label, _ in groups]
y = [subset['speed_km'].mean() for _, subset in groups]

plt.bar(x, y)
plt.title('球種ごとの平均球速')
# Restrict the y-axis to the 110-170 range.
plt.ylim(110, 170)
plt.show()

## 2-4 イニングごとの平均球速を表す折れ線グラフ

In [None]:
import japanize_matplotlib
import pandas as pd
import matplotlib.pyplot as plt

# Reload the saved pitch CSV for this section and preview the first rows.
csv_name = '2022_Ohtani.csv'
df = pd.read_csv(csv_name)
df.head()

In [None]:
# Add a speed_km column holding release_speed multiplied by 1.61.
# NOTE(review): presumably an mph-to-km/h conversion - confirm the units.
df['speed_km'] = df['release_speed'].mul(1.61)
df.head()

In [None]:
# Select the rows whose pitch_type is 'FF' and preview them.
is_fastball = df['pitch_type'] == 'FF'
Fastball = df[is_fastball]
Fastball.head()

In [None]:
# x: inning numbers 1 through 9; y: mean speed_km of the Fastball rows in each
# of those innings (NaN for an inning that has no rows).
x = list(range(1, 10))
y = [
    Fastball.loc[Fastball['inning'] == inning_no, 'speed_km'].mean()
    for inning_no in x
]

y

In [None]:
# Line chart of the per-inning mean speeds computed above.
plt.plot(x, y)

# Axis labels first, then the title; the calls are independent of each other.
plt.xlabel('イニング（回）')
plt.ylabel('ストレートの平均球速（km/h）')
plt.title('イニングごとのストレート平均球速')

plt.show()

## 2-5 球種ごとの球速分布を表すヒストグラム

In [None]:
import japanize_matplotlib
import pandas as pd
import matplotlib.pyplot as plt

# Reload the pitch data and add the speed_km column (release_speed * 1.61).
df = pd.read_csv('2022_Ohtani.csv')
df['speed_km'] = df['release_speed'] * 1.61

# One DataFrame per pitch_type code.
pitch_type = df['pitch_type']
Fastball = df[pitch_type == 'FF']  # fastball
Cutter = df[pitch_type == 'FC']  # cutter
Sweeper = df[pitch_type == 'ST']  # sweeper
Curve = df[pitch_type == 'CU']  # curveball
Splitter = df[pitch_type == 'FS']  # splitter

# Shared bin edges: 110, 115, ..., 165.
border = list(range(110, 170, 5))

# (subset, hatch pattern, legend label) for each overlaid histogram.
histogram_specs = [
    (Fastball, '/', 'ストレート'),
    (Cutter, 'o', 'カット'),
    (Sweeper, '.', 'スイーパー'),
    (Curve, '-', 'カーブ'),
    (Splitter, '*', 'スプリット'),
]
for subset, hatch, label in histogram_specs:
    plt.hist(subset['speed_km'], alpha=0.7, hatch=hatch, label=label, bins=border)

plt.title('球種ごとの球速分布')
plt.xlabel('球速（km/h）')
plt.ylabel('投球数')
plt.legend(loc='upper left')
plt.show()

## 2-6 投球の到達位置を表す散布図

In [None]:
import japanize_matplotlib
import pandas as pd
import matplotlib.pyplot as plt

# Reload the pitch data and add *_cm copies of plate_x / plate_z scaled by
# 30.48.
# NOTE(review): presumably the source columns are in feet (30.48 cm per foot)
# - confirm against the data source.
df = pd.read_csv('2022_Ohtani.csv')
CM_PER_FOOT = 30.48
for source_col, cm_col in (('plate_x', 'plate_x_cm'), ('plate_z', 'plate_z_cm')):
    df[cm_col] = df[source_col] * CM_PER_FOOT
df.head()

In [None]:
# Rows whose pitch_type is 'FF', previewed with head().
ff_rows = df['pitch_type'] == 'FF'
Fastball = df.loc[ff_rows]
Fastball.head()

In [None]:
# Scatter plot of where each Fastball pitch crossed (plate_x_cm vs plate_z_cm).
plt.figure(figsize=(10, 10))
plt.scatter(Fastball['plate_x_cm'], Fastball['plate_z_cm'])

plt.title('ストレートの投球位置（捕手目線）')
plt.xlabel('投球のコース（cm）')
plt.ylabel('投球の高さ（cm）')
plt.xlim(-80, 80)
plt.ylim(0, 150)

# Black rectangle spanning x in [-30, 30] and y in [50, 110]
# (presumably the strike zone - confirm).
zone_left, zone_right = -30, 30
zone_bottom, zone_top = 50, 110
plt.hlines(y=[zone_bottom, zone_top], xmin=zone_left, xmax=zone_right, color='black')
plt.vlines(x=[zone_left, zone_right], ymin=zone_bottom, ymax=zone_top, color='black')
plt.show()

In [None]:
# Split the Fastball rows by their description value: swinging strikes
# (including blocked ones) versus called strikes.
swing_codes = ['swinging_strike', 'swinging_strike_blocked']
description = Fastball['description']
Swinging = Fastball[description.isin(swing_codes)]
Called = Fastball[description == 'called_strike']

plt.figure(figsize=(10, 10))
# Red points: swinging strikes; blue points: called strikes.
for subset, point_color, legend_text in (
    (Swinging, 'red', '空振り'),
    (Called, 'blue', '見逃し'),
):
    plt.scatter(subset['plate_x_cm'], subset['plate_z_cm'],
                color=point_color, label=legend_text)

plt.title('ストレートの投球位置')
plt.xlabel('投球のコース（cm）')
plt.ylabel('投球の高さ（cm）')
plt.legend(loc='upper right')

# Black rectangle spanning x in [-30, 30] and y in [50, 110]
# (presumably the strike zone - confirm).
zone_x = [-30, 30]
zone_y = [50, 110]
plt.hlines(y=zone_y, xmin=zone_x[0], xmax=zone_x[1], color='black')
plt.vlines(x=zone_x, ymin=zone_y[0], ymax=zone_y[1], color='black')
plt.xlim(-80, 80)
plt.ylim(0, 150)
plt.show()

## 2-7 投球の到達位置を表すヒートマップ

In [None]:
import japanize_matplotlib
import pandas as pd
import matplotlib.pyplot as plt

# Reload the pitch data and add *_cm copies of plate_x / plate_z scaled by
# 30.48 (presumably feet to centimetres - confirm against the data source).
df = pd.read_csv('2022_Ohtani.csv')
df['plate_x_cm'] = df['plate_x'] * 30.48
df['plate_z_cm'] = df['plate_z'] * 30.48

Fastball = df[df['pitch_type']=='FF']

# 3x3 tally of pitches inside the 50-110 cm (height) by -30..30 cm (side) box.
# Row index: 0 = high, 1 = middle, 2 = low.  Column index: 0 = left band,
# 1 = centre band, 2 = right band (matching the tick labels used when plotted).
location = [[0, 0, 0], [0, 0, 0], [0, 0, 0]]
total = 0
for height, side in zip(Fastball['plate_z_cm'], Fastball['plate_x_cm']):

    # Positive "inside the box" tests on purpose: every comparison with NaN is
    # False, so a pitch with a missing coordinate is skipped here.  The former
    # "height < 50 or 110 < height" guards let NaN through, and the pitch was
    # then counted in whatever cell the previous iteration had selected.
    if not (50 <= height <= 110):
        continue
    if not (-30 <= side <= 30):
        continue

    # Height band: (90, 110] -> 0, [70, 90] -> 1, [50, 70) -> 2.
    if height > 90:
        h = 0
    elif height >= 70:
        h = 1
    else:
        h = 2

    # Side band: [-30, -10) -> 0, [-10, 10] -> 1, (10, 30] -> 2.
    if side < -10:
        s = 0
    elif side <= 10:
        s = 1
    else:
        s = 2

    location[h][s] += 1
    total += 1

location

In [None]:
# Turn each count in the 3x3 grid into its share of all counted pitches.
ratio = [[count / total for count in row] for row in location]

ratio

In [None]:
# Draw the 3x3 ratio grid as a heatmap using the 'OrRd' colormap.
plt.figure(figsize=(8, 8))
plt.imshow(ratio, cmap='OrRd')

# Colour bar, with the colour scale limits set to 0-0.2.
plt.colorbar()
plt.clim(0, 0.2)
plt.title('ストレートのコース別投球割合')

# Label the three columns and three rows of the grid.
tick_positions = [0, 1, 2]
column_names = ['三塁側', '真ん中', '一塁側']
row_names = ['高め', '真ん中', '低め']
plt.xticks(tick_positions, column_names)
plt.yticks(tick_positions, row_names)
plt.show()

In [None]:
def _count_zone_pitches(data):
    """Tally the pitches in ``data`` into a 3x3 grid; return (grid, total).

    Only pitches with 50 <= plate_z_cm <= 110 and -30 <= plate_x_cm <= 30 are
    counted.  Grid rows are height bands (0 = high, 1 = middle, 2 = low) and
    grid columns are side bands (0 = [-30, -10), 1 = [-10, 10], 2 = (10, 30]).
    """
    location = [[0, 0, 0], [0, 0, 0], [0, 0, 0]]
    total = 0

    for height, side in zip(data['plate_z_cm'], data['plate_x_cm']):
        # Positive in-range tests: comparisons with NaN are always False, so
        # pitches with a missing coordinate are skipped instead of falling
        # through the band selection below with stale indices.
        if not (50 <= height <= 110):
            continue
        if not (-30 <= side <= 30):
            continue

        # Height band: (90, 110] -> 0, [70, 90] -> 1, [50, 70) -> 2.
        if height > 90:
            h = 0
        elif height >= 70:
            h = 1
        else:
            h = 2

        # Side band: [-30, -10) -> 0, [-10, 10] -> 1, (10, 30] -> 2.
        if side < -10:
            s = 0
        elif side <= 10:
            s = 1
        else:
            s = 2

        location[h][s] += 1
        total += 1

    return location, total


def create_heatmap(data, title):
    """Draw a 3x3 heatmap of where the pitches in ``data`` were located.

    Args:
        data: Table with ``plate_z_cm`` and ``plate_x_cm`` columns (for
            example a pandas DataFrame); one row per pitch.
        title: Text used as the figure title.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Tally the pitches of the DataFrame that was passed in.  The earlier
    # version iterated the global ``Fastball`` here, so every call produced
    # the same picture regardless of ``data``.
    location, total = _count_zone_pitches(data)

    # Share of counted pitches per cell.  When nothing landed inside the box
    # the grid is all zeros rather than a ZeroDivisionError.
    ratio = [[num / total if total else 0.0 for num in row] for row in location]

    # Build the heatmap.
    plt.figure(figsize=(8, 8))
    plt.imshow(ratio, cmap='OrRd')

    # Cosmetic adjustments: colour bar, colour limits 0-0.2, title, tick labels.
    plt.colorbar()
    plt.clim(0, 0.2)
    plt.title(title)
    plt.xticks([0, 1, 2], ['三塁側', '真ん中', '一塁側'])
    plt.yticks([0, 1, 2], ['高め', '真ん中', '低め'])
    plt.show()

In [None]:
# Split the Fastball rows by the batter's 'stand' value ('R' / 'L') and call
# create_heatmap once per subset with the matching title.
batter_side = Fastball['stand']
Fastball_Right = Fastball[batter_side == 'R']
Fastball_Left = Fastball[batter_side == 'L']

for subset, heading in (
    (Fastball_Right, 'ストレートのコース別投球割合（対右打者）'),
    (Fastball_Left, 'ストレートのコース別投球割合（対左打者）'),
):
    create_heatmap(subset, heading)

## 2-8 リリースポイントを表す箱ひげ図

In [None]:
import japanize_matplotlib
import pandas as pd
import matplotlib.pyplot as plt

# Reload the pitch data and add *_cm copies of release_pos_x / release_pos_z
# scaled by 30.48 (presumably feet to centimetres - confirm).
df = pd.read_csv('2022_Ohtani.csv')
df['release_pos_x_cm'] = df['release_pos_x'] * 30.48
df['release_pos_z_cm'] = df['release_pos_z'] * 30.48

Fastball = df[df['pitch_type']=='FF']  # fastball
Sweeper = df[df['pitch_type']=='ST']  # sweeper
Splitter = df[df['pitch_type']=='FS']  # splitter
Curve = df[df['pitch_type']=='CU']  # curveball

# boxplot does not skip NaN: a single missing value makes every statistic of
# that group NaN and its box is drawn empty, so missing values are dropped.
plt.boxplot([Fastball['release_pos_z_cm'].dropna(),
             Sweeper['release_pos_z_cm'].dropna(),
             Splitter['release_pos_z_cm'].dropna(),
             Curve['release_pos_z_cm'].dropna()],
             labels=['ストレート', 'スイーパー', 'スプリット', 'カーブ'])
plt.ylabel('上下リリース位置（cm）')
plt.title('球種別のリリース位置（高さ）')
plt.show()

In [None]:
# Same release-height box plot drawn horizontally (vert=False).
# Missing values are dropped because boxplot does not skip NaN: one NaN would
# leave that pitch type's box empty.
plt.boxplot([Fastball['release_pos_z_cm'].dropna(),
             Sweeper['release_pos_z_cm'].dropna(),
             Splitter['release_pos_z_cm'].dropna(),
             Curve['release_pos_z_cm'].dropna()],
             labels=['ストレート', 'スイーパー', 'スプリット', 'カーブ'],
             vert=False)
# With horizontal boxes the release-height values run along the x-axis, so the
# value label belongs on x; the y-axis already carries the pitch-type names.
plt.xlabel('上下リリース位置（cm）')
plt.title('球種別のリリース位置（高さ）')
plt.show()