In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation, ArtistAnimation
from IPython import display

In [2]:
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
def getImage(path, zoom=0.09):
    """Load an image file and wrap it in an ``OffsetImage``.

    Parameters
    ----------
    path : str
        Path of the image file; passed straight to ``plt.imread``.
    zoom : float, default 0.09
        Scale factor handed to ``OffsetImage``. The default is the value this
        notebook previously hard-coded, so existing calls are unaffected.

    Returns
    -------
    matplotlib.offsetbox.OffsetImage
        The image, ready to be placed on an axes via ``AnnotationBbox``.
    """
    return OffsetImage(plt.imread(path), zoom=zoom)

In [3]:
def standings_on(dt:str) -> pd.DataFrame:
    """Fetch the AL and NL overall standings as of one calendar day.

    Parameters
    ----------
    dt : str
        Anything ``pd.to_datetime`` can parse into a single date.

    Returns
    -------
    pd.DataFrame
        The NL table followed by the AL table, re-indexed from 0, with an
        extra ``dt`` column holding the parsed date.

    Notes
    -----
    Reads two pages from widgets.sports-reference.com via ``pd.read_html``,
    so it needs network access.
    """
    date = pd.to_datetime(dt)

    # Both league widgets share this prefix; only the league code inside the
    # trailing `div` parameter differs.
    boxes_url = (
        "https://widgets.sports-reference.com/wg.fcgi?css=1&site=br"
        f"&url=%2Fboxes%2F%3Fyear%3D{date.year}%26month%3D{date.month:02}%26day%3D{date.day:02}"
    )

    # read_html returns a list of tables; the first one is the standings table kept here.
    tables = {
        league: pd.read_html(f"{boxes_url}&div=div_standings-upto-{league}-overall")[0]
        for league in ("AL", "NL")
    }

    return_df = pd.concat([tables["NL"], tables["AL"]]).reset_index(drop=True)
    return_df["dt"] = date
    return return_df

In [4]:
# standings_on(pd.to_datetime('9/30/2021'))

In [5]:
# every_day_of_season = pd.date_range(start='4/1/2021', end='10/1/2021')

In [16]:
# %%time
# df = pd.concat([ standings_on(str(d)) for d in every_day_of_season ])

In [17]:
# df.to_csv("standings_on_each_day_of_2021_mlb_season.csv", index=False)

In [18]:
# Load the cached standings file; the commented-out cells above show how it
# was scraped and written. No parse_dates argument is passed, so the "dt"
# column is not converted to datetimes on load.
df = pd.read_csv("standings_on_each_day_of_2021_mlb_season.csv")

In [19]:
# Show the last rows as a quick check of the loaded data.
df.tail()

Unnamed: 0,Tm,W,L,W-L%,GB,RS,RA,pythW-L%,dt
5515,LAA,75,84,0.472,23.0,710,794,0.449,2021-10-01
5516,KCR,73,86,0.459,25.0,672,771,0.437,2021-10-01
5517,MIN,71,88,0.447,27.0,712,820,0.436,2021-10-01
5518,TEX,59,100,0.371,39.0,612,798,0.381,2021-10-01
5519,BAL,52,107,0.327,46.0,650,928,0.343,2021-10-01


In [20]:
# Per-game rates: games played is wins plus losses, and each cumulative run
# total is divided by it.
df["games_played"] = df["W"] + df["L"]
df["rs_per_game"] = df["RS"].div(df["games_played"])
df["ra_per_game"] = df["RA"].div(df["games_played"])

In [38]:
# Keep rows whose "dt" is 2021-05-07 or later. The comparison is against a
# string; the dt values shown in the outputs are YYYY-MM-DD text, for which
# string order matches date order.
plot_df = df.loc[df["dt"].ge("2021-05-07")]

In [39]:
# (rows, columns) of the filtered frame.
plot_df.shape

(4440, 12)

In [40]:
# Turn on matplotlib's xkcd sketch style. It is called here without a `with`
# block, so it is not scoped to a single figure.
plt.xkcd()

<matplotlib.rc_context at 0x7fb2ac57a790>

In [None]:
%%time
fig, ax = plt.subplots(figsize=(16, 18))

# Both axes span 3 to 6 runs per game.
ax.set_xlim((3, 6))
ax.set_ylim((3, 6))

# Chronological order; the distinct teams and dates drive the animation below.
plot_df = plot_df.sort_values("dt")
teams = plot_df["Tm"].unique()
dates = plot_df["dt"].unique()

# Whole-run tick marks and axis labelling.
ax.set_xticks(range(3, 7))
ax.set_yticks(range(3, 7))
ax.set_xlabel("Average Runs Scored Per Game", size=25)
ax.set_ylabel("Average Runs Allowed Per Game", size=25)
ax.tick_params(axis='both', labelsize=20)
ax.set_title("MLB Runs Scored vs Runs Allowed", size=40, pad=30)
fig.tight_layout()

# watermark: large, faint, diagonal credit across the figure
ax.annotate(
    'made by u/double_dose_larry',
    xy=(.15, .85), xycoords='figure fraction',
    ha='left', va='top',
    fontsize=72, alpha=0.1, rotation=-45,
)

# date label: starts at the first date and is updated on every animation frame
date_text = ax.annotate(
    dates[0],
    xy=(.66, .8), xycoords='figure fraction',
    ha='left', va='top',
    fontsize=72,
)


# Red corner captions naming each quadrant of the chart. x is runs scored
# (more is better); y is runs allowed (more is worse), so poor pitching is
# at the top.
for caption, corner in (
    ("poor scoring\npoor pitching", (.08, .93)),   # top left
    ("good scoring\npoor pitching", (.8, .93)),    # top right
    ("poor scoring\ngood pitching", (.08, .1)),    # bottom left
    ("good scoring\ngood pitching", (.8, .1)),     # bottom right
):
    plt.annotate(caption,
            xy=corner, xycoords='figure fraction',
            horizontalalignment='left', verticalalignment='top',
            fontsize=28, c="r")


# Reference lines at the mean runs allowed / scored per game.
# Each label prints the value its line is drawn at. Previously the labels were
# computed from `df` while the lines used `plot_df`, and the runs-scored label
# printed the runs-allowed mean.
first_day_df = plot_df[plot_df["dt"] == dates[0]]  # rows for the first plotted date; not used further in this cell

mean_ra_per_game = plot_df.ra_per_game.mean()
mean_rs_per_game = plot_df.rs_per_game.mean()

# horizontal line: mean runs allowed
mean_run_allowed = ax.axhline(mean_ra_per_game, lw=2)
plt.annotate(f'Mean Runs Allowed(season)\n{mean_ra_per_game:0.03}',
        xy=(.75, .465), xycoords='figure fraction',
        horizontalalignment='left', verticalalignment='top',
        fontsize=18, c="b")

# vertical line: mean runs scored
mean_run_scored = ax.axvline(mean_rs_per_game, lw=2)
plt.annotate(f'Mean Runs Scored(season)\n{mean_rs_per_game:0.03}',
        xy=(.53, .94), xycoords='figure fraction',
        horizontalalignment='left', verticalalignment='top',
        fontsize=18, c="b")


def get_xy_team(team, dt):
    """Return ``[rs_per_game, ra_per_game]`` for one team on one date.

    The row is looked up in the notebook-level ``plot_df`` by matching its
    ``Tm`` and ``dt`` columns. If several rows match, the first is used; if
    none match, ``.iloc[0]`` raises ``IndexError``.
    """
    is_match = (plot_df["Tm"] == team) & (plot_df["dt"] == dt)
    first_hit = plot_df.loc[is_match, ["rs_per_game", "ra_per_game"]].iloc[0]
    return first_hit.to_numpy()
    

# One framed logo per team, in the same order as `teams`, placed at the
# team's position on the first date. Each image is read from "<team>.png",
# a path relative to the working directory.
logos = []
for tm in teams:
    logo_box = AnnotationBbox(
        getImage(f'{tm}.png'),
        get_xy_team(tm, dates[0]),
        frameon=True,
    )
    logos.append(ax.add_artist(logo_box))

def animate(dt):
    """Move every team logo to its position on ``dt`` and update the date label.

    Passed to ``FuncAnimation`` as the per-frame callback; ``dt`` is one entry
    of ``dates``. Relies on the notebook-level ``teams``, ``logos`` (same
    order as ``teams``), ``date_text`` and ``get_xy_team``.
    """
    print(dt)  # progress indicator while the animation renders
    for tm, logo in zip(teams, logos):
        x, y = get_xy_team(tm, dt)
        logo.xyann = (x, y)
    # Update the label once per frame through the public setter rather than
    # writing the private `_text` attribute once per team.
    date_text.set_text(dt)
    

# Drive `animate` with every date after the first; the first date is already
# shown by the initial logo placement and date label. `interval` is the delay
# between frames in milliseconds.
anim = FuncAnimation(fig, animate, frames = dates[1:], interval=150)
# writevideo = anim.FFMpegWriter(fps=60)
# Write the animation to anim.mp4 in the working directory.
anim.save('anim.mp4')
# Build an HTML5 <video> snippet and show it inline in the notebook.
# NOTE(review): this looks like a second full render of the animation on top
# of save() above — confirm, and consider embedding the saved file instead.
video = anim.to_html5_video()
html = display.HTML(video)
display.display(html)
# Close the current figure; presumably so the static plot is not also
# rendered as cell output — confirm.
plt.close()

2021-05-08
2021-05-08
2021-05-09
2021-05-10
2021-05-11
2021-05-12
2021-05-13
2021-05-14
2021-05-15
2021-05-16
2021-05-17
2021-05-18
2021-05-19
2021-05-20
2021-05-21
2021-05-22
2021-05-23
2021-05-24
2021-05-25
2021-05-26
2021-05-27
2021-05-28
2021-05-29
2021-05-30
2021-05-31
2021-06-01
2021-06-02
2021-06-03
2021-06-04
2021-06-05
2021-06-06
2021-06-07
2021-06-08
2021-06-09
2021-06-10
2021-06-11
2021-06-12
2021-06-13
2021-06-14
2021-06-15
2021-06-16
2021-06-17
2021-06-18
2021-06-19
2021-06-20
2021-06-21
2021-06-22
2021-06-23
2021-06-24
2021-06-25
2021-06-26
2021-06-27
2021-06-28
2021-06-29
2021-06-30
2021-07-01
2021-07-02
2021-07-03
2021-07-04
2021-07-05
2021-07-06
2021-07-07
2021-07-08
2021-07-09
2021-07-10
2021-07-11
2021-07-12
2021-07-13
2021-07-14
2021-07-15
2021-07-16
2021-07-17
2021-07-18
2021-07-19
2021-07-20
2021-07-21
2021-07-22
2021-07-23
2021-07-24
2021-07-25
2021-07-26
2021-07-27
2021-07-28
2021-07-29
2021-07-30
2021-07-31
2021-08-01
2021-08-02
2021-08-03
2021-08-04
2021-08-05

In [None]:
# Largest ra_per_game among all rows of each date; first 40 dates shown.
df.groupby("dt").ra_per_game.max().head(40)

In [37]:
# The 30 DET rows with the lowest rs_per_game, in ascending order.
df.query("Tm == 'DET'").sort_values("rs_per_game").head(30)

Unnamed: 0,Tm,W,L,W-L%,GB,RS,RA,pythW-L%,dt,games_played,rs_per_game,ra_per_game
989,DET,8,21,0.276,9.5,79,141,0.257,2021-05-03,29,2.724138,4.862069
959,DET,8,21,0.276,9.5,79,141,0.257,2021-05-02,29,2.724138,4.862069
899,DET,8,19,0.296,9.0,75,133,0.26,2021-04-30,27,2.777778,4.925926
929,DET,8,20,0.286,9.5,79,139,0.262,2021-05-01,28,2.821429,4.964286
1019,DET,8,22,0.267,10.5,86,152,0.261,2021-05-04,30,2.866667,5.066667
869,DET,8,18,0.308,8.5,75,123,0.288,2021-04-29,26,2.884615,4.730769
1049,DET,9,22,0.29,9.5,92,157,0.273,2021-05-05,31,2.967742,5.064516
779,DET,7,16,0.304,8.0,69,107,0.309,2021-04-26,23,3.0,4.652174
18,DET,1,0,1.0,--,3,2,0.677,2021-04-01,1,3.0,2.0
48,DET,1,0,1.0,0.5,3,2,0.677,2021-04-02,1,3.0,2.0
