Skip to content

Commit

Permalink
Added docstrings to any undocumented public-facing functions (#340)
Browse files Browse the repository at this point in the history
* Added docstrings to any undocumented public-facing functions

* responded to initial round of feedback
  • Loading branch information
erin2722 committed Mar 28, 2023
1 parent a0e5c4a commit 3ccd099
Show file tree
Hide file tree
Showing 10 changed files with 256 additions and 22 deletions.
10 changes: 10 additions & 0 deletions pybaseball/amateur_draft.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,16 @@ def get_draft_results(year: int, draft_round: int) -> pd.DataFrame:

@cache.df_cache()
def amateur_draft(year: int, draft_round: int, keep_stats: bool = True) -> pd.DataFrame:
"""
Retrieves the MLB amateur draft results by year and round.
ARGUMENTS
year: The year for which you wish to retrieve draft results.
draft_round: The round for which you wish to retrieve draft results. There is no distinction made
between the competitive balance, supplementary, and main portions of a round.
keep_stats: A boolean parameter that controls whether the major league stats of each draftee are
displayed. Defaults to True.
"""
draft_results = get_draft_results(year, draft_round)
draft_results = pd.concat(draft_results)
draft_results = postprocess(draft_results)
Expand Down
9 changes: 9 additions & 0 deletions pybaseball/amateur_draft_by_team.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,15 @@ def drop_stats(draft_results: pd.DataFrame) -> pd.DataFrame:
def amateur_draft_by_team(
team: str, year: int, keep_stats: bool = True
) -> pd.DataFrame:
"""
Get amateur draft results by team and year.
ARGUMENTS
team: Team code which you want to check. See docs for team codes
(https://github.com/jldbc/pybaseball/blob/master/docs/amateur_draft_by_team.md)
year: Year which you want to check.
keep_stats: Whether to keep the stats columns in the results. Defaults to True.
"""
draft_results = get_draft_results(team, year)
draft_results = postprocess(draft_results)
if not keep_stats:
Expand Down
7 changes: 7 additions & 0 deletions pybaseball/standings.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,13 @@ def get_tables(soup: BeautifulSoup, season: int) -> List[pd.DataFrame]:

@cache.df_cache()
def standings(season:Optional[int] = None) -> pd.DataFrame:
"""
Returns a pandas DataFrame of the standings for a given MLB season, or the most recent standings
if the season is not specified.
ARGUMENTS
season (int): the year of the season
"""
# get most recent standings if date not specified
if season is None:
season = most_recent_season()
Expand Down
39 changes: 36 additions & 3 deletions pybaseball/statcast_batter.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@ def statcast_batter(start_dt: Optional[str] = None, end_dt: Optional[str] = None
Pulls statcast pitch-level data from Baseball Savant for a given batter.
ARGUMENTS
start_dt : YYYY-MM-DD : the first date for which you want a player's statcast data
end_dt : YYYY-MM-DD : the final date for which you want data
player_id : INT : the player's MLBAM ID. Find this by calling pybaseball.playerid_lookup(last_name, first_name), finding the correct player, and selecting their key_mlbam.
start_dt : YYYY-MM-DD : the first date for which you want a player's statcast data
end_dt : YYYY-MM-DD : the final date for which you want data
player_id : INT : the player's MLBAM ID. Find this by calling pybaseball.playerid_lookup(last_name, first_name),
finding the correct player, and selecting their key_mlbam.
"""
start_dt, end_dt, _ = sanitize_input(start_dt, end_dt, player_id)

Expand All @@ -30,6 +31,15 @@ def statcast_batter(start_dt: Optional[str] = None, end_dt: Optional[str] = None

@cache.df_cache()
def statcast_batter_exitvelo_barrels(year: int, minBBE: Union[int, str] = "q") -> pd.DataFrame:
"""
Retrieves batted ball data for all batters in a given year.
ARGUMENTS
year: The year for which you wish to retrieve batted ball data. Format: YYYY.
minBBE: The minimum number of batted ball events for each player. If a player falls
below this threshold, they will be excluded from the results. If no value is specified,
only qualified batters will be returned.
"""
url = f"https://baseballsavant.mlb.com/leaderboard/statcast?type=batter&year={year}&position=&team=&min={minBBE}&csv=true"
res = requests.get(url, timeout=None).content
data = pd.read_csv(io.StringIO(res.decode('utf-8')))
Expand All @@ -38,6 +48,14 @@ def statcast_batter_exitvelo_barrels(year: int, minBBE: Union[int, str] = "q") -

@cache.df_cache()
def statcast_batter_expected_stats(year: int, minPA: Union[int, str] = "q") -> pd.DataFrame:
"""
Retrieves expected stats based on quality of batted ball contact in a given year.
ARGUMENTS
year: The year for which you wish to retrieve expected stats data. Format: YYYY.
minPA: The minimum number of plate appearances for each player. If a player falls below this threshold,
they will be excluded from the results. If no value is specified, only qualified batters will be returned.
"""
url = f"https://baseballsavant.mlb.com/leaderboard/expected_statistics?type=batter&year={year}&position=&team=&min={minPA}&csv=true"
res = requests.get(url, timeout=None).content
data = pd.read_csv(io.StringIO(res.decode('utf-8')))
Expand All @@ -46,6 +64,13 @@ def statcast_batter_expected_stats(year: int, minPA: Union[int, str] = "q") -> p

@cache.df_cache()
def statcast_batter_percentile_ranks(year: int) -> pd.DataFrame:
"""
Retrieves percentile ranks for each player in a given year, including batters with at least 2.1 PA per team
game and pitchers with at least 1.25 PA per team game.
ARGUMENTS
year: The year for which you wish to retrieve percentile data. Format: YYYY.
"""
url = f"https://baseballsavant.mlb.com/leaderboard/percentile-rankings?type=batter&year={year}&position=&team=&csv=true"
res = requests.get(url, timeout=None).content
data = pd.read_csv(io.StringIO(res.decode('utf-8')))
Expand All @@ -54,6 +79,14 @@ def statcast_batter_percentile_ranks(year: int) -> pd.DataFrame:

@cache.df_cache()
def statcast_batter_pitch_arsenal(year: int, minPA: int = 25) -> pd.DataFrame:
"""
Retrieves outcome data for batters split by the pitch type in a given year.
ARGUMENTS
year: The year for which you wish to retrieve pitch arsenal data. Format: YYYY.
minPA: The minimum number of plate appearances for each player. If a player falls below this threshold,
they will be excluded from the results. If no value is specified, the default number of plate appearances is 25.
"""
url = f"https://baseballsavant.mlb.com/leaderboard/pitch-arsenal-stats?type=batter&pitchType=&year={year}&team=&min={minPA}&csv=true"
res = requests.get(url, timeout=None).content
data = pd.read_csv(io.StringIO(res.decode('utf-8')))
Expand Down
55 changes: 50 additions & 5 deletions pybaseball/statcast_fielding.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
from . import cache
from .utils import norm_positions, sanitize_statcast_columns

"""Scrapes outs above average from baseball savant for a given year and position
@cache.df_cache()
def statcast_outs_above_average(year: int, pos: Union[int, str], min_att: Union[int, str] = "q", view: str = "Fielder") -> pd.DataFrame:
"""Scrapes outs above average from baseball savant for a given year and position
Args:
year (int): Season to pull
Expand All @@ -23,10 +25,7 @@
Returns:
pd.DataFrame: Dataframe of defensive OAA for the given year and position for players who have met
the given threshold
"""

@cache.df_cache()
def statcast_outs_above_average(year: int, pos: Union[int, str], min_att: Union[int, str] = "q", view: str = "Fielder") -> pd.DataFrame:
"""
pos = norm_positions(pos)
# catcher is not included in this leaderboard
if pos == "2":
Expand All @@ -39,6 +38,15 @@ def statcast_outs_above_average(year: int, pos: Union[int, str], min_att: Union[

@cache.df_cache()
def statcast_outfield_directional_oaa(year: int, min_opp: Union[int, str] = "q") -> pd.DataFrame:
"""
Retrieves outfielders' directional OAA data for the given year and number of opportunities. The directions are
Back Left, Back, Back Right, In Left, In, and In Right.
ARGUMENTS
year: The year for which you wish to retrieve directional OAA data. Format: YYYY.
min_opp: The minimum number of opportunities for the player to be included in the result. Statcast's
default is players with at least 1 fielding attempt per game.
"""
url = f"https://baseballsavant.mlb.com/directional_outs_above_average?year={year}&min={min_opp}&team=&csv=true"
res = requests.get(url, timeout=None).content
data = pd.read_csv(io.StringIO(res.decode('utf-8')))
Expand All @@ -47,6 +55,15 @@ def statcast_outfield_directional_oaa(year: int, min_opp: Union[int, str] = "q")

@cache.df_cache()
def statcast_outfield_catch_prob(year: int, min_opp: Union[int, str] = "q") -> pd.DataFrame:
"""
Retrieves aggregated data for outfielder performance on fielding attempt types, binned into five star categories,
for the given year and number of opportunities.
ARGUMENTS
year: The year for which you wish to retrieve catch probability data. Format: YYYY.
min_opp: The minimum number of opportunities for the player to be included in the result. Statcast's
default is players with at least 1 fielding attempt per game.
"""
url = f"https://baseballsavant.mlb.com/leaderboard/catch_probability?type=player&min={min_opp}&year={year}&total=&csv=true"
res = requests.get(url, timeout=None).content
data = pd.read_csv(io.StringIO(res.decode('utf-8')))
Expand All @@ -55,6 +72,15 @@ def statcast_outfield_catch_prob(year: int, min_opp: Union[int, str] = "q") -> p

@cache.df_cache()
def statcast_outfielder_jump(year: int, min_att: Union[int, str] = "q") -> pd.DataFrame:
"""
Retrieves data on outfielders' jump to the ball for the given year and number of attempts. Jump is calculated
only for two star or harder plays (90% or less catch probability).
ARGUMENTS
year: The year for which you wish to retrieve outfielder jump data. Format: YYYY.
min_att: The minimum number of attempts for the player to be included in the result. Statcast's default
is players with at least 2 two star or harder fielding attempts per team game / 5.
"""
url = f"https://baseballsavant.mlb.com/leaderboard/outfield_jump?year={year}&min={min_att}&csv=true"
res = requests.get(url, timeout=None).content
data = pd.read_csv(io.StringIO(res.decode('utf-8')))
Expand All @@ -63,6 +89,16 @@ def statcast_outfielder_jump(year: int, min_att: Union[int, str] = "q") -> pd.Da

@cache.df_cache()
def statcast_catcher_poptime(year: int, min_2b_att: int = 5, min_3b_att: int = 0) -> pd.DataFrame:
"""
Retrieves pop time data for catchers given year and minimum stolen base attempts for second and third base.
Pop time is measured as the time from the moment the ball hits the catcher's mitt to when it reaches the projected
receiving point at the center of the fielder's base.
ARGUMENTS
year: The year for which you wish to retrieve pop time data. Format: YYYY.
min_2b_att: The minimum number of stolen base attempts for second base against the catcher. Statcast's default is 5.
min_3b_att: The minimum number of stolen base attempts for third base against the catcher. Statcast's default is 0.
"""
# currently no 2020 data
url = f"https://baseballsavant.mlb.com/leaderboard/poptime?year={year}&team=&min2b={min_2b_att}&min3b={min_3b_att}&csv=true"
res = requests.get(url, timeout=None).content
Expand All @@ -71,6 +107,15 @@ def statcast_catcher_poptime(year: int, min_2b_att: int = 5, min_3b_att: int = 0

@cache.df_cache()
def statcast_catcher_framing(year: int, min_called_p: Union[int, str] = "q") -> pd.DataFrame:
"""
Retrieves the catcher's framing results for the given year and minimum called pitches. It uses eight zones around
the strike zone (aka "shadow zone") and gives the percentage of time the catcher gets the strike called in each zone.
ARGUMENTS
year: The year for which you wish to retrieve catcher framing data. Format: YYYY.
min_called_p: The minimum number of called pitches for the catcher in the shadow zone. Statcast's default
is players with at least 6 called pitches in the shadow zone per team game.
"""
url = f"https://baseballsavant.mlb.com/catcher_framing?year={year}&team=&min={min_called_p}&sort=4,1&csv=true"
res = requests.get(url, timeout=None).content
data = pd.read_csv(io.StringIO(res.decode('utf-8')))
Expand Down
Loading

0 comments on commit 3ccd099

Please sign in to comment.