In [1]:
import pandas as pd
import json
import requests
from functools import partial

# Let wide frames show up to 100 columns before pandas truncates the display.
pd.options.display.max_columns = 100

In [2]:
# Team id -> team name lookup, based on the JS code from the website
# (found on line 91 of this file):
# https://transactions.mlbtraderumors.com/widget/transactions-tracker&lang=en
TEAM_MAP = {
    32: "Anaheim Angels",
    1: "Arizona Diamondbacks",
    2: "Atlanta Braves",
    3: "Baltimore Orioles",
    4: "Boston Red Sox",
    5: "Chicago Cubs",
    6: "Chicago White Sox",
    7: "Cincinnati Reds",
    8: "Cleveland Indians",
    9: "Colorado Rockies",
    10: "Detroit Tigers",
    11: "Florida Marlins",
    12: "Houston Astros",
    13: "Kansas City Royals",
    14: "Los Angeles Angels",
    15: "Los Angeles Dodgers",
    31: "Miami Marlins",
    16: "Milwaukee Brewers",
    17: "Minnesota Twins",
    18: "New York Mets",
    19: "New York Yankees",
    20: "Oakland Athletics",
    21: "Philadelphia Phillies",
    22: "Pittsburgh Pirates",
    23: "San Diego Padres",
    24: "San Francisco Giants",
    25: "Seattle Mariners",
    26: "St. Louis Cardinals",
    33: "Tampa Bay Devil Rays",
    27: "Tampa Bay Rays",
    28: "Texas Rangers",
    29: "Toronto Blue Jays",
    30: "Washington Nationals",
    20000: "Japan",
    0: "Unknown",  # my own addition
}

# Each request gets 75 results at a time; the OFFSET sent is page * PAGE_SIZE.
PAGE_SIZE = 75


def _to_numeric_if_possible(column):
    """Return ``column`` as a numeric Series, or unchanged if it cannot be parsed."""
    # Explicit replacement for pd.to_numeric(errors="ignore"), which pandas
    # deprecated in 2.2.
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


def _team_names(frame, id_column):
    """Map the team ids in ``frame[id_column]`` to names via TEAM_MAP."""
    if id_column not in frame.columns:
        # e.g. an empty page: no records means no columns at all
        return pd.Series(index=frame.index, dtype="object")
    # Coerce so a stray non-numeric id becomes missing instead of crashing astype(int).
    team_ids = pd.to_numeric(frame[id_column], errors="coerce").dropna().astype(int)
    # Rows dropped above (and ids absent from TEAM_MAP) end up as NaN once the
    # result is aligned back onto the frame's index during column assignment.
    return team_ids.map(TEAM_MAP)


def get_trade_df(page, session=None, timeout=30):
    """Download one page of trade transactions as a DataFrame.

    Parameters
    ----------
    page : int
        Zero-based page number; the request uses ``OFFSET = page * PAGE_SIZE``.
    session : optional
        Object exposing a ``get(url, timeout=...)`` method, e.g. a
        ``requests.Session`` shared across pages. Defaults to the ``requests``
        module itself.
    timeout : float, optional
        Seconds to wait for the server before the request is abandoned.

    Returns
    -------
    pandas.DataFrame
        One row per entry of the response's ``"transactions"`` list, with
        numeric-looking columns converted to numbers, plus ``"from"`` and
        ``"to"`` columns holding the names mapped from ``FORMER_TEAM`` and
        ``ACQUIRING_TEAM``. A page with no transactions yields an empty frame
        that still has the ``"from"`` and ``"to"`` columns.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    KeyError
        If the JSON payload has no ``"transactions"`` entry.
    """
    http = requests if session is None else session

    response = http.get(
        f"https://transactions.mlbtraderumors.com/g/GetTransactions&widget=true&amount_type=4&type_ID=1&lang=41&OFFSET={page * PAGE_SIZE}",
        timeout=timeout,
    )
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    df = pd.DataFrame(response.json()["transactions"])

    # make numbers numeric; columns that do not parse are left untouched
    for column in df.columns:
        df[column] = _to_numeric_if_possible(df[column])

    # map team names
    df["from"] = _team_names(df, "FORMER_TEAM")
    df["to"] = _team_names(df, "ACQUIRING_TEAM")

    # return the df for the page
    return df
    

In [3]:
# quick test: fetch page index 1 (offset 75) and peek at its first two rows
get_trade_df(page=1).head(n=2)

Unnamed: 0,TRANSACTION_ID,TRANSACTION_TYPE,TRANSACTION_TYPE_ID,TRANSACTION_DATE,C_TRANSACTION_DATE,FREE_AGENT_TYPE_ID,QUALIFYING_OFFER_TYPE_ID,PLAYER_ID,PLAYER_NAME,FORMER_GM,ACQUIRING_GM,FORMER_TEAM,ACQUIRING_TEAM,AGENCY_ID,CONTRACT_YEARS,CONTRACT_AMOUNT,CONTRACT_BUYOUT,MLBTR_LINK,BBREF_TRAN_ID,NOTES,DATE_CREATED,STAFF_CREATED,DATE_UPDATED,STAFF_UPDATED,DATE_DISABLED,from,to
0,63252,Trade,1,2019-08-10 00:00:00,08/10/2019,,,22384,Ian Miller,34,121,25,17,,,,,https://www.mlbtraderumors.com/2019/08/twins-t...,,Twins acquire Ian Miller from Mariners in exch...,2019-08-11 14:14:22,28,"August 11, 2019, 2:14PM",JD Shaw,,Seattle Mariners,Minnesota Twins
1,63245,Trade,1,2019-08-09 00:00:00,08/09/2019,,,9269,Jose Lobaton,34,2,25,15,209.0,,,,https://www.mlbtraderumors.com/2019/08/dodgers...,,Dodgers acquire Jose Lobaton from Mariners in ...,2019-08-10 17:06:08,28,"August 10, 2019, 5:06PM",JD Shaw,,Seattle Mariners,Los Angeles Dodgers


In [4]:
%%time
# get 10 pages worth of data
df = pd.concat( [ get_trade_df(page) for page in range(10)])

CPU times: user 266 ms, sys: 22.1 ms, total: 288 ms
Wall time: 6.78 s


In [5]:
# check the date range covered by the downloaded transactions
trade_dates = pd.to_datetime(df["TRANSACTION_DATE"])
print(f"from {trade_dates.min()} to {trade_dates.max()}")

from 2018-01-17 00:00:00 to 2020-01-17 00:00:00


In [6]:
# export summary to csv: one row per (from, to) pair with its row count.
# Rows whose "from" or "to" is missing are left out by groupby's default
# dropna behaviour, and the unnamed count column is written under the header 0.
(
    df.groupby(by=["from", "to"])
    .size()
    .reset_index()
    .to_csv("players_traded_teams.csv", index=False)
)

In [8]:
# export all data to csv, leaving the row index out of the file
df.to_csv(path_or_buf="all_trade_data.csv", index=False)