# Daily update tool
Run once a day after all games are finished

- Scrape data
- Update categories
- Calculate FiFaX
- Scrape video urls of leaders
- Save database
- Tweet top 5s

In [613]:
reset -fs

In [614]:
pip install selenium

Note: you may need to restart the kernel to use updated packages.


In [615]:
from datetime import date, timedelta
from selenium import webdriver
from bs4 import BeautifulSoup
import time, os
import pandas as pd
import numpy as np

Setting date to yesterday (change if target date is not yesterday)

In [616]:
today = date.today()
yesterday = today + timedelta(days = -1)
two_days_ago = yesterday + timedelta(days = -1)
yesterday_date = '-'.join([str(yesterday.year), str(yesterday.month), str(yesterday.day)])
two_dates_ago = '-'.join([str(two_days_ago.year), str(two_days_ago.month), str(two_days_ago.day)])

Setting up the columns of the dataframe

In [617]:
columns = ['date', 't1', 'pitcher', 't2', 'batter', 'inning', 'result', 'pitch_type', 'mph', 
           'rpm', 'vbreak', 'up_down', 'hbreak', 'left_right', 'count']

In [618]:
chromedriver = '/Applications/chromedriver'
os.environ['webdriver.chrome.driver'] = chromedriver
driver = webdriver.Chrome(chromedriver)

  driver = webdriver.Chrome(chromedriver)


In [619]:
target_date = yesterday_date
path = 'https://baseballsavant.mlb.com/gamefeed?date=' + target_date + '&chartType=pitch&legendType=pitchName' + \
        '&playerType=pitcher&inning=&count=&pitchHand=&batSide=&descFilter=&ptFilter=&resultFilter=&hf=pitchVelocity#706856'
driver.get(path)
time.sleep(15)
soup = BeautifulSoup(driver.page_source, 'html.parser')

df = pd.DataFrame(columns = columns)
games = soup.find_all('div', {'class': 'game-container step'})
teams_left = soup.find_all('div', {'class':'team-left'})
teams_right = soup.find_all('div', {'class':'team-right'})
# Iterating through each game in the day
for game, team_left, team_right in zip(games, teams_left, teams_right):
    try:
        pitches = game.find('tbody').find_all('tr')
        t1 = team_left.find('div', {'class':'team-name'}).text.replace('\n', '').strip()
        t2 = team_right.find('div', {'class':'team-name'}).text.replace('\n', '').strip()

    except: # Breaks when no more games are found
        print('Break')
        break
    count_b = 0
    count_s = 0
    for pitch in pitches[::-1]: # Iterating through each pitch in the game
        try:
            row = pitch.find_all('span')

            pitcher = row[1].text
            batter = row[3].text
            inning = int(row[7].text)
            result = row[8].text
            pitch_type = row[9].text
            mph = float(row[10].text)
            rpm = int(row[11].text)

            vbreak = int(row[12].text)
            up_down = row[13].text
            hbreak = int(row[14].text)
            l_r = row[15].text
            entry = [target_date, t1, pitcher, t2, batter, inning, result, pitch_type, mph, rpm, vbreak, up_down, hbreak, l_r, (count_b, count_s)]
            df.loc[len(df)] = entry
        except:
            pass
        if ('ball' in result.lower()) or ('pitchout' in result.lower()): # Logic tree to figure out what the ball/strike count is
            count_b += 1
        elif ('strike' in result.lower()) or ('missed bunt' == result.lower()) or ('foul bunt' == result.lower()) or ('tip' in result.lower()):
            count_s += 1
        elif 'foul' in result.lower():
            if count_s < 2:
                count_s += 1
        if (count_b == 4) or (count_s == 3) or ('in play' in result.lower()) or ('hit by pitch' == result.lower()):
            count_b = 0
            count_s = 0

In [620]:
# Distilling categories to the four we care about
result_dict = {'Ball': 'Ball',
               'Foul': 'Foul',
               'Called Strike': 'Strike',
               'Swinging Strike': 'Strike',
               'In play, out(s)': 'Contact',
               'In play, no out': 'Contact',
               'Ball In Dirt': 'Ball',
               'In play, run(s)': 'Contact',
               'Foul Tip': 'Strike',
               'Hit By Pitch': 'Ball',
               'Foul Bunt': 'Strike',
               'Missed Bunt': 'Strike',
               'Pitchout': 'Ball'}

# Distilling pitches into groups we care about
def group_pitches(x):
    if 'Fastball' in x:
        return 'Fastball'
    elif 'Curve' in x:
        return 'Curveball'
    elif ('Knuc' in x) or (x == 'Splitter') or (x == 'Eephus'):
        return 'Splitter'
    else:
        return x


In [621]:
# Making sure we keep the raw data
df['left_right_raw'] = df['left_right']
df['result_raw'] = df['result']
df['pitch_type_raw'] = df['pitch_type']

# Narrowing down categories
df['left_right'] = df['left_right'].apply(lambda x: x == '←')
df['result'] = df['result'].map(result_dict)
df['pitch_type'] = df['pitch_type'].apply(group_pitches)

In [622]:
pitch_types = ['Fastball', 'Slider', 'Sinker', 'Changeup', 'Curveball', 'Splitter', 'Cutter']

In [623]:
from sklearn.ensemble import RandomForestClassifier
from joblib import load

Loading the pre-trained model

In [624]:
rf_dict = {}
for pitch_type in pitch_types:
    rf_dict[pitch_type] = load(pitch_type + '3.rf')

In [625]:
def calculate_fifax(args):
    p_type, mph, rpm, vbreak, hbreak, is_left = args[0], args[1], args[2], args[3], args[4], args[5]
    X = np.array([mph, rpm, vbreak, hbreak, is_left]).reshape(1,5)
    return rf_dict[p_type].predict_proba(X)[0][0]

In [626]:
# Calculating FiFaX for each pitch thrown
df['fifax'] = df[['pitch_type', 'mph', 'rpm', 'vbreak', 'hbreak', 'left_right']].apply(calculate_fifax, axis = 1)

In [627]:
df.loc[(df.result == 'Strike') & (df.pitch_type == 'Changeup')].sort_values(by = 'fifax', ascending = False).head(20)

Unnamed: 0,date,t1,pitcher,t2,batter,inning,result,pitch_type,mph,rpm,vbreak,up_down,hbreak,left_right,count,left_right_raw,result_raw,pitch_type_raw,fifax
3676,2022-5-15,Cubs,Scott Effross,Dbacks,Cooper Hummel,8,Strike,Changeup,81.9,2014,51,↓,17,True,"(0, 1)",←,Swinging Strike,Changeup,0.580812
701,2022-5-15,Reds,Jose Quintana,Pirates,Brandon Drury,4,Strike,Changeup,87.4,1327,28,↓,18,False,"(0, 0)",→,Called Strike,Changeup,0.580028
3649,2022-5-15,Cubs,Noe Ramirez,Dbacks,Willson Contreras,7,Strike,Changeup,85.7,1928,44,↓,19,True,"(1, 1)",←,Swinging Strike,Changeup,0.564923
704,2022-5-15,Reds,Jose Quintana,Pirates,Tommy Pham,4,Strike,Changeup,86.7,1336,31,↓,19,False,"(0, 1)",→,Swinging Strike,Changeup,0.560089
1786,2022-5-15,Brewers,Brandon Woodruff,Marlins,Jesus Sanchez,2,Strike,Changeup,85.0,1800,29,↓,19,True,"(1, 2)",←,Swinging Strike,Changeup,0.551889
1662,2022-5-15,Blue Jays,Trevor Richards,Rays,Brett Phillips,7,Strike,Changeup,85.2,2383,30,↓,16,True,"(0, 2)",←,Swinging Strike,Changeup,0.550217
2002,2022-5-15,Brewers,Dylan Floro,Marlins,Omar Narvaez,9,Strike,Changeup,84.3,1597,36,↓,19,True,"(1, 0)",←,Foul Tip,Changeup,0.548745
3677,2022-5-15,Cubs,Scott Effross,Dbacks,Cooper Hummel,8,Strike,Changeup,82.8,2061,49,↓,17,True,"(0, 2)",←,Swinging Strike,Changeup,0.544956
1399,2022-5-15,Mariners,Joely Rodriguez,Mets,Cal Raleigh,8,Strike,Changeup,88.9,1074,39,↓,13,False,"(1, 1)",→,Swinging Strike,Changeup,0.544506
3415,2022-5-15,Angels,Domingo Acevedo,Athletics,Taylor Ward,9,Strike,Changeup,86.2,1789,31,↓,21,True,"(1, 1)",←,Swinging Strike,Changeup,0.54356


# Scraping baseballsavant for videos

First, need to scrape all player ids

In [628]:
pitcher_dict = {}
batter_dict = {}

Methods to match player names

In [629]:
def comma_name_to_full(comma_name):
    name_list = comma_name.split(', ')
    if len(name_list) == 2:
        return name_list[1] + ' ' + name_list[0]

In [630]:
def delete_suffix(suffix_name):
    suffix_name = suffix_name.replace(' Jr.', '')
    suffix_name = suffix_name.replace(' III', '')
    suffix_name = suffix_name.replace(' II', '')
    return suffix_name

Searches for all players in 2022 season, then grabs their player code

In [631]:
batter_path = f'https://baseballsavant.mlb.com/leaderboard/custom?year={yesterday.year}&type=batter&filter=&sort=1&sortDir=desc&min=1&selections='
driver.get(batter_path)
time.sleep(3)
soup = BeautifulSoup(driver.page_source, 'html.parser')
batters = soup.find('div', {'id':'sortable_stats'}).find('tbody').find_all('tr', {'class':'default-table-row'})

In [632]:
for batter in batters:
    batter_name = batter.find('a').text
    batter_id = batter.find('a')['href'].split('/')[-1]
    batter_dict[delete_suffix(comma_name_to_full(batter_name))] = batter_id

In [633]:
pitcher_path = f'https://baseballsavant.mlb.com/leaderboard/custom?year={yesterday.year}&type=pitcher&filter=&sort=4&sortDir=asc&min=1&selections=p_formatted_ip,exit_velocity_avg,launch_angle_avg,barrel_batted_rate,&chart=false&x=p_formatted_ip&y=p_formatted_ip&r=no&chartType=beeswarm'
driver.get(pitcher_path)
time.sleep(3)
soup = BeautifulSoup(driver.page_source, 'html.parser')
pitchers = soup.find('div', {'id':'sortable_stats'}).find('tbody').find_all('tr', {'class':'default-table-row'})

In [634]:
for pitcher in pitchers:
    pitcher_name = pitcher.find('a').text
    pitcher_id = pitcher.find('a')['href'].split('/')[-1]
    pitcher_dict[delete_suffix(comma_name_to_full(pitcher_name))] = pitcher_id

Searches up the pitch in question, then gets the URL of the raw mp4 video

In [635]:
def get_mp4(leader, driver):
    url = f"https://baseballsavant.mlb.com/statcast_search?hfPT=&hfAB=&hfGT=R%7C&hfPR=&hfZ=&stadium=&hfBBL=&hfNewZones=&hfPull=&hfC={leader['count'][0]}{leader['count'][1]}%7C&hfSea={yesterday.year}%7C&hfSit=&player_type=pitcher&hfOuts=&opponent=&pitcher_throws=&batter_stands=&hfSA=&game_date_gt={yesterday_date}&game_date_lt=&hfInfield=&team=&position=&hfOutfield=&hfRO=&home_road=&batters_lookup%5B%5D={batter_dict[delete_suffix(leader.batter)]}&hfFlag=&hfBBT=&pitchers_lookup%5B%5D={pitcher_dict[delete_suffix(leader.pitcher)]}&metric_1=&hfInn={leader.inning}%7C&min_pitches=0&min_results=0&group_by=name&sort_col=pitches&player_event_sort=api_p_release_speed&sort_order=desc&min_pas=0#results"
    
    driver.get(url)
    time.sleep(3)
    try:
        driver.find_element_by_class_name('player_name').click()
    except:
        url = f"https://baseballsavant.mlb.com/statcast_search?hfPT=&hfAB=&hfGT=R%7C&hfPR=called%5C.%5C.strike%7Cmissed%5C.%5C.bunt%7Cfoul%5C.%5C.tip%7Cswinging%5C.%5C.pitchout%7Cswinging%5C.%5C.strike%7Cswinging%5C.%5C.strike%5C.%5C.blocked%7C&hfZ=&stadium=&hfBBL=&hfNewZones=&hfPull=&hfC=&hfSea={yesterday.year}%7C&hfSit=&player_type=pitcher&hfOuts=&opponent=&pitcher_throws=&batter_stands=&hfSA=&game_date_gt={yesterday_date}&game_date_lt=&hfInfield=&team=&position=&hfOutfield=&hfRO=&home_road=&batters_lookup%5B%5D={batter_dict[delete_suffix(leader.batter)]}&hfFlag=&hfBBT=&pitchers_lookup%5B%5D={pitcher_dict[delete_suffix(leader.pitcher)]}&metric_1=&hfInn={leader.inning}%7C&min_pitches=0&min_results=0&group_by=name&sort_col=pitches&player_event_sort=api_p_release_speed&sort_order=desc&min_pas=0#results"
        driver.get(url)
        time.sleep(3)
        try:
            driver.find_element_by_class_name('player_name').click()
        except:
            url = f"https://baseballsavant.mlb.com/statcast_search?hfPT=&hfAB=&hfGT=R%7C&hfPR=&hfZ=&stadium=&hfBBL=&hfNewZones=&hfPull=&hfC={leader['count'][0]}{leader['count'][1]}%7C&hfSea={yesterday.year}%7C&hfSit=&player_type=pitcher&hfOuts=&opponent=&pitcher_throws=&batter_stands=&hfSA=&game_date_gt={yesterday_date}&game_date_lt=&hfInfield=&team=&position=&hfOutfield=&hfRO=&home_road=&batters_lookup%5B%5D={batter_dict[delete_suffix(leader.batter)]}&hfFlag=&hfBBT=&metric_1=&hfInn={leader.inning}%7C&min_pitches=0&min_results=0&group_by=name&sort_col=pitches&player_event_sort=api_p_release_speed&sort_order=desc&min_pas=0#results"
            driver.get(url)
            time.sleep(3)
            try:
                driver.find_element_by_class_name('player_name').click()
            except:
                url = f"https://baseballsavant.mlb.com/statcast_search?hfPT=&hfAB=&hfGT=R%7C&hfPR=&hfZ=&stadium=&hfBBL=&hfNewZones=&hfPull=&hfC={leader['count'][0]}{leader['count'][1]}%7C&hfSea={yesterday.year}%7C&hfSit=&player_type=pitcher&hfOuts=&opponent=&pitcher_throws=&batter_stands=&hfSA=&game_date_gt={two_dates_ago}&game_date_lt=&hfInfield=&team=&position=&hfOutfield=&hfRO=&home_road=&batters_lookup%5B%5D={batter_dict[delete_suffix(leader.batter)]}&hfFlag=&hfBBT=&pitchers_lookup%5B%5D={pitcher_dict[delete_suffix(leader.pitcher)]}&metric_1=&hfInn={leader.inning}%7C&min_pitches=0&min_results=0&group_by=name&sort_col=pitches&player_event_sort=api_p_release_speed&sort_order=desc&min_pas=0#results"
                driver.get(url)
                time.sleep(3)
                driver.find_element_by_class_name('player_name').click()
                
        
    time.sleep(5)
    try:
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        driver.get('https://baseballsavant.mlb.com' + soup.find('div', {'id':'search-results'}).find('a')['href'])
    except:
        time.sleep(10)
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        driver.get('https://baseballsavant.mlb.com' + soup.find('div', {'id':'search-results'}).find('a')['href'])
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    source_url = soup.find('video', {'id':'sporty'}).find('source')['src']
    return source_url

Goes through the FiFaX leaderboard for each pitch type and gets mp4 url of video

In [636]:
df['url'] = 'None'
leaderboard = df.loc[df['result'] == 'Strike'].sort_values(by = 'fifax', ascending = False).head(5)

for i in range(0, len(leaderboard)):
    leader = leaderboard.iloc[i]
    if df.loc[leaderboard.index[i], 'url'] == 'None':
        df.loc[leaderboard.index[i], 'url'] = get_mp4(leader, driver)

  driver.find_element_by_class_name('player_name').click()


In [637]:
#df.loc[3767, 'url'] = 'https://sporty-clips.mlb.com/5754229d-b6ec-44e9-982f-5259a406f1c7.mp4'

In [638]:
#df.loc[3336, 'fifax'] = .5

In [639]:
leaderboard

Unnamed: 0,date,t1,pitcher,t2,batter,inning,result,pitch_type,mph,rpm,vbreak,up_down,hbreak,left_right,count,left_right_raw,result_raw,pitch_type_raw,fifax,url
250,2022-5-15,Padres,Kenley Jansen,Braves,Jurickson Profar,9,Strike,Sinker,93.5,2290,10,↓,3,True,"(0, 0)",←,Called Strike,Sinker,0.814962,
3344,2022-5-15,Angels,Zach Jackson,Athletics,Chad Wallach,7,Strike,Fastball,96.1,2233,7,↓,6,True,"(3, 2)",←,Swinging Strike,4-Seam Fastball,0.726754,
4079,2022-5-15,Giants,Carlos Rodon,Cardinals,Tommy Edman,2,Strike,Fastball,97.8,2422,8,↓,6,False,"(1, 2)",→,Swinging Strike,4-Seam Fastball,0.702597,
252,2022-5-15,Padres,Kenley Jansen,Braves,Wil Myers,9,Strike,Cutter,91.8,2551,14,↓,8,False,"(0, 0)",→,Swinging Strike,Cutter,0.683656,
4104,2022-5-15,Giants,Carlos Rodon,Cardinals,Paul Goldschmidt,3,Strike,Fastball,96.0,2306,9,↓,9,False,"(0, 1)",→,Swinging Strike,4-Seam Fastball,0.678827,


In [640]:
pitch_types = ['Fastball', 'Slider', 'Sinker', 'Changeup', 'Curveball', 'Cutter', 'Splitter']
for pitch_type in pitch_types:
    leaderboard = df.loc[(df['result'] == 'Strike') & (df['pitch_type'] == pitch_type)].sort_values(by = 'fifax', ascending = False).head(5)

    for i in range(0, len(leaderboard)):
        leader = leaderboard.iloc[i]
        if df.loc[leaderboard.index[i], 'url'] == 'None':
            df.loc[leaderboard.index[i], 'url'] = get_mp4(leader, driver)

  driver.find_element_by_class_name('player_name').click()


For Hitting Samurai

In [641]:
leaderboard = df.loc[(df['result_raw'] == 'In play, no out') | (df['result_raw'] == 'In play, run(s)')].sort_values(by = 'fifax', ascending = False).head(5)

for i in range(0, len(leaderboard)):
    leader = leaderboard.iloc[i]
    df.loc[leaderboard.index[i], 'url'] = get_mp4(leader, driver)

  driver.find_element_by_class_name('player_name').click()


# Save as CSV, then upload

In [642]:
df.to_csv('daily_push.csv')

In [643]:
pip install google-cloud-storage

Note: you may need to restart the kernel to use updated packages.


In [644]:
from google.cloud import storage

In [645]:
def upload_blob(bucket_name, source_file_name, destination_blob_name):
    
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)
    
    blob.upload_from_filename(source_file_name)
    
    print('{} with contents {} uploaded to {}'.format(destination_blob_name, source_file_name, bucket_name))
    
    

In [646]:
# Uploading to cloud database
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'google-cloud-auth.json'
upload_blob('the-filthiest', 'daily_push.csv', 'pitch-data/' + target_date + '.csv')

pitch-data/2022-5-15.csv with contents daily_push.csv uploaded to the-filthiest


In [647]:
driver.quit()

# Post to Twitter

In [648]:
!pip install tweepy
import tweepy
import requests
import json




Creating a dict that matches player names to their Twitter handles

In [649]:
class Dict(dict):
        def __missing__(self, key):
            return key
        
def process_twitter_handle(raw_handle):
    output = raw_handle.replace('\n','').split('\t')
    return output        

twitter_handles = open('twitter_handles.txt')
handles = twitter_handles.readlines()


twitter_handle_dict = Dict()
for handle_row in handles:
    name, handle = process_twitter_handle(handle_row)
    twitter_handle_dict[delete_suffix(name)] = handle

In [650]:
js = open('twitter-auth.json')
twitter_auth = json.load(js)


Setting up the Twitter templates for #TheFilthiest and #HittingSamurai

In [651]:
team_handles_hash = {
    'Orioles': ('@Orioles','#Birdland'),
    'Red Sox': ('@RedSox','#DirtyWater'),
    'Rockies': ('@Rockies','#Rockies'),
    'White Sox': ('@WhiteSox','#ChangeTheGame'),
    'Phillies': ('@Phillies','#RingTheBell'),
    'Marlins': ('@Marlins','#MakeItMiami'),
    'Guardians': ('@CleGuardians','#ForTheLand'),
    'Cardinals': ('@Cardinals','#STLCards'),
    'Brewers': ('@Brewers','#ThisIsMyCrew'),
    'Astros': ('@Astros','#LevelUp'),
    'Giants': ('@SFGiants','#SFGameUp'),
    'Mariners': ('@Mariners', '#SeaUsRise'),
    'Blue Jays': ('@BlueJays','#NextLevel'),
    'Cubs': ('@Cubs','#ItsDifferentHere'),
    'Yankees': ('@Yankees','#RepBX'),
    'Rangers': ('@Rangers','#StraightUpTX'),
    'Rays': ('@RaysBaseball','#RaysUp'),
    'Nationals': ('@Nationals','#NATITUDE'),
    'Twins': ('@Twins','#MNTwins'),
    'Angels': ('@Angels','#GoHalos'),
    'Mets': ('@Mets','#LGM'),
    'Pirates': ('@Pirates','#LetsGoBucs'),
    'Padres': ('@Padres','#TimeToShine'),
    'Reds': ('@Reds','#ATOBTTR'),
    'Dbacks': ('@Dbacks', '#DBacks'),
    'Tigers': ('@Tigers','#DetroitRoots'),
    'Dodgers': ('@Dodgers','#AlwaysLA'),
    'Royals': ('@Royals','#TogetherRoyal'),
    'Braves': ('@Braves','#ForTheA'),
    "Athletics": ('@Athletics','#DrumTogether')
    
}

number_dict = {1:'1️⃣\U0001F947',
              2:'2️⃣\U0001F948',
              3:'3️⃣\U0001F949',
              4:'4️⃣',
              5:'5️⃣'}

emoji_dict = {1:'\U0001F92E',
              2:'\U0001F631',
              3:'\U0001F974',
              4:'\U0001F62F',
              5:'\U0001F44D'}

def twitter_post(filename, tweet):
    twitter_auth_keys = {
        'bearer_token'        : twitter_auth['twitter_auth']['bearer_token'],
        "consumer_key"        : twitter_auth['twitter_auth']['consumer_key'],
        "consumer_secret"     : twitter_auth['twitter_auth']['consumer_secret'],
        "access_token"        : twitter_auth['twitter_auth']['access_token'],
        "access_token_secret" : twitter_auth['twitter_auth']['access_token_secret']
    }

    client = tweepy.Client(consumer_key=twitter_auth_keys['consumer_key'],
                           consumer_secret=twitter_auth_keys['consumer_secret'],
                           access_token=twitter_auth_keys['access_token'],
                           access_token_secret=twitter_auth_keys['access_token_secret'])
    auth = tweepy.OAuthHandler(
            twitter_auth_keys['consumer_key'],
            twitter_auth_keys['consumer_secret']
            )
    auth.set_access_token(
            twitter_auth_keys['access_token'],
            twitter_auth_keys['access_token_secret']
            )
    api = tweepy.API(auth)
 
    media = api.media_upload(filename, media_category = 'tweet_video')
    #tweet = f"#️⃣{number_dict[rank]} on #TheFilthiest for {yesterday_date}: {pitcher}'s {pitch_type} to {batter} {emoji_dict[rank]}\n\nMPH 🚀: {mph}\nRPM 💫: {rpm}\nFiFaX 🤯: {fifax:.3f}\n\n{team_handles_hash[team1][0]} {team_handles_hash[team1][1]} | {team_handles_hash[team2][0]} {team_handles_hash[team2][1]}"
    status = api.update_status(status=tweet, media_ids = [media.media_id])
 


In [652]:
def pitcher_post(rank, url, pitcher, batter, pitch_type, team1, team2, mph, rpm, fifax, filename):
    poi = ''
    if pitcher in twitter_handle_dict.keys():
        poi = '(' + twitter_handle_dict[pitcher] + ') '
    tweet = f"#️⃣{number_dict[rank]} on #TheFilthiest for {yesterday_date}: {poi}{pitcher}'s {pitch_type} to {batter} {emoji_dict[rank]}\n\nMPH 🚀: {mph}\nRPM 💫: {rpm}\nFiFaX 🤯: {fifax:.3f}\n\n{team_handles_hash[team1][0]} {team_handles_hash[team1][1]} | {team_handles_hash[team2][0]} {team_handles_hash[team2][1]}"
    twitter_post(filename, tweet)

def batter_post(rank, url, pitcher, batter, pitch_type, team1, team2, mph, rpm, fifax, filename):
    poi = ''
    if batter in twitter_handle_dict.keys():
        poi = '(' + twitter_handle_dict[batter] + ') '
    tweet = f"#️⃣{number_dict[rank]} on #HittingSamurai for {yesterday_date}: {poi}{batter} prevails against {pitcher}'s {pitch_type} 🗡️\n\nMPH 🚀: {mph}\nRPM 💫: {rpm}\nFiFaX 🤯: {fifax:.3f}\n\n{team_handles_hash[team1][0]} {team_handles_hash[team1][1]} | {team_handles_hash[team2][0]} {team_handles_hash[team2][1]}"
    twitter_post(filename, tweet)

In [653]:
leaderboard = df.loc[df['result'] == 'Strike'].sort_values(by = 'fifax', ascending = False).head(5)
leaderboard

Unnamed: 0,date,t1,pitcher,t2,batter,inning,result,pitch_type,mph,rpm,vbreak,up_down,hbreak,left_right,count,left_right_raw,result_raw,pitch_type_raw,fifax,url
250,2022-5-15,Padres,Kenley Jansen,Braves,Jurickson Profar,9,Strike,Sinker,93.5,2290,10,↓,3,True,"(0, 0)",←,Called Strike,Sinker,0.814962,https://sporty-clips.mlb.com/5fa65996-e094-490...
3344,2022-5-15,Angels,Zach Jackson,Athletics,Chad Wallach,7,Strike,Fastball,96.1,2233,7,↓,6,True,"(3, 2)",←,Swinging Strike,4-Seam Fastball,0.726754,https://sporty-clips.mlb.com/4e311551-4d89-4ff...
4079,2022-5-15,Giants,Carlos Rodon,Cardinals,Tommy Edman,2,Strike,Fastball,97.8,2422,8,↓,6,False,"(1, 2)",→,Swinging Strike,4-Seam Fastball,0.702597,https://sporty-clips.mlb.com/54de8c69-912d-478...
252,2022-5-15,Padres,Kenley Jansen,Braves,Wil Myers,9,Strike,Cutter,91.8,2551,14,↓,8,False,"(0, 0)",→,Swinging Strike,Cutter,0.683656,https://sporty-clips.mlb.com/2ad9f082-7670-40c...
4104,2022-5-15,Giants,Carlos Rodon,Cardinals,Paul Goldschmidt,3,Strike,Fastball,96.0,2306,9,↓,9,False,"(0, 1)",→,Swinging Strike,4-Seam Fastball,0.678827,https://sporty-clips.mlb.com/70299da6-3b8c-40c...


Given a leaderboard, posting the pitch info and mp4 to Twitter

In [654]:
#countdown = 5
filename = 'upload_vid.mp4'
def post_leaderboard_to_twitter(leaderboard, for_pitcher):
    countdown = len(leaderboard)
    for i in leaderboard.index[::-1]:
        if leaderboard.loc[i,'url'] == 'None':
            pass
        else:
            r = requests.get(leaderboard.loc[i,'url'], stream = True)
            if r.ok:
                print("saving to", filename)
                with open(filename, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=1024 * 8):
                        if chunk:
                            f.write(chunk)
                            f.flush()
                            os.fsync(f.fileno())
            if for_pitcher:
                pitcher_post(countdown, leaderboard.loc[i,'url'], leaderboard.loc[i, 'pitcher'], leaderboard.loc[i,'batter'],
                     leaderboard.loc[i,'pitch_type_raw'], leaderboard.loc[i, 't1'], leaderboard.loc[i, 't2'],
                     leaderboard.loc[i, 'mph'], leaderboard.loc[i, 'rpm'], leaderboard.loc[i, 'fifax'], filename)
            else:
                batter_post(countdown, leaderboard.loc[i,'url'], leaderboard.loc[i, 'pitcher'], leaderboard.loc[i,'batter'],
                     leaderboard.loc[i,'pitch_type_raw'], leaderboard.loc[i, 't1'], leaderboard.loc[i, 't2'],
                     leaderboard.loc[i, 'mph'], leaderboard.loc[i, 'rpm'], leaderboard.loc[i, 'fifax'], filename)
            countdown -= 1
            time.sleep(60)

Post the FiFaX leaderboard that results in a strike, then for reaching safely/scoring runs

In [655]:
leaderboard = df.loc[df['result'] == 'Strike'].sort_values(by = 'fifax', ascending = False).head(5)
post_leaderboard_to_twitter(leaderboard, True)
leaderboard = df.loc[(df['result_raw'] == 'In play, no out') | (df['result_raw'] == 'In play, run(s)')].sort_values(by = 'fifax', ascending = False).head(5)
post_leaderboard_to_twitter(leaderboard, False)

saving to upload_vid.mp4
saving to upload_vid.mp4
saving to upload_vid.mp4
saving to upload_vid.mp4
saving to upload_vid.mp4
saving to upload_vid.mp4
saving to upload_vid.mp4
saving to upload_vid.mp4
saving to upload_vid.mp4
saving to upload_vid.mp4
