# A. Morning Dashboard
- Date: 3/23/2024
- Description: This runs every morning to gather time-insensitive data:
    - DraftKings upcoming contests, salaries, and previous results
    - Stats API and Statcast
    - Steamer

### Imports

In [1]:
%run "C:\Users\james\Documents\MLB\Code\U1. Imports.ipynb"
%run "C:\Users\james\Documents\MLB\Code\U2. Utilities.ipynb"
%run "C:\Users\james\Documents\MLB\Code\U3. Classes.ipynb"

### Games

Select start and end date (should be yesterdaysdate for both if running day-of) <br>
Note: Past information may not be available for all scrapes.

In [4]:
start_date = yesterdaysdate
end_date = yesterdaysdate

Read in games

In [5]:
game_df = read_and_save_games(team_map, generate=True)

In [6]:
game_df = game_df[(game_df['date'] >= start_date) & (game_df['date'] <= end_date)].reset_index(drop=True)

### A01. DraftKings

In [8]:
%run "C:\Users\james\Documents\MLB\Code\A01. DraftKings.ipynb"

##### 1. Contests

In [9]:
# Scrape contests
contest_df = contests(todaysdate)
# To csv
contest_df.to_csv(os.path.join(baseball_path, 'A01. DraftKings', '1. Contests', f'Contests {todaysdate}.csv'), index=False)

##### 7. Subsets

In [10]:
# Select subset
subset_df = create_subset(contest_df, contests_per_draftGroupId=5, entry_fee_max=100, added_contestKeys=[], date_dash=todaysdate_dash)
subset_df.reset_index(drop=True, inplace=True)

# Save subset
subset_df.to_csv(os.path.join(baseball_path, "A01. DraftKings", "7. Subsets", f'Subset {todaysdate}.csv'), index=False)

##### 2. Draftables

In [11]:
%%time
# Loop over contests of interest
for draftGroupId in list(subset_df['draftGroupId'].unique()):
    print(draftGroupId)
    try:
        # Scrape draftables (DK Salaries)
        draftable_df = draftables(draftGroupId)
        # To csv
        draftable_df.to_csv(os.path.join(baseball_path, "A01. DraftKings", "2. Draftables", f"Draftables {draftGroupId}.csv"), index=False, encoding='iso-8859-1')
        print(f"Saved {draftGroupId}")
    except:
        print(f"Didn't save {draftGroupId}")

105038
Saved 105038
105035
Didn't save 105035
105034
Didn't save 105034
105033
Saved 105033
105032
Didn't save 105032
105031
Saved 105031
105043
Saved 105043
105045
Saved 105045
105052
Saved 105052
CPU times: total: 2.23 s
Wall time: 4.86 s


##### 3. Payouts

In [12]:
%%time
# Loop over contests of interest
for i in range(len(subset_df)):
    # Extract contestKey
    contestKey = subset_df['contestKey'][i]
    try:
        # Scrape payouts
        payout_df = payouts(contestKey)
        # To csv
        payout_df.to_csv(os.path.join(baseball_path, "A01. DraftKings", "3. Payouts", f"Payouts {contestKey}.csv"), index=False, encoding='iso-8859-1')
    except:
        print(f"Didn't save {contestKey}.")

CPU times: total: 6.67 s
Wall time: 15.2 s


##### 4. Results, 5. Entry Results, and 6. Player Results

Note: This will break if there were no games yesterday.

In [27]:
# Read in yesterdays subset
yesterdays_subset_df = pd.read_csv(os.path.join(baseball_path, 'A01. DraftKings', '7. Subsets', f'Subset {yesterdaysdate}.csv'))

In [14]:
# Loop over yesterday's contests
for i in range(len(yesterdays_subset_df)):
    # Extract contestKey
    contestKey = yesterdays_subset_df['contestKey'][i]
    print(contestKey)
    # Scrape results
    results(contestKey, sleep_time=5)

    try:        
        # Read into pandas
        results_df = pd.read_csv(os.path.join(baseball_path, "A01. DraftKings", "4. Results", f"contest-standings-{contestKey}.csv"))
        
        # Entry results
        entry_results_df = entry_results(results_df)
        entry_results_df.to_csv(os.path.join(baseball_path, "A01. DraftKings", "5. Entry Results", f"Entry Results {contestKey}.csv"), index=False, encoding='iso-8859-1')
    
        # Player results
        player_results_df = player_results(results_df)
        player_results_df.to_csv(os.path.join(baseball_path, "A01. DraftKings", "6. Player Results", f"Player Results {contestKey}.csv"), index=False, encoding='iso-8859-1')
    except IndexError as e:
        print(f"Downloaded contest-standings-{contestKey}. Non-traditional format.")
    except pd.errors.EmptyDataError as e:
        print(f"Downloaded contest-standings-{contestKey}. Empty file.")

160773720
contest-standings-160773720
['contest-standings-160773720.zip']
C:\Users\james\Downloads\contest-standings-160773720.zip
Zip file unpacked successfully!
160773723
contest-standings-160773723
['contest-standings-160773723.zip']
C:\Users\james\Downloads\contest-standings-160773723.zip
Zip file unpacked successfully!
160773721
contest-standings-160773721
['contest-standings-160773721.zip']
C:\Users\james\Downloads\contest-standings-160773721.zip
Zip file unpacked successfully!
160773719
contest-standings-160773719
['contest-standings-160773719.csv']
C:\Users\james\Downloads\contest-standings-160773719.csv
File copied successfully!
160773718
contest-standings-160773718
['contest-standings-160773718.csv']
C:\Users\james\Downloads\contest-standings-160773718.csv
File copied successfully!
160764296
contest-standings-160764296
['contest-standings-160764296.csv']
C:\Users\james\Downloads\contest-standings-160764296.csv
File copied successfully!
Downloaded contest-standings-160764296. 

### A02. MLB API

In [7]:
%run "C:\Users\james\Documents\MLB\Code\A02. MLB API.ipynb"

##### 1. Stats API 

In [8]:
statsapi_df = plays_statsapi("03/20/2024", "11/15/2024")
statsapi_df.to_csv(os.path.join(baseball_path, "A02. MLB API", "1. Stats API", "Stats API 2024.csv"), index=False, encoding='iso-8859-1')

del statsapi_df

##### 2. Statcast

In [9]:
statcast_df = plays_statcast("2024-03-20", "2024-11-15")
statcast_df.to_csv(os.path.join(baseball_path, "A02. MLB API", "2. Statcast", "Statcast 2024.csv"), index=False, encoding='iso-8859-1')

del statcast_df

This is a large query, it may take a moment to complete


100%|██████████| 241/241 [07:33<00:00,  1.88s/it]


### A03. Steamer

Note: Ensure code is on same screen as web browser for pyautogui controls to work as intended.

In [10]:
%run "C:\Users\james\Documents\MLB\Code\A03. Steamer.ipynb"

In [11]:
time.sleep(5)

In [12]:
copy_page_source('http://www.steamerprojections.com/index.php/projections/2024-projections')

url_text = find_url_after_text("Pitchers, 2024", pyperclip.paste())
print(url_text)
print(pyperclip.paste())
webbrowser.open("http:www.steamerprojections.com" + url_text)

url_text = find_url_after_text("Hitters, 2024", pyperclip.paste())
print(url_text)
webbrowser.open("http:www.steamerprojections.com" + url_text)

/index.php/projections/2024-projections?task=callelement&amp;format=raw&amp;item_id=1168&amp;element=2331b84b-d95c-470d-89ef-f458ce60ad9a&amp;method=download&amp;args[0]=0.0&amp;args[1]=8dcfb20d925a756767fc73d2e62fa403

<!DOCTYPE html>
<html lang="en-gb" dir="ltr" vocab="http://schema.org/">
    <head>
        <meta http-equiv="X-UA-Compatible" content="IE=edge">
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <link rel="shortcut icon" href="/images/ornament-bg5.png">
        <link rel="apple-touch-icon-precomposed" href="/images/ornament-bg5.png">
        <meta charset="utf-8" />
	<base href="http://www.steamerprojections.com/index.php/projections/2024-projections" />
	<meta name="rights" content="Copyright 2018" />
	<title>Steamer Projections - 2024 Projections</title>
	<link href="/index.php/projections/2024-projections/feed/rss/projections/2024?format=feed" rel="alternate" type="application/rss+xml" title="RSS 2.0" />
	<link href="/index.php/pro

True

In [13]:
time.sleep(5)

In [14]:
copy_page_source('http://www.steamerprojections.com/index.php/projections/historical-weekly-logs')
webbrowser.open("http:www.steamerprojections.com" + find_url_after_text("Pitchers, 2014-present, weekly in-season log", pyperclip.paste()))
webbrowser.open("http:www.steamerprojections.com" + find_url_after_text("Hitters, 2014-present, weekly in-season log", pyperclip.paste()))

True

### A07. Projections

In [15]:
%run "C:\Users\james\Documents\MLB\Code\A07. Projections.ipynb"

In [16]:
# Scrape slates
dff_slates_df = dff_slates(todaysdate)
# To csv
dff_slates_df.to_csv(os.path.join(baseball_path, "A07. Projections", "1. DFF", "1. Slates", f"DFF Slates {todaysdate}.csv"), index=False)

In [17]:
# Scrape slates
roto_slates_df = roto_slates(todaysdate)
# To csv
roto_slates_df.to_csv(os.path.join(baseball_path, "A07. Projections", "2. RotoWire", "1. Slates", f"RotoWire Slates {todaysdate}.csv"), index=False)

### A10. Player Results

In [18]:
%run "C:\Users\james\Documents\MLB\Code\A10. Player Results.ipynb"

Note: This will break if there were no games yesterday.

In [19]:
%%time
for i in range(len(game_df)):
    run_result_dfs(i)

746477
746481
747047
746073
744942
745831
746968
746806
745997
745018
746398
746239
747210
745673
745426
745260
CPU times: total: 9.78 s
Wall time: 15.4 s


### A05. Rosters

Note: This should fix incorrect or missing information from yesterday. 

In [20]:
%run "C:\Users\james\Documents\MLB\Code\A05. Rosters.ipynb"

##### 1. Batting Orders

In [21]:
# Run in parallel
empty_list = Parallel(n_jobs=-1, verbose=0)(delayed(orders)(team_map, game_df, row) for row in range(len(game_df)))

##### 2. Rosters

In [22]:
# Run in parallel
empty_list = Parallel(n_jobs=-1, verbose=0)(delayed(rosters)(team_map, game_df, row) for row in range(len(game_df)))

### A06. Weather

In [23]:
%run "C:\Users\james\Documents\MLB\Code\A06. Weather.ipynb"

In [24]:
daily_weather_df = park_and_weather_factors(game_df, date=yesterdaysdate, overwrite_year=2023)

# To csv
daily_weather_df.to_csv(os.path.join(baseball_path, "A06. Weather", "4. Park and Weather Factors", f'Park and Weather Factors {yesterdaysdate}.csv'), index=False)

20240430


### A09. Contest Guides

In [25]:
%run "C:\Users\james\Documents\MLB\Code\A09. Contest Guides.ipynb"

In [28]:
# Loop over contestKeys
yesterdays_subset_df = yesterdays_subset_df.reset_index(drop=True)

for contestKey in yesterdays_subset_df['contestKey'].reset_index(drop=True):
    print(contestKey)
    try:
        guide = contest_guide(game_df, subset_df=yesterdays_subset_df, contestKey=contestKey)
        if not guide.empty:
            guide.to_csv(os.path.join(baseball_path, "A09. Contest Guides", f"Contest Guide {contestKey}.csv"), index=False)
        else:
            print(f"Contest Guide {contestKey} is empty.")
    except FileNotFoundError as e:
        print(f"Draftables {contestKey}.csv not found.")

161050365
Draftables 161050365.csv not found.
161050364
Draftables 161050364.csv not found.
161050363
Draftables 161050363.csv not found.
161050360
Draftables 161050360.csv not found.
161050361
Draftables 161050361.csv not found.
161050073
Draftables 161050073.csv not found.
161050069
Draftables 161050069.csv not found.
161051762
Draftables 161051762.csv not found.
161051189
Draftables 161051189.csv not found.
161050070
Draftables 161050070.csv not found.
161050092
161050095
161050093
161050098
161050090
161050062
161050063
161050066
161051166
161051673
161050439
Draftables 161050439.csv not found.
161050438
Draftables 161050438.csv not found.
161050435
Draftables 161050435.csv not found.
161050437
Draftables 161050437.csv not found.
161050436
Draftables 161050436.csv not found.
161050039
Draftables 161050039.csv not found.
161050040
Draftables 161050040.csv not found.
161050041
Draftables 161050041.csv not found.
161050038
Draftables 161050038.csv not found.
161051495
Draftables 16105

### B01. Matchups

Note: This should fix incorrect or missing information from yesterday. 

In [29]:
%run "C:\Users\james\Documents\MLB\Code\A02. MLB API.ipynb"
%run "C:\Users\james\Documents\MLB\Code\A03. Steamer.ipynb"
%run "C:\Users\james\Documents\MLB\Code\B01. Matchups.ipynb"

In [30]:
%%time
# Read in dataset 
complete_dataset = create_pa_inputs(park_factors, team_map, 2015, 2024, 50, 300)
# Subset
complete_dataset = complete_dataset.query('date > 20220301')

# Read in Steamer hitters
steamer_hitters_df = pd.read_csv(os.path.join(baseball_path, "A03. Steamer", "steamer_hitters_weekly_log.csv"), encoding='iso-8859-1')
steamer_hitters_df_current = pd.read_csv(os.path.join(baseball_path, "A03. Steamer", "steamer_hitters.csv"), encoding='iso-8859-1')
steamer_hitters_df = pd.concat([steamer_hitters_df, steamer_hitters_df_current], axis=0)
steamer_hitters_df['proj_year'].fillna(2024, inplace=True)
steamer_hitters_df['proj_date'].fillna(todaysdate_dash, inplace=True)
steamer_hitters_df = clean_steamer_hitters(steamer_hitters_df)

# Read in Steamer pitchers
steamer_pitchers_df = pd.read_csv(os.path.join(baseball_path, "A03. Steamer", "steamer_pitchers_weekly_log.csv"), encoding='iso-8859-1')
steamer_pitchers_df_current = pd.read_csv(os.path.join(baseball_path, "A03. Steamer", "steamer_pitchers.csv"), encoding='iso-8859-1')
steamer_pitchers_df = pd.concat([steamer_pitchers_df, steamer_pitchers_df_current], axis=0)
steamer_pitchers_df['proj_year'].fillna(2024, inplace=True)
steamer_pitchers_df['proj_date'].fillna(todaysdate_dash, inplace=True)
steamer_pitchers_df = clean_steamer_pitchers(steamer_pitchers_df)

CPU times: total: 2min 41s
Wall time: 2min 46s


In [31]:
# Write to CSV (we'll read these later in B.)
complete_dataset.to_csv(os.path.join(baseball_path, "Stat Dataset.csv"), index=False)
steamer_hitters_df.to_csv(os.path.join(baseball_path, "Steamer Hitters.csv"), index=False)
steamer_pitchers_df.to_csv(os.path.join(baseball_path, "Steamer Pitchers.csv"), index=False)

In [32]:
%%time
print(len(game_df))
empty_list = Parallel(n_jobs=-1, verbose=True)(delayed(create_matchup_files)(game_df, row, complete_dataset, steamer_hitters_df, steamer_pitchers_df, team_map) for row in range(len(game_df)))

16


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of  16 | elapsed:   18.1s remaining:  2.1min
[Parallel(n_jobs=-1)]: Done  16 out of  16 | elapsed:   59.8s finished


CPU times: total: 46.8 s
Wall time: 1min


### A08. Odds Sportsbook Review

In [33]:
%run "C:\Users\james\Documents\MLB\Code\A08. Odds Sportsbook Review.ipynb"

In [34]:
# Scrape odds from Sportsbook Review
odds_df = create_odds_df(yesterdaysdate_dash)

# Save 
odds_df.to_csv(os.path.join(baseball_path, "A08. Odds Sportsbook Review", f"Odds {yesterdaysdate}.csv"), index=False)

### A03. Steamer

Files were downloaded above. This should give them enough time to finish downloading.

In [35]:
move_steamer()

Moved 'C:\Users\james\Downloads\steamer_pitchers_weekly_log.csv' to 'C:\Users\james\Documents\MLB\Database\A03. Steamer'.
Moved 'C:\Users\james\Downloads\steamer_hitters_weekly_log.csv' to 'C:\Users\james\Documents\MLB\Database\A03. Steamer'.
Moved 'C:\Users\james\Downloads\steamer_pitchers.csv' to 'C:\Users\james\Documents\MLB\Database\A03. Steamer'.
Moved 'C:\Users\james\Downloads\steamer_hitters.csv' to 'C:\Users\james\Documents\MLB\Database\A03. Steamer'.
