# A03. Steamer
Sources: 
- steamerprojections.com

In [14]:
# Download historical weekly logs
def download_logs():
    """Download the Steamer weekly pitcher and hitter logs via the browser.

    Opens the historical-weekly-logs page in a new browser tab, clicks the
    pitcher and hitter download links by screen position, and after each
    click moves the most recently modified matching file from
    ``download_path`` into the Steamer database folder.

    The click positions were measured on a 2560x1600 display and are scaled
    to the current screen size, so they only land on the links if the
    browser window is laid out the same way as when they were measured.

    Returns:
        None. Progress is reported with ``print``.
    """
    # Weekly log URL
    steamer_url = 'http://www.steamerprojections.com/index.php/projections/historical-weekly-logs'

    # Load in existing web browser
    javascript_code = f"window.open('{steamer_url}', '_blank');"
    display(Javascript(javascript_code))

    # Give the page time to load before clicking
    time.sleep(5)

    # Identify current screen dimensions
    screenWidth, screenHeight = pyautogui.size()

    destination_folder = r'C:\Users\james\Documents\MLB\Database\A03. Steamer'

    # Download pitchers (link location scaled from a 2560x1600 reference)
    pyautogui.click(x=500/2560 * screenWidth, y=800/1600 * screenHeight)
    # Wait for the download to finish before looking for the file
    time.sleep(60)
    _move_latest_download("steamer_pitchers*", destination_folder)

    # Download hitters (link location scaled from a 2560x1600 reference)
    pyautogui.click(x=1400/2560 * screenWidth, y=780/1600 * screenHeight)
    # Wait for the download to finish before looking for the file
    time.sleep(60)
    _move_latest_download("steamer_hitters*", destination_folder)


def _move_latest_download(pattern, destination_folder):
    """Move the newest file in ``download_path`` matching ``pattern``.

    Args:
        pattern: Glob pattern (file name only) to look for in
            ``download_path``.
        destination_folder: Folder the newest match is moved into; the file
            keeps its base name.
    """
    matching_files = glob.glob(os.path.join(download_path, pattern))

    if not matching_files:
        print(f"No matching files found in '{download_path}'.")
        return

    # Newest by modification time is taken to be the file just downloaded
    most_recent_file = max(matching_files, key=os.path.getmtime)
    shutil.move(most_recent_file, os.path.join(destination_folder, os.path.basename(most_recent_file)))
    print(f"Moved '{most_recent_file}' to '{destination_folder}'.")

# Clean

##### 1. Steamer Hitters

In [21]:
def clean_steamer_hitters(df):
    """Turn raw Steamer hitter projections into per-PA and stolen-base rates.

    Args:
        df: DataFrame of Steamer hitter projections. The code reads the
            columns '1B', '2B', '3B', 'HR', 'BB', 'HBP', 'K', 'PA', 'SB',
            'CS', 'OBP', 'SLG', 'wOBA', 'proj_date', 'firstname',
            'lastname', 'mlbamid' and 'steamerid'. Note that the
            intermediate columns (rates, 'SBA', 'SBO', 'sba_imp', 'sbr',
            'date') are added to this frame in place.

    Returns:
        A new DataFrame with lower-cased column names containing the
        identifiers, integer 'date', 'obp', 'slg', 'woba', the per-PA rate
        columns, 'sba_imp', 'sbr' and the model-predicted 'sba_2b',
        'sba_3b', 'sb_2b' and 'sb_3b'. Rows with any missing value among
        the kept columns are dropped.
    """
    ### Hitting
    # Basic stats
    hit_list = ['1B', '2B', '3B', 'HR', 'BB', 'HBP', 'K']

    # Advanced stats
    rate_list = ['OBP', 'SLG', 'wOBA']
    for stat in hit_list:
        rate = stat + "_rate"
        rate_list.append(rate)
        df[rate] = df[stat] / df['PA']

    ### Base running
    # Stolen base attempts
    df['SBA'] = df['SB'] + df['CS']
    # Stolen base opportunities (times on first)
    df['SBO'] = df['1B'] + df['BB'] + df['HBP']

    # Implied stolen base attempt rate, capped at 0.15. Missing values
    # (0 attempts / 0 opportunities) are assumed to be a low probability.
    # Assigning the result back (instead of a chained inplace fillna) keeps
    # this working under pandas Copy-on-Write.
    sba_imp = (df['SBA'] / df['SBO']).clip(upper=0.15)
    df['sba_imp'] = sba_imp.fillna(0.05)

    # Stolen base success rate; with no attempts assume 25th percentile
    df['sbr'] = (df['SB'] / df['SBA']).fillna(0.6)

    # Date as an integer with the dashes removed from proj_date
    df['date'] = df['proj_date'].str.replace("-", "")
    df['date'] = df['date'].astype('int')

    # Keep relevant variables. Copy so the clean-up below works on an
    # independent frame rather than a slice of the input.
    keep_list = ['date', 'firstname', 'lastname', 'mlbamid', 'steamerid', 'sba_imp', 'sbr'] + rate_list
    df = df[keep_list].copy()

    # Clean up
    df.columns = df.columns.str.lower()
    df = df.rename(columns={'1b_rate': 'b1_rate', '2b_rate': 'b2_rate', '3b_rate': 'b3_rate', 'k_rate': 'so_rate'})
    df = df.dropna()

    # Calculate stolen base attempt and success rates by base.
    # (output column, model file, input feature)
    model_specs = [
        ('sba_2b', 'sba_2b_20220901.sav', 'sba_imp'),
        ('sba_3b', 'sba_3b_20220901.sav', 'sba_imp'),
        ('sb_2b', 'sb_2b_20220901.sav', 'sbr'),
        ('sb_3b', 'sb_3b_20220901.sav', 'sbr'),
    ]
    for out_col, model_file_name, feature in model_specs:
        # NOTE(review): pickle executes arbitrary code on load; only load
        # model files from a trusted model_path.
        with open(os.path.join(model_path, model_file_name), 'rb') as model_file:
            model = pickle.load(model_file)
        df[out_col] = model.predict(df[[feature]])

    return df

##### 2. Steamer Pitchers

In [30]:
def clean_steamer_pitchers(df):
    """Derive per-pitcher rate columns from raw Steamer pitcher projections.

    Args:
        df: DataFrame of Steamer pitcher projections. The code reads the
            columns 'H', 'IP', 'start_IP', 'GS', 'proj_date', 'firstname',
            'lastname', 'mlbamid', 'steamerid' and every column named in
            ``pitcher_stats_fg2``. Note that 'H9', 'IP_start' and 'date'
            are added to this frame in place.

    Returns:
        A new DataFrame with the integer 'date', the identifier columns
        and the columns listed in ``pitcher_stats_fg2``.
    """
    # Hits per 9 innings
    df['H9'] = df['H'] / df['IP'] * 9

    # Calculate average innings per game started. Assigning the cleaned
    # Series back (instead of chained inplace fillna/replace) keeps this
    # working under pandas Copy-on-Write.
    ip_start = df['start_IP'] / df['GS']
    # 0 / 0 gives NaN: treat as 0 innings per start
    ip_start = ip_start.fillna(0)
    # Non-zero / 0 gives an infinity: replace with 3 innings per start
    ip_start = ip_start.replace([np.inf, -np.inf], 3)
    df['IP_start'] = ip_start

    # Date as an integer with the dashes removed from proj_date
    df['date'] = df['proj_date'].str.replace("-", "")
    df['date'] = df['date'].astype('int')

    # Keep relevant variables; copy so the caller gets an independent frame
    keep_list = ['date', 'firstname', 'lastname', 'mlbamid', 'steamerid'] + pitcher_stats_fg2
    df = df[keep_list].copy()

    return df