## Import libraries


In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import pandas as pd
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
from rapidfuzz import process
from datetime import datetime, timedelta
import re
import os

## rotowire.com: Get matchups

In [None]:


def get_matchups(datatable_id):
    """Scrape one table of rotowire.com's batter-vs-pitcher page into a DataFrame.

    A headless browser (hard-coded Brave binary path) opens
    https://www.rotowire.com/baseball/stats-bvp.php, waits for the element
    whose ``view_id`` is ``$<datatable_id>`` and reshapes its text.

    Args:
        datatable_id: Table identifier; the callers in this file pass
            ``'datatable1'`` to ``'datatable4'``.

    Returns:
        ``None`` if the table element does not appear within 60 seconds,
        an empty DataFrame if the table text has 10 lines or fewer,
        otherwise a DataFrame with one column per table header, each header
        suffixed with ``'_hot'`` (datatable1/datatable3) or ``'_cold'``.

    Raises:
        ValueError: If the length of the first data column cannot be
            determined from the table text.
    """
    options = Options()
    options.add_argument("--headless")  # Optional: Run in headless mode
    options.binary_location = "C:\\Program Files\\BraveSoftware\\Brave-Browser\\Application\\brave.exe"

    driver = webdriver.Chrome(options=options)
    # try/finally guarantees the browser is closed on every exit path,
    # including unexpected errors while reading the table.
    try:
        driver.get("https://www.rotowire.com/baseball/stats-bvp.php")

        # Explicitly wait for the table element to load
        datatable_xpath = f"//div[@view_id='${datatable_id}']"  # Update XPATH as needed
        try:
            WebDriverWait(driver, 60).until(
                EC.presence_of_element_located((By.XPATH, datatable_xpath))
            )
            print(f"{datatable_id} table loaded successfully.")
        except Exception as e:
            print(f"Error: Table {datatable_id} did not load. Details: {e}")
            return None

        # Give the page time to finish filling the table
        time.sleep(10)

        text_content = driver.find_element(By.XPATH, datatable_xpath).text
    finally:
        driver.quit()

    rows = text_content.split("\n")
    if len(rows) <= 10:
        print(f"Skipping {datatable_id}: Table is empty.")
        return pd.DataFrame()  # Return an empty DataFrame when skipping

    df = pd.DataFrame([row.split("\t") for row in rows])

    # datatable3/datatable4 have 16 header cells, the other tables 18
    narrow_table = datatable_id in ["datatable3", "datatable4"]
    header_count = 16 if narrow_table else 18

    # The first `header_count` lines are the column headers
    headers = df[0].iloc[:header_count].tolist()
    df = df.iloc[header_count:].reset_index(drop=True)

    # The first line that is at most `cutoff_value` characters long marks
    # where the first column's values end; its position is the number of
    # rows per column.
    # NOTE(review): presumably this is the first cell of the second column.
    cutoff_value = 3 if narrow_table else 2
    short_lines = df.index[df[0].str.len() <= cutoff_value]
    if len(short_lines) == 0 or short_lines.min() == 0:
        raise ValueError(
            f"Could not determine the number of rows of {datatable_id}."
        )
    num_rows = int(short_lines.min())

    # Every run of `num_rows` consecutive lines is one column of the table
    df = df.rename(columns={0: "data_column"})
    df["group"] = df.index // num_rows
    df["mod_index"] = df.index % num_rows
    df_new = df.pivot(index="mod_index", columns="group", values="data_column")
    df_new.columns = headers

    # Add suffix for hot and cold
    suffix = '_hot' if datatable_id in ["datatable1", "datatable3"] else '_cold'
    return df_new.rename(columns=lambda col: col + suffix)

# Scrape the four rotowire tables and store one DataFrame per table id
dataframes_matchups = {}
datatable_ids = ['datatable1', 'datatable2', 'datatable3', 'datatable4']

for datatable_id in datatable_ids:
    print(f"Processing {datatable_id} from rotowire.com ...")
    result_df = get_matchups(datatable_id)
    if result_df is None:
        # get_matchups returns None when the table never loaded. Treat it
        # like an empty table so the lookups below cannot raise KeyError.
        print(f"{datatable_id} could not be loaded; using an empty table instead.")
        result_df = pd.DataFrame()
    dataframes_matchups[datatable_id] = result_df

# Extract each one into their dataframes
hot_hitter_matchups   = dataframes_matchups['datatable1']
cold_hitter_matchups  = dataframes_matchups['datatable2']
hot_pitcher_matchups  = dataframes_matchups['datatable3']
cold_pitcher_matchups = dataframes_matchups['datatable4']

Processing datatable1 from rotowire.com ...
datatable1 table loaded successfully.
Processing datatable2 from rotowire.com ...
datatable2 table loaded successfully.
Processing datatable3 from rotowire.com ...
datatable3 table loaded successfully.
Processing datatable4 from rotowire.com ...
datatable4 table loaded successfully.
Skipping datatable4: Table is empty.


## fantasypros.com: Get hitters' stats

In [3]:
def hitting_stats(nb_last_days):
    """Scrape the fantasypros.com hitters table for one look-back window.

    Args:
        nb_last_days: 1, 7, 15 or 30 selects that ``range`` on the site; any
            other value (the callers use 99) selects the 2025 season.

    Returns:
        A DataFrame with one row per table line that starts with a digit,
        split from the right into 16 columns. Column names come from the
        table header and carry a suffix: ``'_yesterday'``, ``'_7'``,
        ``'_15'``, ``'_30'`` or ``'_full'``.

    Raises:
        Exception: Whatever the 60-second wait raised when the table did not
            load, re-raised after printing the error and closing the browser.
    """
    # (site range parameter, column suffix) for each supported window
    windows = {
        1: ("1", "_yesterday"),
        7: ("7", "_7"),
        15: ("15", "_15"),
        30: ("30", "_30"),
    }
    range_param, suffix = windows.get(nb_last_days, ("2025", "_full"))

    options = Options()
    options.add_argument("--headless")  # Optional: Run in headless mode
    options.binary_location = "C:\\Program Files\\BraveSoftware\\Brave-Browser\\Application\\brave.exe"

    driver = webdriver.Chrome(options=options)
    # try/finally closes the browser on success as well as on failure
    try:
        driver.get(f"https://www.fantasypros.com/mlb/stats/hitters.php?range={range_param}&page=ALL")

        datatable_id = 'data'
        datatable_xpath = f"//table[@id='{datatable_id}']"  # Update XPATH as needed
        try:
            WebDriverWait(driver, 60).until(
                EC.presence_of_element_located((By.XPATH, datatable_xpath))
            )
        except Exception as e:
            print(f"Error: Table {datatable_id} did not load. Details: {e}")
            # Nothing can be parsed without the table, so stop here instead
            # of querying a browser that is about to be closed.
            raise
        print(f"{datatable_id} table loaded successfully.")

        # Give the page time to finish filling the table
        time.sleep(10)

        text_content = driver.find_element(By.XPATH, datatable_xpath).text
    finally:
        driver.quit()

    # One table line per entry; only the text before the first tab is used
    lines = pd.Series([row.split("\t")[0] for row in text_content.split("\n")])

    # Drop the first line; the next 17 lines are the header cells
    lines = lines.iloc[1:].reset_index(drop=True)
    headers = lines.iloc[1:17].tolist()  # skip the first header cell

    # Keep only the data lines, i.e. those starting with a number
    data = lines.iloc[17:].reset_index(drop=True)
    data = data[data.str.match(r'^\d')]

    # Split from the right 15 times: 15 trailing values, and everything
    # before them stays together in the first column
    df_split = data.str.rsplit(' ', n=15, expand=True)
    df_split.columns = headers
    df_split = df_split.rename(columns=lambda col: col + suffix)

    # Report what was actually downloaded
    if suffix != '_full':
        print(f"Hitter's last {nb_last_days} games loaded successfully.")
    else:
        print("Hitter's full season loaded successfully.")

    return df_split


# Download every look-back window; 99 is not a listed window, so
# hitting_stats falls through to the full season for it.
(
    yesterday_games,
    last_seven_games,
    last_fifteen_games,
    last_thirty_games,
    season_to_date_games,
) = [hitting_stats(window) for window in (1, 7, 15, 30, 99)]


data table loaded successfully.
Hitter's last 1 games loaded successfully.
data table loaded successfully.
Hitter's last 7 games loaded successfully.
data table loaded successfully.
Hitter's last 15 games loaded successfully.
data table loaded successfully.
Hitter's last 30 games loaded successfully.
data table loaded successfully.
Hitter's full season loaded successfully.


## Combine rotowire and fantasypros data


In [4]:


def match_names(name, choices):
    """Return the closest fuzzy match for a player name.

    Args:
        name: Player name to look up.
        choices: Candidate names passed to ``process.extractOne``.

    Returns:
        The best-scoring candidate when its score reaches the cutoff of 80,
        otherwise ``None`` (the callers treat that as "did not play").
    """
    result = process.extractOne(name, choices, score_cutoff=80)
    if not result:
        # Nothing scored high enough
        return None

    candidate, _score, _key = result
    return candidate


def _attach_period_stats(matchups_df, name_field):
    """Fuzzy-match one matchup table against every stats period and merge the stats in."""
    # An empty table without the name column has nothing to match
    if matchups_df.empty and name_field not in matchups_df.columns:
        return matchups_df.copy()

    # (helper column, stats table, player-name column in that table)
    periods = (
        ('matched_player_yesterday',      yesterday_games,      'PLAYER_yesterday'),
        ('matched_player_last_seven',     last_seven_games,     'PLAYER_7'),
        ('matched_player_last_fifteen',   last_fifteen_games,   'PLAYER_15'),
        ('matched_player_last_thirty',    last_thirty_games,    'PLAYER_30'),
        ('matched_player_season_to_date', season_to_date_games, 'PLAYER_full'),
    )

    # Work on a copy so the caller's DataFrame is left untouched
    merged = matchups_df.copy()

    for matched_col, stats_df, player_col in periods:
        choices = stats_df[player_col]
        merged[matched_col] = merged[name_field].apply(
            lambda player, choices=choices: match_names(player, choices)
        )

    # Mark unmatched players in all five helper columns, selected by name
    # rather than by position so that none of them is skipped.
    matched_cols = [matched_col for matched_col, _, _ in periods]
    merged[matched_cols] = merged[matched_cols].fillna("Did not play")

    for matched_col, stats_df, player_col in periods:
        merged = merged.merge(stats_df, left_on=matched_col, right_on=player_col, how='left')

    # Remove columns that start with "matched"
    merged = merged.drop(columns=[col for col in merged.columns if col.startswith("matched")])

    # Fill empty with '-'
    return merged.fillna('-')


def create_matched_tables(hot_df, cold_df, hot_field_name, cold_field_name ):
    """Add the fantasypros hitting stats to the hot and cold matchup tables.

    Reads the module-level tables ``yesterday_games``, ``last_seven_games``,
    ``last_fifteen_games``, ``last_thirty_games`` and
    ``season_to_date_games``, which must still carry their suffixed column
    names (``PLAYER_yesterday``, ``PLAYER_7``, ...).

    Args:
        hot_df: Matchup table of hot players.
        cold_df: Matchup table of cold players.
        hot_field_name: Column of ``hot_df`` holding the player name.
        cold_field_name: Column of ``cold_df`` holding the player name.

    Returns:
        ``(new_hot_df, new_cold_df)``: copies of the inputs left-merged with
        every stats table on the fuzzy-matched name, with missing values
        replaced by ``'-'``. An empty input without its name column is
        returned as an empty copy.
    """
    new_hot_df = _attach_period_stats(hot_df, hot_field_name)
    new_cold_df = _attach_period_stats(cold_df, cold_field_name)
    return new_hot_df, new_cold_df


# Enrich both hitter matchup tables with the fantasypros stats
hot_hitter_matchups, cold_hitter_matchups = create_matched_tables(
    hot_df=hot_hitter_matchups,
    cold_df=cold_hitter_matchups,
    hot_field_name='Name_hot',
    cold_field_name='Name_cold',
)

## Clean tables
#### Combine yesterday results, last 7 games, last 30 games and season-to-date

In [5]:
# Clean the dataframes. Replace the suffixes for all the columns.
# and add the PA column to the dataframes.
def clean_suffix(df, suffix):
    """Strip a period suffix from the column names and add derived columns.

    The DataFrame is modified in place and also returned.

    Args:
        df: Stats table whose column names end with ``suffix``; it must
            contain ``AB`` and ``BB`` columns once the suffix is removed.
        suffix: Suffix to strip, e.g. ``'_7'`` or ``'_full'``.

    Returns:
        ``df`` with the suffix removed from the end of each column name, a
        ``stats_from`` label column (only for the known suffixes), ``AB`` and
        ``BB`` converted to numbers (unparsable values become NaN) and a
        ``PA`` column equal to ``AB + BB``.
    """
    stats_from_labels = {
        '_7': "Last 7 games",
        '_15': "Last 15 games",
        '_30': "Last 30 games",
        '_full': "Full season",
        '_yesterday': "Yesterday's game(s)",
    }

    # Only a trailing occurrence is a suffix; the same text elsewhere in a
    # column name is left alone.
    df.columns = [
        col[:-len(suffix)]
        if suffix and isinstance(col, str) and col.endswith(suffix)
        else col
        for col in df.columns
    ]

    # Create the stats_from column based on the suffix
    if suffix in stats_from_labels:
        df['stats_from'] = stats_from_labels[suffix]

    # First convert the columns to numeric
    for col in ('AB', 'BB'):
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # Create the PA column based on AB and BB
    df['PA'] = df['AB'] + df['BB']

    return df

# Strip the period suffix from every stats table and add stats_from / PA
(
    yesterday_games,
    last_seven_games,
    last_fifteen_games,
    last_thirty_games,
    season_to_date_games,
) = [
    clean_suffix(frame, suffix)
    for frame, suffix in (
        (yesterday_games,      '_yesterday'),
        (last_seven_games,     '_7'),
        (last_fifteen_games,   '_15'),
        (last_thirty_games,    '_30'),
        (season_to_date_games, '_full'),
    )
]


In [6]:

# Columns kept from every period table
columns_to_keep = [
    'PLAYER',  # It is Name in the other dataframes
    'PA', 'AB', 'H', 'HR', 'RBI', 'SB', 'BB',
    'K',  # It is SO in the other dataframes
    'AVG', 'OBP', 'SLG', 'OPS',
    'stats_from',
]

# Rename PLAYER for Name and K for SO
_renamed_columns = {'PLAYER': 'Name', 'K': 'SO'}

# Trim each period table to the shared columns and apply the renames
(
    yesterday_games,
    last_seven_games,
    last_fifteen_games,
    last_thirty_games,
    season_to_date_games,
) = [
    frame.loc[:, columns_to_keep].rename(columns=_renamed_columns)
    for frame in (
        yesterday_games,
        last_seven_games,
        last_fifteen_games,
        last_thirty_games,
        season_to_date_games,
    )
]

# Stack the period tables into one long table
main_df = pd.concat(
    [
        yesterday_games,
        last_seven_games,
        last_fifteen_games,
        last_thirty_games,
        season_to_date_games,
    ],
    ignore_index=True,
)


def convert_columns_to_integer(df, col):
    """Convert one column of a DataFrame to integers, in place.

    Args:
        df: DataFrame to modify.
        col: Name of the column to convert.

    Returns:
        The same ``df``; values of ``col`` that cannot be parsed as numbers
        (or are missing) become 0 and the column is cast to ``int``.
    """
    # Unparsable entries turn into NaN first, then into 0 before the cast
    as_numbers = pd.to_numeric(df[col], errors='coerce')
    df[col] = as_numbers.fillna(0).astype(int)
    return df


def convert_columns_to_float(df, col):
    """Convert one column of a DataFrame to floats, in place.

    Args:
        df: DataFrame to modify.
        col: Name of the column to convert.

    Returns:
        The same ``df``; values of ``col`` that cannot be parsed as numbers
        (or are missing) become 0.0 and the column is cast to ``float``.
    """
    # Unparsable entries turn into NaN first, then into 0 before the cast
    as_numbers = pd.to_numeric(df[col], errors='coerce')
    df[col] = as_numbers.fillna(0).astype(float)
    return df


# Cast the counting stats to integers and the rate stats to floats
for _count_col in ('H', 'HR', 'RBI', 'SB', 'SO'):
    main_df = convert_columns_to_integer(main_df, _count_col)

for _rate_col in ('AVG', 'OBP', 'SLG', 'OPS'):
    main_df = convert_columns_to_float(main_df, _rate_col)

# Team abbreviations and their full names
teams_abbr = {
    'ATL': 'Atlanta Braves',
    'NYM': 'New York Mets',
    'PHI': 'Philadelphia Phillies',
    'WSH': 'Washington Nationals',
    'MIA': 'Miami Marlins',

    'CHC': 'Chicago Cubs',
    'CIN': 'Cincinnati Reds',
    'MIL': 'Milwaukee Brewers',
    'PIT': 'Pittsburgh Pirates',
    'STL': 'St. Louis Cardinals',

    'SD': 'San Diego Padres',
    'SF': 'San Francisco Giants',
    'LAD': 'Los Angeles Dodgers',
    'ARI': 'Arizona Diamondbacks',
    'COL': 'Colorado Rockies',

    'BOS': 'Boston Red Sox',
    'NYY': 'New York Yankees',
    'TOR': 'Toronto Blue Jays',
    'BAL': 'Baltimore Orioles',
    'TB': 'Tampa Bay Rays',

    'CWS': 'Chicago White Sox',
    'CLE': 'Cleveland Guardians',
    'DET': 'Detroit Tigers',
    'KC': 'Kansas City Royals',
    'MIN': 'Minnesota Twins',

    'HOU': 'Houston Astros',
    'TEX': 'Texas Rangers',
    'LAA': 'Los Angeles Angels',
    'SEA': 'Seattle Mariners',
    'ATH': 'Athletics'

}

# Regex matching any abbreviation as a whole word (\b is a word boundary)
pattern = r'\b(?:' + '|'.join(teams_abbr.keys()) + r')\b'

# Remove only the hyphen inside the parenthesised part of the name (the
# lookahead requires a ")" before any other bracket), so hyphenated player
# names keep their hyphen.
# NOTE(review): assumes the raw value looks like "12 Name (TEAM - POS)" - confirm.
main_df['Name'] = main_df['Name'].str.replace(
    r'\s*-\s*(?=[^()]*\))', ' ', regex=True
)

# Remove the brackets themselves and surrounding whitespace
main_df['Name'] = main_df['Name'].str.replace(r'[()]', '', regex=True).str.strip()

# Extract the first whole-word abbreviation into the 'Team' column
main_df['Team'] = main_df['Name'].str.extract('(' + pattern + ')', expand=False)

# Remove the leading numbers from the Name column
main_df['Name'] = main_df['Name'].str.replace(r'^\d+\s*', '', regex=True)

# Map the abbreviation to its full team name
main_df['Team Full Name'] = main_df['Team'].map(teams_abbr)


def split_name_column(row):
    """Split a row's ``Name`` value around the team abbreviation.

    The first whole-word occurrence of any key of the module-level
    ``teams_abbr`` dict is used as the separator, the same rule used to
    fill the ``Team`` column.

    Args:
        row: Mapping-like row (e.g. a DataFrame row) with a ``'Name'`` entry.

    Returns:
        ``(text_before, abbreviation, text_after)`` with both texts stripped
        of surrounding whitespace, or ``(name, None, None)`` when the name
        holds no abbreviation or is not a string.
    """
    name = row['Name']
    if not isinstance(name, str):
        # Missing values (NaN/None) cannot be searched
        return name, None, None

    # Whole-word match, so an abbreviation that merely appears inside a
    # longer token is never used as the separator.
    abbr_regex = r'\b(?:' + '|'.join(re.escape(abbr) for abbr in teams_abbr) + r')\b'
    found = re.search(abbr_regex, name)
    if found is None:
        return name, None, None

    return name[:found.start()].strip(), found.group(0), name[found.end():].strip()


# Split every Name into player name, team abbreviation and position
_name_parts = main_df.apply(
    lambda record: pd.Series(split_name_column(record)), axis=1
)
main_df[['Player Name', 'Team Abbreviation', 'Position']] = _name_parts

# The combined 'Name' column is no longer needed once it has been split
main_df = main_df.drop(columns=['Name'])

In [7]:
# # Create unique combination of Name and Team
# unique_combination = main_df.drop_duplicates(subset=['Name', 'Team'])

# # Keep only the columns Name and Team
# unique_combination = unique_combination[['Name', 'Team']]

# # Drop the rows with NaN values
# unique_combination = unique_combination.dropna()

# # Reset the index
# unique_combination = unique_combination.reset_index(drop=True)

In [8]:
# # Find names with multiple occurrences
# name_counts = unique_combination['Name'].value_counts()

# # Filter names that appear more than once
# repeated_names = name_counts[name_counts > 1].index.tolist()

# print("Names appearing multiple times:", repeated_names)

# Get hot and cold hitters

In [9]:
def hot_hitters(df, period):
    """Select the hot hitters of one period.

    Args:
        df: Stats table with ``stats_from``, ``AVG``, ``OBP``, ``OPS`` and
            ``H`` columns.
        period: ``'yesterday'``, ``'last_seven'``, ``'last_fifteen'`` or
            ``'last_thirty'``.

    Returns:
        The rows of that period that reach every minimum below. For any
        other ``period`` value ``df`` is returned unchanged.
    """
    # stats_from label and the minimum value required per column
    criteria = {
        'yesterday':    ("Yesterday's game(s)", {'AVG': .27, 'OBP': .3, 'OPS': .75, 'H': 2}),
        'last_seven':   ("Last 7 games",        {'AVG': .27, 'OPS': .85}),
        # Same ">=" thresholds as the other multi-game windows; "<" here
        # would select cold hitters instead.
        'last_fifteen': ("Last 15 games",       {'AVG': .27, 'OPS': .85}),
        'last_thirty':  ("Last 30 games",       {'AVG': .27, 'OPS': .85}),
    }
    if period not in criteria:
        return df

    label, minimums = criteria[period]
    selected = df.loc[df['stats_from'] == label]
    for column, minimum in minimums.items():
        selected = selected.loc[selected[column] >= minimum]

    return selected


# Build the hot-hitter table of every period
(
    hot_hitters_yesterday,
    hot_hitters_last_seven,
    hot_hitters_last_fifteen,
    hot_hitters_last_thirty,
) = [
    hot_hitters(main_df, period)
    for period in ('yesterday', 'last_seven', 'last_fifteen', 'last_thirty')
]


def cold_hitters(df, period):
    """Select the cold hitters of one period.

    Args:
        df: Stats table with ``stats_from``, ``AVG``, ``OBP``, ``OPS`` and
            ``H`` columns.
        period: ``'yesterday'``, ``'last_seven'``, ``'last_fifteen'`` or
            ``'last_thirty'``.

    Returns:
        The rows of that period that stay strictly below every limit listed
        for it. For any other ``period`` value ``df`` is returned unchanged.
    """
    # stats_from label and the exclusive upper limit per column
    limits_by_period = {
        'yesterday':    ("Yesterday's game(s)", (('AVG', .27), ('OBP', .3), ('OPS', .75), ('H', 2))),
        'last_seven':   ("Last 7 games",        (('AVG', .27), ('OPS', .85))),
        'last_fifteen': ("Last 15 games",       (('AVG', .27), ('OPS', .85))),
        'last_thirty':  ("Last 30 games",       (('AVG', .27), ('OPS', .85))),
    }

    rule = limits_by_period.get(period)
    if rule is None:
        return df

    label, limits = rule
    subset = df.loc[df['stats_from'] == label]
    for column, limit in limits:
        subset = subset.loc[subset[column] < limit]

    return subset


# Build the cold-hitter table of every period
(
    cold_hitters_yesterday,
    cold_hitters_last_seven,
    cold_hitters_last_fifteen,
    cold_hitters_last_thirty,
) = [
    cold_hitters(main_df, period)
    for period in ('yesterday', 'last_seven', 'last_fifteen', 'last_thirty')
]

## Export data

In [10]:
# DataFrames to export, keyed by the CSV file name (without extension).
# Every key must be unique: a repeated key silently replaces the earlier
# entry, so each 30-game table needs its own '..._last_thirty' key.
dfs = {
    'main_df': main_df,

    'hot_hitter_matchup':    hot_hitter_matchups,
    'cold_hitter_matchup':   cold_hitter_matchups,
    'hot_pitcher_matchups':  hot_pitcher_matchups,
    'cold_pitcher_matchups': cold_pitcher_matchups,

    'hot_hitters_yesterday':    hot_hitters_yesterday,
    'hot_hitters_last_seven':   hot_hitters_last_seven,
    'hot_hitters_last_fifteen': hot_hitters_last_fifteen,
    'hot_hitters_last_thirty':  hot_hitters_last_thirty,

    'cold_hitters_yesterday':    cold_hitters_yesterday,
    'cold_hitters_last_seven':   cold_hitters_last_seven,
    'cold_hitters_last_fifteen': cold_hitters_last_fifteen,
    'cold_hitters_last_thirty':  cold_hitters_last_thirty,
}

# Machine-specific output location. In a .py file
# os.path.join(os.path.dirname(__file__), 'output') could be used instead;
# __file__ is not available in a notebook.
output_folder = ('D:\\MLB Analyzer\\output\\')

# Create the folder on first use so to_csv does not fail on a missing path
os.makedirs(output_folder, exist_ok=True)

# Save each DataFrame in the 'output' folder
for name, dataframe in dfs.items():
    dataframe.to_csv(os.path.join(output_folder, f'{name}.csv'), index=False)

Up to here, all good

In [9]:
# I need to know the pitcher against the hitter for today
# Extract the W, L and ERA and add it to hot_hitters, cold_hitters
# Need to add in full season numbers to see how it goes