### Past Results

In [6]:
# %run "C:\Users\james\Documents\MLB\Code\U1. Imports.ipynb"
# %run "C:\Users\james\Documents\MLB\Code\U2. Utilities.ipynb"
# %run "C:\Users\james\Documents\MLB\Code\U3. Classes.ipynb"


# baseball_path = r'C:\Users\james\Documents\MLB\Database'

# db_path = r'C:\Users\james\Documents\MLB\Database\MLBDB.db'
# engine = create_engine(f'sqlite:///{db_path}')

In [7]:
# Execute the "B01. Matchups" notebook so that what it defines is available to the cells below.
# Names used later in this notebook but never defined here (os, pd, np, baseball_path, team_map,
# all_games_df) presumably come from that notebook or from notebooks it runs -- TODO confirm.
# NOTE(review): this is an absolute, machine-specific Windows path.
%run "C:\Users\james\Documents\MLB\Code\B01. Matchups.ipynb"

### Gambling Info

In [9]:
def game_objects(date):
    """Build a table describing the game-object folders saved for one date.

    Lists the entries of ``B02. Simulations/Game Objects/Matchups {date}`` under the
    notebook-level ``baseball_path`` and splits every entry name on whitespace into
    ``teams``, ``game_id`` and ``time``.

    Parameters
    ----------
    date
        Date token exactly as it appears in the ``Matchups {date}`` folder name.

    Returns
    -------
    pandas.DataFrame
        Columns ``teams``, ``game_id``, ``time``, ``game_num`` (1-based position of the
        row among rows with the same ``teams`` value, ordered by ``time``), ``away_team``
        (text before the ``@`` in ``teams``), plus ``BBREFTEAM`` and ``BASEBALLPRESSTEAM``
        left-merged from the notebook-level ``team_map`` on ``away_team == BBREFTEAM``.
    """
    matchup_dir = os.path.join(baseball_path, "B02. Simulations", "Game Objects", f"Matchups {date}")

    # assumes every entry is named "<away>@<home> <game_id> <time>" -- TODO confirm
    name_tokens = [entry.split() for entry in os.listdir(matchup_dir)]
    games = pd.DataFrame(name_tokens, columns=['teams', 'game_id', 'time'])

    # Identify which game comes first when the same matchup appears more than once
    games = games.sort_values(by=['teams', 'time'])
    games['game_num'] = games.groupby(['teams']).cumcount() + 1

    # Determine away team. `.str[0]` (rather than `expand=True)[0]`) also works when the
    # folder is empty: the expanded frame would have no column 0 and raise KeyError.
    games['away_team'] = games['teams'].str.split("@").str[0]

    return pd.merge(
        games,
        team_map[['BBREFTEAM', 'BASEBALLPRESSTEAM']],
        left_on='away_team',
        right_on='BBREFTEAM',
        how='left',
    )
    

In [10]:
def gambling_info(odds_df, matchup):
    """Return the betting lines stored for one matchup folder.

    Parameters
    ----------
    odds_df : pandas.DataFrame
        Odds table carrying a ``folder`` column (or already indexed by ``folder``) and
        the ten line columns listed below. If ``folder`` is still a column, the frame
        is re-indexed **in place**, so the caller's object ends up indexed by ``folder``.
    matchup
        Index label (folder name) of the row to read.

    Returns
    -------
    tuple
        ``(Spread, OU, SpreadMoney1, SpreadMoney2, OuMoney1, OuMoney2, MLMoney1,
        MLMoney2, VisitorVegasRuns, HomeVegasRuns)`` taken from the matching row.

    Raises
    ------
    KeyError
        If the frame has neither a ``folder`` column nor a ``folder`` index, or if
        ``matchup`` is not present in the index.
    """
    line_columns = (
        'Spread', 'OU',
        'SpreadMoney1', 'SpreadMoney2',
        'OuMoney1', 'OuMoney2',
        'MLMoney1', 'MLMoney2',
        'VisitorVegasRuns', 'HomeVegasRuns',
    )

    # Set folder as index. Only do it while 'folder' is still a column, so that calling
    # this function a second time on the same frame no longer fails with KeyError.
    if 'folder' in odds_df.columns:
        odds_df.set_index('folder', inplace=True)
    elif odds_df.index.name != 'folder':
        raise KeyError("odds_df needs a 'folder' column or a 'folder' index")

    # Look the row up once instead of once per extracted value
    row = odds_df.loc[matchup]

    return tuple(row[column] for column in line_columns)

### Payouts (Spread, Money Line, Over/Under)

In [8]:
def _american_to_decimal(american_odds):
    """Convert American odds to the decimal payout per unit staked (stake included).

    Positive odds pay ``odds / 100`` per unit, all other odds pay ``100 / abs(odds)``;
    1 is added for the returned stake.
    """
    return np.where(american_odds > 0, (american_odds / 100) + 1, (100 / abs(american_odds)) + 1)


def gambling_payouts(df, Spread, OU, SpreadMoney1, SpreadMoney2, OuMoney1, OuMoney2, MLMoney1, MLMoney2, VisitorVegasRuns, HomeVegasRuns):
    """Attach betting lines and per-row payouts to a frame of simulated scores.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``away_score`` and ``home_score``. Columns are added **in place**
        and the same object is returned.
    Spread
        Line added to ``away_score`` before comparing with ``home_score``.
    OU
        Total-runs line.
    SpreadMoney1, SpreadMoney2, OuMoney1, OuMoney2, MLMoney1, MLMoney2
        American odds. For spread and money line, ``1`` is the away side and ``2`` the
        home side; for the total, ``1`` is the over and ``2`` the under.
    VisitorVegasRuns, HomeVegasRuns
        Stored unchanged in columns of the same name.

    Returns
    -------
    pandas.DataFrame
        ``df`` with the line columns, outcome indicators (``Cover1/2``, ``AwayW``,
        ``HomeW``, ``Over``, ``Under``, ``Push``), ``*Winnings*`` (decimal payout if the
        bet wins) and ``*Payout*`` (decimal payout realised in that row; 0 for a loss,
        1 for a push).
    """
    ### Spread
    df['Spread'] = Spread
    df['SpreadMoney1'] = SpreadMoney1
    df['SpreadMoney2'] = SpreadMoney2

    # Covered?
    away_with_spread = df['away_score'] + df['Spread']
    df['Cover1'] = (away_with_spread > df['home_score']).astype('int')
    df['Cover2'] = (away_with_spread < df['home_score']).astype('int')

    # Payout (if you win)
    df['SpreadWinnings1'] = _american_to_decimal(df['SpreadMoney1'])
    df['SpreadWinnings2'] = _american_to_decimal(df['SpreadMoney2'])

    # Payout (expected in sim)
    df['SpreadPayout1'] = df['SpreadWinnings1'] * df['Cover1']
    df['SpreadPayout2'] = df['SpreadWinnings2'] * df['Cover2']

    # If it's a push, you get what you put in (same treatment as the over/under below;
    # previously a spread push was scored as a loss for both sides)
    spread_push = away_with_spread == df['home_score']
    df['SpreadPayout1'] = np.where(spread_push, 1, df['SpreadPayout1'])
    df['SpreadPayout2'] = np.where(spread_push, 1, df['SpreadPayout2'])

    ### Money Line
    df['MLMoney1'] = MLMoney1
    df['MLMoney2'] = MLMoney2

    # Won?
    df['AwayW'] = (df['away_score'] > df['home_score']).astype('int')
    df['HomeW'] = (df['away_score'] < df['home_score']).astype('int')

    # Payout (if you win)
    df['MLWinnings1'] = _american_to_decimal(df['MLMoney1'])
    df['MLWinnings2'] = _american_to_decimal(df['MLMoney2'])

    # Payout (expected in sim)
    df['MLPayout1'] = df['MLWinnings1'] * df['AwayW']
    df['MLPayout2'] = df['MLWinnings2'] * df['HomeW']

    ### Over/Under
    df['OU'] = OU
    df['OuMoney1'] = OuMoney1
    df['OuMoney2'] = OuMoney2

    # Total runs
    df['Total'] = df['away_score'] + df['home_score']

    # Over hit?
    df['Over'] = (df['Total'] > df['OU']).astype('int')
    df['Under'] = (df['Total'] < df['OU']).astype('int')
    df['Push'] = (df['Total'] == df['OU']).astype('int')

    # Payout (if you win)
    df['OuWinnings1'] = _american_to_decimal(df['OuMoney1'])
    df['OuWinnings2'] = _american_to_decimal(df['OuMoney2'])

    # Payout (expected in sim)
    df['OuPayout1'] = df['OuWinnings1'] * df['Over']
    df['OuPayout2'] = df['OuWinnings2'] * df['Under']

    # If it's a push, you get what you put in
    df['OuPayout1'] = np.where(df['Push'] == 1, 1, df['OuPayout1'])
    df['OuPayout2'] = np.where(df['Push'] == 1, 1, df['OuPayout2'])

    ### Projected Runs
    df['VisitorVegasRuns'] = VisitorVegasRuns
    df['HomeVegasRuns'] = HomeVegasRuns

    return df

In [9]:
# Build a table of actual game scores keyed by team abbreviation
def create_scores(games, team_map):
    """Turn a list of game records into a score table with team abbreviations.

    Parameters
    ----------
    games : iterable of mapping
        Each item must provide ``game_id``, ``game_type``, ``away_name``, ``home_name``,
        ``away_score``, ``home_score`` and ``game_date``.
    team_map : pandas.DataFrame
        Must contain ``FULLNAME`` and ``BBREFTEAM``; used to translate the away and home
        names via left merges on ``FULLNAME``.

    Returns
    -------
    pandas.DataFrame
        Columns ``game_id``, ``game_type``, ``away_score``, ``home_score``, ``date``
        (``game_date`` with the ``-`` characters removed), ``away_team`` and
        ``home_team`` (the ``BBREFTEAM`` matched for each side, missing when no
        ``FULLNAME`` matched).
    """
    fields = ['game_id', 'game_type', 'away_name', 'home_name', 'away_score', 'home_score', 'game_date']
    name_lookup = team_map[['FULLNAME', 'BBREFTEAM']]

    # One row per game, keeping only the fields listed above
    rows = [[game[field] for field in fields] for game in games]

    scores = (
        pd.DataFrame(rows, columns=fields)
        # The first merge resolves the away name, the second the home name; pandas
        # suffixes the clashing lookup columns with _x (away) and _y (home)
        .merge(name_lookup, left_on='away_name', right_on='FULLNAME', how='left')
        .merge(name_lookup, left_on='home_name', right_on='FULLNAME', how='left')
        .rename(columns={'BBREFTEAM_x': 'away_team', 'BBREFTEAM_y': 'home_team', 'game_date': 'date'})
        .drop(columns=['FULLNAME_x', 'FULLNAME_y', 'away_name', 'home_name'])
    )

    scores['date'] = scores['date'].str.replace("-", "")

    return scores

In [None]:
def _grade_bet(first_mean, second_mean, labels, moneys, first_wins, second_wins):
    """Pick the side whose mean simulated payout exceeds 1 and grade it on the real result.

    The first side is preferred when both exceed 1. Returns ``(pick, payout, money, win)``
    where ``win`` is 1 if the picked side won, 0 if the other side won, and NaN on a push.
    When neither mean exceeds 1 the result is ``("Neither", nan, nan, nan)``.
    """
    if first_mean > 1:
        pick, payout, money = labels[0], first_mean, moneys[0]
        won, lost = first_wins, second_wins
    elif second_mean > 1:
        pick, payout, money = labels[1], second_mean, moneys[1]
        won, lost = second_wins, first_wins
    else:
        return "Neither", np.nan, np.nan, np.nan

    if won:
        return pick, payout, money, 1
    if lost:
        return pick, payout, money, 0
    return pick, payout, money, np.nan


def bet_projections(df, Spread, OU, SpreadMoney1, SpreadMoney2, OuMoney1, OuMoney2, MLMoney1, MLMoney2, VisitorVegasRuns, HomeVegasRuns, away_score, home_score):
    """Choose spread, over/under and money-line bets from simulated payouts and grade them.

    Parameters
    ----------
    df : pandas.DataFrame
        Simulation rows carrying ``SpreadPayout1/2``, ``OuPayout1/2``, ``MLPayout1/2``,
        ``away_score`` and ``home_score``. Two columns, ``away_score_actual`` and
        ``home_score_actual``, are added to it in place.
    Spread, OU
        Lines used to grade the actual result.
    SpreadMoney1, SpreadMoney2, OuMoney1, OuMoney2, MLMoney1, MLMoney2
        American odds; the value belonging to the picked side is returned.
    VisitorVegasRuns, HomeVegasRuns
        Returned unchanged.
    away_score, home_score
        Actual final score.

    Returns
    -------
    tuple
        ``(SpreadPick, SpreadPayout, SpreadMoney, SpreadWin, OuPick, OuPayout, OuMoney,
        OuWin, MLPick, MLPayout, MLMoney, MLWin, VisitorModelRuns, HomeModelRuns,
        VisitorVegasRuns, HomeVegasRuns, away_score, home_score)``. A side is picked
        when its mean payout over ``df`` is above 1. ``*Win`` is 1/0/NaN
        (win/loss/push or no pick). ``*ModelRuns`` are the mean simulated scores.
    """
    # Add actual scores
    df['away_score_actual'] = away_score
    df['home_score_actual'] = home_score

    ### Spread Winners
    # NOTE(review): graded as `away_score + Spread` vs `home_score`, the same convention
    # gambling_payouts uses to build SpreadPayout1/2. The previous code compared
    # `away_score` with `home_score + Spread`, i.e. the opposite sign, so the bet being
    # graded was not the bet that had been simulated. Confirm the sign convention of the
    # odds feed.
    away_with_spread = away_score + Spread
    SpreadPick, SpreadPayout, SpreadMoney, SpreadWin = _grade_bet(
        df['SpreadPayout1'].mean(), df['SpreadPayout2'].mean(),
        ("Away", "Home"), (SpreadMoney1, SpreadMoney2),
        away_with_spread > home_score, away_with_spread < home_score,
    )

    ### OU Winners
    total_runs = away_score + home_score
    OuPick, OuPayout, OuMoney, OuWin = _grade_bet(
        df['OuPayout1'].mean(), df['OuPayout2'].mean(),
        ("Over", "Under"), (OuMoney1, OuMoney2),
        total_runs > OU, total_runs < OU,
    )

    ### ML Winners
    MLPick, MLPayout, MLMoney, MLWin = _grade_bet(
        df['MLPayout1'].mean(), df['MLPayout2'].mean(),
        ("Away", "Home"), (MLMoney1, MLMoney2),
        away_score > home_score, away_score < home_score,
    )

    VisitorModelRuns = df['away_score'].mean()
    HomeModelRuns = df['home_score'].mean()

    return SpreadPick, SpreadPayout, SpreadMoney, SpreadWin, OuPick, OuPayout, OuMoney, OuWin, MLPick, MLPayout, MLMoney, MLWin, VisitorModelRuns, HomeModelRuns, VisitorVegasRuns, HomeVegasRuns, away_score, home_score

In [1]:
# def create_score_df(object_path):
#     game_list = []

#     for i in range(len(os.listdir(object_path))):
#         file_path = os.path.join(object_path, f'game_object_{i}.pkl')
#         with open(file_path, 'rb') as file:
#             game_object = joblib.load(file)
            
            
            
#             game_list.append(game_object)

#     # Now loaded_game_list contains the objects loaded from the folder
#     away_score_list = []
#     home_score_list = []
#     for game in game_list:
#         away_score_list.append(game.away_score)
#         home_score_list.append(game.home_score)
        
    
#     score_df = pd.DataFrame(list(zip(away_score_list, home_score_list)), columns=['away_score', 'home_score'])

#     return score_df

In [None]:
# def create_score_df(object_path):
    
#     away_score_list = []
#     home_score_list = []

#     for i in range(len(os.listdir(object_path))):
#         file_path = os.path.join(object_path, f'game_object_{i}.joblib')
#         with open(file_path, 'rb') as file:
#             game_object = joblib.load(file)
            
#             away_score_list.append(game_object.away_score)
#             home_score_list.append(game_object.home_score)
        
    
#     score_df = pd.DataFrame(list(zip(away_score_list, home_score_list)), columns=['away_score', 'home_score'])

#     return score_df

In [None]:
def create_score_df(sim_score_df, game_id):
    """Return the simulated-score rows belonging to one game.

    Parameters
    ----------
    sim_score_df : pandas.DataFrame
        Must contain a ``gamePk`` column.
    game_id
        Value interpolated into the query expression ``gamePk == {game_id}``.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the matching rows. Callers in this notebook add columns
        to the result; returning a copy rather than the filtered slice keeps those
        assignments from raising ``SettingWithCopyWarning`` and from being ambiguous
        about whether the source frame is affected.
    """
    return sim_score_df.query(f'gamePk == {game_id}').copy()

In [None]:
def create_gambling_df(all_games_df, game_objects_df, odds_df, date, matchup, sim_score_df=None):
    """Build the one-row betting summary for a single matchup folder.

    Parameters
    ----------
    all_games_df : pandas.DataFrame
        Actual results with ``away_score`` and ``home_score``; looked up with
        ``.loc[game_id]``, so it presumably has to be indexed by integer game id --
        TODO confirm against the caller.
    game_objects_df : pandas.DataFrame
        Folder table with ``teams``, ``game_id``, ``time``, ``game_num`` and
        ``BASEBALLPRESSTEAM``.
    odds_df : pandas.DataFrame
        Odds with ``VisitorTeamShort``, ``EventDateTime`` and the line columns read by
        ``gambling_info``. It is no longer sorted or extended in place.
    date
        Kept for backward compatibility; not used.
    matchup
        Folder name, i.e. ``"<teams> <game_id> <time>"``.
    sim_score_df : pandas.DataFrame, optional
        Simulated scores containing ``gamePk``, forwarded to ``create_score_df``.
        Required in practice: ``create_score_df`` takes ``(sim_score_df, game_id)``, and
        the previous single-argument call with a folder path always raised TypeError.

    Returns
    -------
    pandas.DataFrame
        One row with the matchup, picks, payouts, odds, win flags, model and Vegas runs
        and the actual score.

    Raises
    ------
    TypeError
        If ``sim_score_df`` is not supplied.
    KeyError
        If no merged odds row corresponds to ``matchup``.
    """
    if sim_score_df is None:
        raise TypeError(
            "create_gambling_df() requires sim_score_df: "
            "create_score_df(sim_score_df, game_id) needs the simulated scores"
        )

    # Identify which game comes first (on a sorted copy, leaving the caller's frame alone)
    odds_sorted = odds_df.sort_values(by=['VisitorTeamShort', 'EventDateTime'])
    odds_sorted['game_num'] = odds_sorted.groupby(['VisitorTeamShort']).cumcount() + 1

    # Merge on object folder names
    merged_odds = game_objects_df.merge(
        odds_sorted,
        left_on=['BASEBALLPRESSTEAM', 'game_num'],
        right_on=['VisitorTeamShort', 'game_num'],
        how='outer',
    )
    merged_odds['folder'] = merged_odds['teams'] + " " + merged_odds['game_id'].astype('str') + " " + merged_odds['time'].astype('str')

    # Resolve the game id while 'folder' is still a column, so this does not depend on
    # whether gambling_info re-indexes the frame in place
    matching_ids = merged_odds.loc[merged_odds['folder'] == matchup, 'game_id']
    if matching_ids.empty:
        raise KeyError(matchup)
    game_id = int(matching_ids.iloc[0])

    # Extract gambling information
    Spread, OU, SpreadMoney1, SpreadMoney2, OuMoney1, OuMoney2, MLMoney1, MLMoney2, VisitorVegasRuns, HomeVegasRuns = gambling_info(merged_odds, matchup)

    # Read in dataframe with sim scores
    score_df = create_score_df(sim_score_df, game_id)

    # Create dataframe with expected payouts
    payout_df = gambling_payouts(score_df, Spread, OU, SpreadMoney1, SpreadMoney2, OuMoney1, OuMoney2, MLMoney1, MLMoney2, VisitorVegasRuns, HomeVegasRuns)

    # Extract scores
    away_score = all_games_df.loc[game_id, 'away_score']
    home_score = all_games_df.loc[game_id, 'home_score']

    # Extract success metrics
    summary_values = bet_projections(payout_df, Spread, OU, SpreadMoney1, SpreadMoney2, OuMoney1, OuMoney2, MLMoney1, MLMoney2, VisitorVegasRuns, HomeVegasRuns, away_score, home_score)

    # Create dataframe (bet_projections returns its values in exactly this column order)
    column_names = ['matchup', 'SpreadPick', 'SpreadPayout', 'SpreadMoney', 'SpreadWin', 'OuPick', 'OuPayout', 'OuMoney', 'OuWin', 'MLPick', 'MLPayout', 'MLMoney', 'MLWin', 'VisitorModelRuns', 'HomeModelRuns', 'VisitorVegasRuns', 'HomeVegasRuns', 'away_score', 'home_score']

    return pd.DataFrame([[matchup, *summary_values]], columns=column_names)

In [None]:
def process_date(date):
    """Assemble the betting summary for every matchup folder saved for ``date``.

    Reads the folder table via ``game_objects(date)`` and the odds from
    ``C01. Gambling/Odds {date}.csv`` under the notebook-level ``baseball_path``, then
    calls ``create_gambling_df`` once per entry of
    ``B02. Simulations/Game Objects/Matchups {date}`` using the notebook-level
    ``all_games_df``.

    Returns
    -------
    pandas.DataFrame
        The per-matchup rows concatenated with a fresh integer index and a ``date``
        column set to ``date``.
    """
    matchup_dir = os.path.join(baseball_path, "B02. Simulations", "Game Objects", f"Matchups {date}")

    # Read in game objects
    game_objects_df = game_objects(date)
    # Read in odds
    odds_df = pd.read_csv(os.path.join(baseball_path, "C01. Gambling", f"Odds {date}.csv"))

    # One gambling dataframe per game for which there are game objects
    per_game_frames = [
        create_gambling_df(all_games_df, game_objects_df, odds_df, date, matchup)
        for matchup in os.listdir(matchup_dir)
    ]

    gambling_df = pd.concat(per_game_frames, axis=0).reset_index(drop=True)
    gambling_df['date'] = date

    return gambling_df

In [None]:
def clean_gambling(df):
    """Replace the American odds of each picked bet with the money actually returned.

    For the spread, over/under and money-line columns, the ``*Money`` value is converted
    to a decimal payout (stake included) and multiplied by the matching ``*Win`` flag.
    Infinite values anywhere in the frame are then replaced with NaN.

    The frame is modified in place and also returned.
    """
    column_pairs = (
        ('SpreadMoney', 'SpreadWin'),
        ('OuMoney', 'OuWin'),
        ('MLMoney', 'MLWin'),
    )

    for money_col, win_col in column_pairs:
        american = df[money_col]
        # Decimal payout if the bet wins
        decimal_payout = np.where(american > 0, (american / 100) + 1, (100 / abs(american)) + 1)
        # Calculate actual money earned
        df[money_col] = decimal_payout * df[win_col]

    df.replace([np.inf, -np.inf], np.nan, inplace=True)

    return df

In [11]:
# def process_draft_group(draftGroupId):
#     output_lists = []

#     for matchup in os.listdir(rf"C:\Users\james\Documents\MLB\Database\B02. Simulations\1. Game Sims\Simulations {draftGroupId}"):
#         print(matchup)
#         # Extract gambling information (lines, over/unders, etc...)
#         try:
#             Spread, OU, SpreadMoney1, SpreadMoney2, OuMoney1, OuMoney2, MLMoney1, MLMoney2, VisitorVegasRuns, HomeVegasRuns, away_score, home_score = extract_gambling_info(matchup, team_map1, team_map2)
#             # Read score_df
#             score_df = pd.read_csv(rf'C:\Users\james\Documents\MLB\Database\B02. Simulations\1. Game Sims\Simulations {draftGroupId}\{matchup}')
#             # Create payout dataframe
#             payout_df = gambling_payouts(score_df, Spread, OU, SpreadMoney1, SpreadMoney2, OuMoney1, OuMoney2, MLMoney1, MLMoney2, VisitorVegasRuns, HomeVegasRuns)
#             SpreadPick, SpreadPayout, SpreadMoney, SpreadWin, OuPick, OuPayout, OuMoney, OuWin, MLPick, MLPayout, MLMoney, MLWin, VisitorModelRuns, HomeModelRuns, VisitorVegasRuns, HomeVegasRuns, away_score, home_score = bet_projections(payout_df, Spread, OU, SpreadMoney1, SpreadMoney2, OuMoney1, OuMoney2, MLMoney1, MLMoney2, VisitorVegasRuns, HomeVegasRuns, away_score, home_score)
#             output_list = [matchup, SpreadPick, SpreadPayout, SpreadMoney, SpreadWin, OuPick, OuPayout, OuMoney, OuWin, MLPick, MLPayout, MLMoney, MLWin, VisitorModelRuns, HomeModelRuns, VisitorVegasRuns, HomeVegasRuns, away_score, home_score]

#             output_lists.append(output_list)
#         except:
#             pass

        
        
#     # Create a DataFrame with column names
#     column_names = ['matchup', 'SpreadPick', 'SpreadPayout', 'SpreadMoney', 'SpreadWin', 'OuPick', 'OuPayout', 'OuMoney', 'OuWin', 'MLPick', 'MLPayout', 'MLMoney', 'MLWin', 'VisitorModelRuns', 'HomeModelRuns', 'VisitorVegasRuns', 'HomeVegasRuns', 'away_score', 'home_score']
#     df = pd.DataFrame(output_lists, columns=column_names)

#     df['SpreadMoney'] = np.where(df['SpreadMoney'] > 0, (df['SpreadMoney'] / 100) + 1, (100 / abs(df['SpreadMoney'])) + 1)
#     df['OuMoney'] = np.where(df['OuMoney'] > 0, (df['OuMoney'] / 100) + 1, (100 / abs(df['OuMoney'])) + 1)
#     df['MLMoney'] = np.where(df['MLMoney'] > 0, (df['MLMoney'] / 100) + 1, (100 / abs(df['MLMoney'])) + 1)

    
#     return df

In [12]:
# def read_team_map1():
#     sql_query = f'''
#         SELECT DKTEAM, BBREFTEAM, teamId
#         FROM "Team Map"
#         '''

#     team_map1 = pd.read_sql_query(sql_query, con=engine)
#     team_map1.set_index('DKTEAM', inplace=True)
    
#     return team_map1

In [14]:
# def read_team_map2():
#     sql_query = f'''
#             SELECT DKTEAM, BASEBALLPRESSTEAM
#             FROM "Team Map"
#             '''

#     team_map2 = pd.read_sql_query(sql_query, con=engine)
#     team_map2.set_index('BASEBALLPRESSTEAM', inplace=True)
    
#     return team_map2

In [10]:
# # This uses game_info from B01 and gambling_info from C01 to return game-specific gambling info and also actual scores
# def extract_gambling_info(filename, team_map1, team_map2):
#    # Long matchup
#     # Clean matchup file to work with game_info
#     # This is necessary because it's reading from files now, which can't have certain characters
#     # Use regular expression to extract components
#     match = re.match(r"([A-Z]+@[A-Z]+) (\d{2})(\d{2})(\d{4}) (\d{2})(\d{2})PM ET", filename)
#     if match:
#         teams, month, day, year, hour, minute = match.groups()
#         formatted_date_time = f"{month}/{day}/{year} {hour}:{minute}"
#     # New matchup name (should match DK's use)
#     long_matchup = f"{teams} {formatted_date_time} ET"


#     date_slash, gamePk, venue_id, away, home, away_starter, home_starter = game_info(long_matchup, games, team_map1)
#     print(gamePk)

#     # Extract away_score and home_score for the specified game_id
#     away_score = all_games_df.loc[gamePk, 'away_score']
#     home_score = all_games_df.loc[gamePk, 'home_score']


#     date = year + month + day
#     odds_df = pd.read_csv(os.path.join(baseball_path, "A08. Odds", f"Odds {date}.csv"))


#     Spread, OU, SpreadMoney1, SpreadMoney2, OuMoney1, OuMoney2, MLMoney1, MLMoney2, VisitorVegasRuns, HomeVegasRuns = gambling_info(filename, odds_df, team_map2)
    
#     return Spread, OU, SpreadMoney1, SpreadMoney2, OuMoney1, OuMoney2, MLMoney1, MLMoney2, VisitorVegasRuns, HomeVegasRuns, away_score, home_score

In [6]:
# def gambling_info(filename, df, team_map):
#     # Acquire DKTEAM names from team_map
#     result_df = df.merge(team_map, how='left', left_on='VisitorTeamShort', right_on='BASEBALLPRESSTEAM')
#     result_df = result_df.rename(columns={'DKTEAM': 'away'})

#     result_df = result_df.merge(team_map, how='left', left_on='HomeTeamShort', right_on='BASEBALLPRESSTEAM')
#     result_df = result_df.rename(columns={'DKTEAM': 'home'})


#     # Convert to datetime
#     result_df['EventDateTime'] = pd.to_datetime(result_df['EventDateTime'])

#     # Create the 'filename' column (this should match DK's names)
#     result_df['filename'] = result_df['away'] + "@" + result_df['home'] + " " + result_df['EventDateTime'].dt.strftime('%m%d%Y %I%M%p') + ' ET.csv'
    
#     # Try to look up the row based on the exact match in the "filename" column
#     exact_match_row = result_df[result_df['filename'] == filename]

#     if not exact_match_row.empty:
#         # If there is an exact match, extract the required columns as a tuple
#         data_tuple = tuple(exact_match_row[['Spread', 'OU', 'SpreadMoney1', 'SpreadMoney2',
#                                             'OuMoney1', 'OuMoney2', 'MLMoney1', 'MLMoney2',
#                                             'VisitorVegasRuns', 'HomeVegasRuns']].iloc[0])
#     else:
#         print("No Full Match")
#         # If there is no exact match, extract team names from the filename
#         away_team = filename.split('@')[0]

#         # Look up the values in the "away" and "home" columns
#         row = result_df[(result_df['away'] == away_team)] 

#         # Combine the extracted values into a tuple
#         data_tuple = tuple(row[['Spread', 'OU', 'SpreadMoney1', 'SpreadMoney2',
#                                 'OuMoney1', 'OuMoney2', 'MLMoney1', 'MLMoney2',
#                                 'VisitorVegasRuns', 'HomeVegasRuns']].iloc[0])

#     # Extract info
#     Spread = data_tuple[0]
#     OU = data_tuple[1]
#     SpreadMoney1 = data_tuple[2]
#     SpreadMoney2 = data_tuple[3]
#     OuMoney1 = data_tuple[4]
#     OuMoney2 = data_tuple[5]
#     MLMoney1 = data_tuple[6]
#     MLMoney2 = data_tuple[7]
#     VisitorVegasRuns = data_tuple[8]
#     HomeVegasRuns = data_tuple[9]
        
#     return Spread, OU, SpreadMoney1, SpreadMoney2, OuMoney1, OuMoney2, MLMoney1, MLMoney2, VisitorVegasRuns, HomeVegasRuns