In [None]:
from pybaseball import playerid_reverse_lookup
from pybaseball import batting_stats_range
from pybaseball import pitching_stats_range
from pybaseball import batting_stats_bref
import statsapi
import pandas as pd
from datetime import date,timedelta
from fuzzywuzzy import process, fuzz
from pybaseball import cache
from IPython.display import display
from dateutil import parser
import fangraphs_db as fg

# Turn on pybaseball's cache for this session (presumably so repeated stat
# queries for the same date range are not re-downloaded -- confirm against the
# pybaseball cache documentation).
cache.enable()
# Optional cache configuration, currently disabled:
# cache.config.cache_type='csv'
# cache.config.save()

######################################## definitions ##############################################

def get_team_ids(abrv):
    """Return the ids of every team matched by a lookup term.

    Args:
        abrv: Search term (e.g. a team abbreviation) handed to
            ``statsapi.lookup_team``.

    Returns:
        list: The ``'id'`` field of each team record returned by the lookup,
        in the order the lookup returned them.
    """
    return [team['id'] for team in statsapi.lookup_team(abrv)]

def calc_ops(AB,H,BB,HBP,TB,SF):
    """Return OPS (on-base percentage plus slugging) from counting stats.

    OPS = (H + BB + HBP) / (AB + BB + SF + HBP) + TB / AB, evaluated over a
    common denominator.

    Args:
        AB: At-bats.
        H: Hits.
        BB: Walks.
        HBP: Hit-by-pitches.
        TB: Total bases.
        SF: Sacrifice flies.

    Returns:
        The OPS value, or the sentinel ``999`` when the denominator is zero
        (no at-bats or no plate appearances), i.e. OPS is undefined.
    """
    on_base_chances = AB + BB + SF + HBP
    denominator = AB * on_base_chances

    # Test for zero explicitly instead of relying on an exception: NumPy
    # integers (what pandas hands back) do not raise on division by zero,
    # they produce nan/inf with a warning, which used to slip past the
    # sentinel.
    if denominator == 0:
        return 999

    return (AB * (H + BB + HBP) + TB * on_base_chances) / denominator

def calc_TB(H,twoB,threeB,HR):
    """Return total bases from hits broken down by type.

    Singles are derived as the hits that were not doubles, triples or home
    runs; each hit type is then weighted by the bases it is worth.

    Args:
        H: Total hits.
        twoB: Doubles.
        threeB: Triples.
        HR: Home runs.

    Returns:
        Total bases: singles + 2*doubles + 3*triples + 4*home runs.
    """
    extra_base_hits = twoB + threeB + HR
    singles = H - extra_base_hits
    return singles + (2 * twoB) + (3 * threeB) + (4 * HR)


def eat_pitcher_puke_ERA(pitcher,pitching_range):
    """Look up a pitcher's ERA and games started in a pitching stats table.

    The pitcher is matched on the ``Name`` column. When there is no exact
    match, the closest name by fuzzywuzzy ``token_sort_ratio`` is used if its
    score is above 80.

    Args:
        pitcher: Pitcher name to look up.
        pitching_range: DataFrame with at least ``Name``, ``GS`` and ``ERA``
            columns.

    Returns:
        tuple: ``(ERA, GS)``. ``ERA`` is the sentinel ``999`` when the pitcher
        has fewer than one game started; ``(999, 0)`` is returned when no
        acceptable name match exists.
    """
    rows = pitching_range[pitching_range.Name == pitcher]
    no_games_msg = pitcher + ' has not played any games in x days'

    if rows.empty:
        print(pitcher + ' was not found')

        # use fuzzy library to find closest name match.
        # if closest match score is greater than 80, then use that name
        total_pitchers = pitching_range['Name'].tolist()
        # Skip the fuzzy search on an empty roster: there is nothing to match
        # and indexing the (empty) result would raise.
        closest_match = (
            process.extract(pitcher, total_pitchers, scorer=fuzz.token_sort_ratio)
            if total_pitchers else []
        )
        if not closest_match or closest_match[0][1] <= 80:
            print('No closest match found')
            return 999, 0

        print('Using closest match: ' + closest_match[0][0])
        rows = pitching_range[pitching_range.Name == closest_match[0][0]]
        no_games_msg = 'Player has not played any games in x days'

    GS = rows['GS'].values[0]
    if GS < 1:
        print(no_games_msg)
        return 999, GS

    return rows['ERA'].values[0], GS
    

    
def eat_pitcher_puke_adv(pitcher,pitching_range):
    """Look up a pitcher's FIP and xFIP in a pitching stats table.

    The pitcher is matched on the ``Name`` column. When there is no exact
    match, the closest name by fuzzywuzzy ``token_sort_ratio`` is used if its
    score is above 80.

    Args:
        pitcher: Pitcher name to look up.
        pitching_range: DataFrame with at least ``Name``, ``GS``, ``FIP`` and
            ``xFIP`` columns.

    Returns:
        tuple: ``(FIP, xFIP)``. Both are the sentinel ``999`` when the pitcher
        has fewer than one game started or when no acceptable name match
        exists.
    """
    rows = pitching_range[pitching_range.Name == pitcher]
    no_games_msg = pitcher + ' has not played any games in x days'

    if rows.empty:
        print(pitcher + ' was not found')

        # use fuzzy library to find closest name match.
        # if closest match score is greater than 80, then use that name
        # The candidate list is built here; it was previously referenced
        # without ever being defined in this function.
        total_pitchers = pitching_range['Name'].tolist()
        closest_match = (
            process.extract(pitcher, total_pitchers, scorer=fuzz.token_sort_ratio)
            if total_pitchers else []
        )
        if not closest_match or closest_match[0][1] <= 80:
            print('No closest match found')
            return 999, 999

        print('Using closest match: ' + closest_match[0][0])
        rows = pitching_range[pitching_range.Name == closest_match[0][0]]
        no_games_msg = 'Player has not played any games in x days'

    if rows['GS'].values[0] < 1:
        print(no_games_msg)
        return 999, 999

    return rows['FIP'].values[0], rows['xFIP'].values[0]
    
    
def eat_pitcher_puke_5RA(pitcher,pitching_range):
    """Return a pitcher's earned runs per five innings over the given rows.

    All rows whose ``Name`` equals the pitcher are summed. When there is no
    exact match, the closest name by fuzzywuzzy ``token_sort_ratio`` is used
    if its score is above 80.

    Args:
        pitcher: Pitcher name to look up.
        pitching_range: DataFrame with at least ``Name``, ``ER`` and ``IP``
            columns.

    Returns:
        ``(ER / IP) * 5`` for the matched rows; the sentinel ``999`` when the
        matched rows total zero innings pitched; ``None`` when no acceptable
        name match exists.
    """
    rows = pitching_range[pitching_range.Name == pitcher]

    # Test for "not found" explicitly: summing an empty selection does not
    # raise, so an exception handler never sees a missing pitcher when the
    # columns are numeric.
    if rows.empty:
        print(pitcher + ' was not found')

        # use fuzzy library to find closest name match.
        # if closest match score is greater than 80, then use that name
        total_pitchers = pitching_range['Name'].tolist()
        closest_match = (
            process.extract(pitcher, total_pitchers, scorer=fuzz.token_sort_ratio)
            if total_pitchers else []
        )
        if not closest_match or closest_match[0][1] <= 80:
            print('No closest match found')
            return None

        print('Using closest match: ' + closest_match[0][0])
        pitcher = closest_match[0][0]
        rows = pitching_range[pitching_range.Name == pitcher]

    ER = rows['ER'].sum()
    IP = rows['IP'].sum()

    if IP == 0:
        # No innings means the rate is undefined; use the same 999 sentinel
        # as the other pitcher helpers instead of dividing by zero.
        print(pitcher +' has not played any games in x days')
        return 999

    return (ER / IP) * 5
        
    

def eat_lineup_puke_OPS(lineup,batting_range):
    """Return the combined OPS of a lineup from a batting stats table.

    Each player's counting stats are added up across the lineup and a single
    OPS is computed from the totals. Players are matched on the ``Name``
    column; when there is no exact match, the closest name by fuzzywuzzy
    ``token_sort_ratio`` is used if its score is above 80, otherwise the
    player is left out of the totals.

    Args:
        lineup: Iterable of player names.
        batting_range: DataFrame with at least ``Name``, ``AB``, ``H``,
            ``BB``, ``HBP``, ``SF``, ``2B``, ``3B`` and ``HR`` columns.

    Returns:
        The value of ``calc_ops`` for the lineup totals.
    """
    total_players = batting_range['Name'].tolist()

    # Running lineup totals.
    lineup_AB = 0
    lineup_H = 0
    lineup_BB = 0
    lineup_HBP = 0
    lineup_TB = 0
    lineup_SF = 0

    for player in lineup:
        rows = batting_range[batting_range.Name == player]

        if rows.empty:
            print('Batter ' + player + ' was not found')

            # use fuzzy library to find closest name match.
            # if closest match score is greater than 80, then use that name
            # (skipped on an empty roster, where there is nothing to match).
            closest_match = (
                process.extract(player, total_players, scorer=fuzz.token_sort_ratio)
                if total_players else []
            )
            if not closest_match or closest_match[0][1] <= 80:
                print('No closest match found.')
                continue

            print('Using closest match: ' + closest_match[0][0])
            rows = batting_range[batting_range.Name == closest_match[0][0]]

        # Read every stat before touching the totals so a player is counted
        # either completely or not at all.
        stats = {
            column: rows[column].values[0]
            for column in ('AB', 'H', 'BB', 'HBP', 'SF', '2B', '3B', 'HR')
        }

        lineup_AB += stats['AB']
        lineup_H += stats['H']
        lineup_BB += stats['BB']
        lineup_HBP += stats['HBP']
        lineup_SF += stats['SF']
        lineup_TB += calc_TB(stats['H'], stats['2B'], stats['3B'], stats['HR'])

    OPS = calc_ops(lineup_AB, lineup_H, lineup_BB, lineup_HBP, lineup_TB, lineup_SF)

    return OPS

def eat_lineup_puke_wrcplus(lineup,batting_range):
    """Return a lineup's wRC+ weighted by each member's plate appearances.

    Players are matched on the ``Name`` column; when there is no exact match,
    the closest name by fuzzywuzzy ``token_sort_ratio`` is used if its score
    is above 80, otherwise the player is left out.

    Args:
        lineup: Iterable of player names.
        batting_range: DataFrame with at least ``Name``, ``PA`` and
            ``wRCplus`` columns.

    Returns:
        The value of ``calc_wRCplus`` for the collected plate appearances and
        wRC+ values.
    """
    total_players = batting_range['Name'].tolist()

    lineupPAs = []
    lineupWRCs = []

    for player in lineup:
        rows = batting_range[batting_range.Name == player]

        if rows.empty:
            print('Batter ' + player + ' was not found')

            # use fuzzy library to find closest name match.
            # if closest match score is greater than 80, then use that name
            # (skipped on an empty roster, where there is nothing to match).
            closest_match = (
                process.extract(player, total_players, scorer=fuzz.token_sort_ratio)
                if total_players else []
            )
            if not closest_match or closest_match[0][1] <= 80:
                print('No closest match found.')
                continue

            print('Using closest match: ' + closest_match[0][0])
            rows = batting_range[batting_range.Name == closest_match[0][0]]

        # Read both values before appending so the two lists always stay the
        # same length.
        plate_appearances = rows['PA'].values[0]
        wrc_plus = rows['wRCplus'].values[0]
        lineupPAs.append(plate_appearances)
        lineupWRCs.append(wrc_plus)

    wRCplus = calc_wRCplus(lineupPAs,lineupWRCs)

    return wRCplus
def calc_wRCplus(PAs,WRCs):
    """Return the plate-appearance-weighted average of wRC+ values.

    Args:
        PAs: Plate appearances, one entry per player.
        WRCs: wRC+ values, paired positionally with ``PAs``.

    Returns:
        sum(wRC+ * PA) / sum(PA). Division by zero is not guarded, so an
        empty or all-zero ``PAs`` raises ``ZeroDivisionError``.
    """
    weighted_total = 0
    for wrc_value, plate_appearances in zip(WRCs, PAs):
        weighted_total += wrc_value * plate_appearances

    total_plate_appearances = sum(PAs)
    return weighted_total / total_plate_appearances
    
    
######################################## main code ##############################################

def main(useDate,days,getdata,debug):
    """Build one row of lineup and probable-pitcher stats per game on ``useDate``.

    For every scheduled game on ``useDate`` that lists at least one probable
    pitcher, the posted lineups are looked up and rolling stats over the
    window ``[useDate - days, useDate - 1 day]`` are computed: lineup OPS and
    wRC+, and each probable pitcher's ERA, games started, FIP and xFIP. Unless
    the game date is today, the linescore is also fetched to record the score
    after five innings and the final score.

    Args:
        useDate: The date (``datetime.date``) whose games are processed.
        days: How many days before ``useDate`` the rolling window starts.
        getdata: Not used anywhere in this function.
        debug: When equal to 1, prints the raw schedule and a per-game
            summary.

    Returns:
        pandas.DataFrame: One row per successfully processed game, with the
        columns named in the dict ``d`` at the bottom of the function. Games
        that raise any exception while being processed are reported and
        skipped.
    """
    # inputs: days = just give how many days you want to go back to pull rolling averages
    # outputs: prints home OPS and away OPS (rolling average last x days) for every game today
    
    
    # todays_date = whatever date is in useDate
    # yesterdays_date = one day before useDate
    # last_x_days = "days" before todays date
    
    # get the game_ids for today's games
    todays_date = useDate
    yesterdays_date = todays_date - timedelta(days=1)
    last_x_days = todays_date  - timedelta(days=days)
    print(todays_date,yesterdays_date,last_x_days)
    
    todays_games = statsapi.schedule(start_date=todays_date,end_date=todays_date)
    if debug==1:
        print(todays_games)
    
    # get info from todays games. such as the h/a ids, and probable pitchers
    
    home_team_ids = []
    away_team_ids = []
    game_ids = []
    home_probable_pitcher = []
    away_probable_pitcher = []
    

    # A game is skipped only when BOTH probable-pitcher names have fewer than
    # two space-separated words; a game with just one side missing is kept.
    for game in todays_games:
            if len(game['home_probable_pitcher'].split(' ')) < 2 and len(game['away_probable_pitcher'].split(' ')) < 2:
                print('no probable pitchers for this game. skipping.')
            else:
                home_team_ids.append(game['home_id'])
                away_team_ids.append(game['away_id'])
                game_ids.append(game['game_id'])
                home_probable_pitcher.append(game['home_probable_pitcher'])
                away_probable_pitcher.append(game['away_probable_pitcher'])


    # Output columns, filled in parallel: one entry per successfully processed game.
    home_OPS = []
    away_OPS = []
    home_wRCplus = []
    away_wRCplus = []
    home_pitcher_name = []
    away_pitcher_name = []
    home_ERA = []
    away_ERA = []
    home_FIP = []
    away_FIP = []
    home_xFIP = []
    away_xFIP = []
    home_pitcher_GS = []
    away_pitcher_GS = []
    home_team = []
    away_team = []
    game_dates = []
    game_times = []
    home_after_5 = []
    away_after_5 = []
    home_total_score = []
    away_total_score = []
    
    
    # Rolling-window batting and pitching tables from pybaseball (window ends the day before useDate).
    batting_range = batting_stats_range(last_x_days.isoformat(),yesterdays_date.isoformat())
    pitching_range = pitching_stats_range(last_x_days.isoformat(),yesterdays_date.isoformat())
    
    #### need to add season options in this....###
    # NOTE(review): both FanGraphs URLs below hard-code season=2021/season1=2021
    # regardless of useDate -- confirm this is correct for other seasons.

    # The return value is not used afterwards; the data is read via fg.clean_file().
    batting_leaderboard = fg.download_leaderboard('https://www.fangraphs.com/leaders.aspx?'
                                      'pos=all&stats=bat&lg=all&qual=0&type=8&season=2021&'
                                      'month=1000&season1=2021&ind=0&startdate={start}&'
                                      'enddate={stop}'.format(start=last_x_days.isoformat(),
                                                              stop=yesterdays_date.isoformat()))
        
    fg_batting_range = fg.clean_file()
    
    # Drop batters without a wRC+ value so they cannot enter the weighted average.
    fg_batting_range = fg_batting_range.dropna(subset=['wRCplus'])
#     display(fg_batting_range)
    
    # Same download-then-clean sequence for pitchers; the return value is again unused.
    pitching_leaderboard = fg.download_leaderboard('https://www.fangraphs.com/leaders.aspx?'
                                      'pos=all&stats=pit&lg=all&qual=0&type=8&season=2021&'
                                      'month=1000&season1=2021&ind=0&startdate={start}&'
                                      'enddate={stop}'.format(start=last_x_days.isoformat(),
                                                              stop=yesterdays_date.isoformat()))
    fg_pitching_range = fg.clean_file()
    
#     display(fg_pitching_range)
    
    # loop thru all games in game_ids and compile batting data for each lineup, and pitching data for each probable pitcher
    # i indexes the probable-pitcher lists, which were built in step with game_ids.
    i=0
    for game_id in game_ids:
        # Get lineups
        test = statsapi.get('schedule',{'gamePk':game_id,'sportId':1,'hydrate':'lineups'})
        
        
        
        
        # Get home and away team names
        homeTeam = test['dates'][0]['games'][0]['teams']['home']['team']['name']
        awayTeam = test['dates'][0]['games'][0]['teams']['away']['team']['name']
        
        dateOfGame = test['dates'][0]['date']
        
        # Time-of-day portion of the game's 'gameDate' timestamp.
        timeOfGame = parser.parse(test['dates'][0]['games'][0]['gameDate']).time()
        
        
        # Get players/player ids (the id lists are collected but not used below)
        awayPlayerIds = []
        homePlayersIds = []

        awayPlayersNames = []
        homePlayersNames = []
        # Everything for one game happens inside this try: any exception
        # skips the game via the handler at the bottom of the loop.
        try:
            for player in test['dates'][0]['games'][0]['lineups']['homePlayers']:
                homePlayersNames.append(player['fullName'])
                homePlayersIds.append(player['id'])
            for player in test['dates'][0]['games'][0]['lineups']['awayPlayers']:
                awayPlayersNames.append(player['fullName'])
                awayPlayerIds.append(player['id'])
                

            # lineup OPS over the rolling window

            tHomeTeamOPS = eat_lineup_puke_OPS(homePlayersNames,batting_range)
            tAwayTeamOPS = eat_lineup_puke_OPS(awayPlayersNames,batting_range)
            
            
            # lineup wRC+ over the rolling window
            tHomeTeamwRCPlus = eat_lineup_puke_wrcplus(homePlayersNames,fg_batting_range)
            tAwayTeamwRCPlus = eat_lineup_puke_wrcplus(awayPlayersNames,fg_batting_range)

            # probable-pitcher stats over the rolling window
            # get era

            tHomeTeamERA,tHomeTeamGS = eat_pitcher_puke_ERA(home_probable_pitcher[i],pitching_range)
            tAwayTeamERA,tAwayTeamGS = eat_pitcher_puke_ERA(away_probable_pitcher[i],pitching_range)
            
            # get adv stats (FIP and xFIP)
            tHomeTeamFIP,tHomeTeamxFIP = eat_pitcher_puke_adv(home_probable_pitcher[i],fg_pitching_range)
            tAwayTeamFIP,tAwayTeamxFIP = eat_pitcher_puke_adv(away_probable_pitcher[i],fg_pitching_range)
            
            # get scores from games if they have already happened

            home_score = []
            away_score = []
            
            # Any game date other than today (future dates included) takes the linescore path.
            if date.fromisoformat(dateOfGame) != date.today():
                boxScore = statsapi.get('game_linescore',{'gamePk':game_id})
                for inning in boxScore['innings']:
                    # A home half-inning without a 'runs' entry counts as 0
                    # (presumably a bottom half that was not played -- confirm).
                    if 'runs' in inning['home']:
                        home_score.append(inning['home']['runs'])
                    else:
                        home_score.append(0)

                    # Away runs are read unconditionally; a missing key raises and skips the game.
                    away_score.append(inning['away']['runs'])

                home_after_5.append(sum(home_score[0:5]))
                away_after_5.append(sum(away_score[0:5]))

                home_total_score.append(sum(home_score))
                away_total_score.append(sum(away_score))
            else:
                home_after_5.append('TBD')
                away_after_5.append('TBD')
                
                home_total_score.append('TBD')
                away_total_score.append('TBD')
            
            
            # append to lists
            home_OPS.append(tHomeTeamOPS)
            away_OPS.append(tAwayTeamOPS)
            
            home_wRCplus.append(tHomeTeamwRCPlus)
            away_wRCplus.append(tAwayTeamwRCPlus)

            home_pitcher_name.append(home_probable_pitcher[i])
            away_pitcher_name.append(away_probable_pitcher[i])
            
            home_ERA.append(tHomeTeamERA)
            away_ERA.append(tAwayTeamERA)

            home_FIP.append(tHomeTeamFIP)
            away_FIP.append(tAwayTeamFIP)
            
            home_xFIP.append(tHomeTeamxFIP)
            away_xFIP.append(tAwayTeamxFIP)
            
            home_pitcher_GS.append(tHomeTeamGS)
            away_pitcher_GS.append(tAwayTeamGS)
            
            home_team.append(homeTeam)
            away_team.append(awayTeam)
            
            game_dates.append(dateOfGame)
            game_times.append(timeOfGame)
            
            
            if debug==1:
                print('{0} (Lineup {1} day OPS: {2})\n@ {3} (Lineup {4} day OPS: {5})'.
                     format(awayTeam,days,round(tAwayTeamOPS,3),homeTeam, days,round(tHomeTeamOPS,3)))
                print('{0} Pitcher is {1}, their {2} day ERA is {3}.\n@ {4} Pitcher is {5}, their {6} day ERA is {7}'.
                     format(awayTeam,away_probable_pitcher[i],days,round(tAwayTeamERA,3),homeTeam,home_probable_pitcher[i],days,round(tHomeTeamERA,3)))

        
        
        

        
            i+=1
                
                
                
        except:
            # Best-effort: report the game and move on. i still advances so
            # the probable-pitcher index stays aligned with game_ids.
            print('error with '+awayTeam+'@'+homeTeam+'. Going to next game.')
            i+=1
        




#     print(home_OPS)
#     print(away_OPS)
#     print(home_ERA)
#     print(away_ERA)
    
    # Assemble the parallel lists into the output table.
    d = {'HomeTeam': home_team,'AwayTeam':away_team,'GameDate':game_dates,'GameTime':game_times,'HomeOPS':home_OPS,
         'AwayOPS':away_OPS, 'HomewRCPlus':home_wRCplus,'AwaywRCPlus':away_wRCplus,
         'HomePitcherName':home_pitcher_name,'HomeERA':home_ERA,'HomePitcherGS':home_pitcher_GS,
         'AwayPitcherName':away_pitcher_name,'AwayERA':away_ERA,'AwayPitcherGS':away_pitcher_GS,
         'HomeFIP':home_FIP,'AwayFIP':away_FIP,'HomexFIP':home_xFIP,'AwayxFIP':away_xFIP,
         'HomeScoreAfter5':home_after_5,'AwayScoreAfter5':away_after_5,'HomeTotalScore':home_total_score,
         'AwayTotalScore':away_total_score}
    df = pd.DataFrame(data=d)
        
    return df    
    
    
    
# x = range(1,40,1)
x = range(0,1,1)

# Run main() for every date in the range and stack the per-date tables into
# one DataFrame that is displayed and written to CSV.
daterange = pd.date_range(start="2018-05-21",end="2018-09-29")
# daterange = pd.date_range(start="2021-07-17",end="2021-07-17")

# Collect the per-date frames and concatenate once at the end:
# DataFrame.append was removed in pandas 2.0, and repeated appends copy the
# whole accumulated frame on every iteration.
daily_frames = []
k=0
for d in daterange:
    useDate = d.date()
    print(useDate)
    try:
        df2 = main(useDate,30,1,0)
        daily_frames.append(df2)
    except Exception:
        # Best-effort: a failing date is reported and skipped. Catching
        # Exception (not everything) keeps Ctrl-C able to stop the long run.
        print('Error with ' + useDate.isoformat() + '. Going to next date.')
    k+=1
    print(k)

# An empty frame stands in when every date failed, so the steps below still run.
df = pd.concat(daily_frames,ignore_index=True) if daily_frames else pd.DataFrame()

display(df)    
# Raw string: same path as before, without relying on invalid escape
# sequences such as "\J" and "\P" being passed through unchanged.
df.to_csv(r'C:/Users\Justin\Projects\python_baseball_2021\mlbdata_052118_092918_adv.csv',index=False)

In [12]:
### keep this ####

from pybaseball import playerid_reverse_lookup
from pybaseball import batting_stats_range
from pybaseball import pitching_stats_range
from pybaseball import batting_stats_bref
import statsapi
import pandas as pd
from datetime import date,timedelta
from fuzzywuzzy import process, fuzz
from pybaseball import cache
from IPython.display import display
from dateutil import parser
import fangraphs_db as fg
import pandas as pd
from datetime import date,timedelta,datetime
from pysbr import *
from collections import defaultdict

def get_odds_by_date_range(start,stop):
    """Fetch current and opening 5Dimes lines for completed MLB events.

    Events between ``start`` and ``stop`` are queried through pysbr; only
    those whose ``'event status'`` is ``'complete'`` are kept. Current and
    opening lines are requested for markets 397, 398, 725 and 91, converted
    to column-oriented dicts, given underscore-style column names, and joined
    side by side.

    Args:
        start: Start date as a ``'YYYY-MM-DD'`` string.
        stop: End date as a ``'YYYY-MM-DD'`` string.

    Returns:
        pandas.DataFrame: The current-lines table with three extra columns
        taken from the opening lines: ``opening_spread_total``,
        ``opening_american_odds`` and ``opening_decimal_odds``.
    """
    start_dt = datetime.strptime(start, '%Y-%m-%d')
    stop_dt = datetime.strptime(stop, '%Y-%m-%d')
    
    mlb = MLB()
    sb = Sportsbook()

#     print(mlb.sport_config())
    
    e = EventsByDateRange(mlb.league_id,start_dt,stop_dt)

    # Fetch the event list once and scan all of it; the previous index loop
    # started at 1 and therefore never looked at the first event.
    events = e.list()
    e_ids = [event['event id'] for event in events
             if event['event status'] == 'complete']

    # get current lines        
    cl = CurrentLines(e_ids,mlb.market_ids([397,398,725,91]),sb.ids(['5dimes']))
    
    # convert to dict
    cl_d = Convert(cl.list(e))
    
    # change dict names to play nice with matlab
    # (each pop/re-insert moves the key to the end, so this order is also
    # the resulting column order)
    current_renames = [
        ('market id', 'market_id'),
        ('event id', 'event_id'),
        ('spread / total', 'spread_total'),
        ('participant full name', 'participant_full_name'),
        ('participant id', 'participant_id'),
        ('sportsbook id', 'sportsbook_id'),
        ('american odds', 'american_odds'),
        ('decimal odds', 'decimal_odds'),
        ('participant score', 'participant_score'),
        ('sportsbook alias', 'sportsbook_alias'),
    ]
    for old_key, new_key in current_renames:
        cl_d[new_key] = cl_d.pop(old_key)

    # get opening lines
#     print(e_ids)
    ol = OpeningLines(e_ids,mlb.market_ids([397,398,725,91]),sb.id(['5dimes']))
    
    # convert to dict
    ol_d = Convert(ol.list(e))
    
    # change dict names to play nice with matlab
    # (spread and odds columns get an "opening_" prefix so they do not clash
    # with the current-line columns when the two tables are combined)
    opening_renames = [
        ('market id', 'market_id'),
        ('event id', 'event_id'),
        ('spread / total', 'opening_spread_total'),
        ('participant full name', 'participant_full_name'),
        ('participant id', 'participant_id'),
        ('sportsbook id', 'sportsbook_id'),
        ('american odds', 'opening_american_odds'),
        ('decimal odds', 'opening_decimal_odds'),
        ('participant score', 'participant_score'),
        ('sportsbook alias', 'sportsbook_alias'),
    ]
    for old_key, new_key in opening_renames:
        ol_d[new_key] = ol_d.pop(old_key)

    # this is an unnecessary step but it was an easy fix to get it to play right in matlab
    dfc = pd.DataFrame.from_dict(cl_d,orient='index')
    dfc = dfc.transpose()
    
    dfo = pd.DataFrame.from_dict(ol_d,orient='index')
    dfo = dfo.transpose()
    
    # combine opening and closing dataframes
    # NOTE(review): the join is positional (row index), so it assumes both
    # queries return rows in the same order -- confirm.
    new_df = pd.concat([dfc,dfo['opening_spread_total'],dfo['opening_american_odds'],dfo['opening_decimal_odds']],axis=1)
    
    return new_df

def Convert(lst):
    """Pivot a list of dicts into a dict of lists.

    Args:
        lst: Iterable of dicts (records).

    Returns:
        dict: Maps every key seen in any record to the list of values found
        under that key, in record order. Keys appear in first-seen order;
        records lacking a key contribute nothing to that key's list.
    """
    columns = {}
    for record in lst:
        for field in record:
            columns.setdefault(field, []).append(record[field])

    return columns

# Pull the odds for the whole date range.
df = get_odds_by_date_range('2018-05-21','2018-09-29')

# first_5_pickem = df.loc[df['market_id']==91].sort_values(by=['datetime','event_id'])
# first_5_pickem['GameDate'] = first_5_pickem['datetime'].apply(lambda x: parser.parse(x).date())
# first_5_pickem['GameTime'] = first_5_pickem['datetime'].apply(lambda x: parser.parse(x).time())


# Keep only market 397 rows, ordered by timestamp and then event.
is_market_397 = df['market_id']==397
first_5_spread = df.loc[is_market_397].sort_values(by=['datetime','event_id'])

# Split the timestamp string into separate date and time-of-day columns.
first_5_spread['GameDate'] = first_5_spread['datetime'].map(lambda stamp: parser.parse(stamp).date())
first_5_spread['GameTime'] = first_5_spread['datetime'].map(lambda stamp: parser.parse(stamp).time())

display(first_5_spread)

Unnamed: 0,datetime,event,market,result,profit,sportsbook,participant,market_id,event_id,spread_total,...,sportsbook_id,american_odds,decimal_odds,participant_score,sportsbook_alias,opening_spread_total,opening_american_odds,opening_decimal_odds,GameDate,GameTime
6676,2018-05-21T09:45:45-05:00,Baltimore@Chicago,1st Half - Point Spread,L,-100.0,5Dimes,CWS,397,3387805,0.5,...,3,-150,1.67,2,Sportbet,6.5,-105,1.95,2018-05-21,09:45:45
6677,2018-05-21T09:45:45-05:00,Baltimore@Chicago,1st Half - Point Spread,W,130.0,5Dimes,BAL,397,3387805,-0.5,...,3,130,2.3,3,Sportbet,6.5,-115,1.87,2018-05-21,09:45:45
8118,2018-05-21T09:45:54-05:00,Detroit@Minnesota,1st Half - Point Spread,W,69.0,5Dimes,MIN,397,3388173,-0.5,...,3,-145,1.69,4,Sportbet,0,102,2.02,2018-05-21,09:45:54
8119,2018-05-21T09:45:54-05:00,Detroit@Minnesota,1st Half - Point Spread,L,-100.0,5Dimes,DET,397,3388173,0.5,...,3,125,2.25,2,Sportbet,0,-117,1.85,2018-05-21,09:45:54
4194,2018-05-21T10:53:47-05:00,San Diego@Washington,1st Half - Point Spread,W,80.0,5Dimes,WSH,397,3387209,-0.5,...,3,-125,1.8,10,Sportbet,0,-120,1.83,2018-05-21,10:53:47
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1377,2018-09-28T17:47:45-05:00,New York@Boston,1st Half - Point Spread,W,91.0,5Dimes,NYY,397,3386528,-0.5,...,3,-110,1.91,11,Sportbet,0.5,-125,1.8,2018-09-28,17:47:45
7890,2018-09-28T18:01:57-05:00,Washington@Colorado,1st Half - Point Spread,L,-100.0,5Dimes,WSH,397,3388121,0.5,...,3,120,2.2,2,Sportbet,0,120,2.2,2018-09-28,18:01:57
7891,2018-09-28T18:01:57-05:00,Washington@Colorado,1st Half - Point Spread,W,71.0,5Dimes,COL,397,3388121,-0.5,...,3,-140,1.71,5,Sportbet,0,-135,1.74,2018-09-28,18:01:57
7566,2018-09-28T21:06:01-05:00,Oakland@Los Angeles,1st Half - Point Spread,L,-100.0,5Dimes,OAK,397,3388032,-0.5,...,3,115,2.15,5,Sportbet,0,-125,1.8,2018-09-28,21:06:01


In [37]:
### keep this ####

import numpy as np

def determine_home_team(row):
    """Return the participant's full name when the row is for the home side.

    The event string has the form ``'<away>@<home>'``. Sides are identified by
    comparing the first word of each half with the first word of
    ``participant_full_name``.

    Returns:
        The participant's full name for a home-side row; ``None`` for an
        away-side row or when both sides share the same first word;
        ``'error'`` when the participant matches neither side.
    """
    sides = row['event'].split('@')
    visitor_key = sides[0].split(' ')[0]
    host_key = sides[1].split(' ')[0]
    participant_key = row['participant_full_name'].split(' ')[0]

    if host_key == visitor_key:
        # Ambiguous matchup: the first word cannot tell the sides apart.
        return None
    if participant_key == host_key:
        return row['participant_full_name']
    return None if participant_key == visitor_key else 'error'

def determine_away_team(row):
    """Return the participant's full name when the row is for the away side.

    The event string has the form ``'<away>@<home>'``. Sides are identified by
    comparing the first word of each half with the first word of
    ``participant_full_name``.

    Returns:
        The participant's full name for an away-side row; ``None`` for a
        home-side row or when both sides share the same first word;
        ``'error'`` when the participant matches neither side.
    """
    sides = row['event'].split('@')
    visitor_key = sides[0].split(' ')[0]
    host_key = sides[1].split(' ')[0]
    participant_key = row['participant_full_name'].split(' ')[0]

    if host_key == visitor_key:
        # Ambiguous matchup: the first word cannot tell the sides apart.
        return None
    if participant_key == host_key:
        return None
    if participant_key == visitor_key:
        return row['participant_full_name']
    return 'error'
    
def determine_home_odds(row):
    """Return ``decimal_odds`` when the row is for the home side.

    The event string has the form ``'<away>@<home>'``. Sides are identified by
    comparing the first word of each half with the first word of
    ``participant_full_name``.

    Returns:
        ``row['decimal_odds']`` for a home-side row; ``None`` for an away-side
        row or when both sides share the same first word; ``'error'`` when the
        participant matches neither side.
    """
    sides = row['event'].split('@')
    visitor_key = sides[0].split(' ')[0]
    host_key = sides[1].split(' ')[0]
    participant_key = row['participant_full_name'].split(' ')[0]

    if host_key == visitor_key:
        return None
    if participant_key == host_key:
        return row['decimal_odds']
    return None if participant_key == visitor_key else 'error'
    
def determine_away_odds(row):
    """Return ``decimal_odds`` when the row is for the away side.

    The event string has the form ``'<away>@<home>'``. Sides are identified by
    comparing the first word of each half with the first word of
    ``participant_full_name``.

    Returns:
        ``row['decimal_odds']`` for an away-side row; ``None`` for a home-side
        row or when both sides share the same first word; ``'error'`` when the
        participant matches neither side.
    """
    sides = row['event'].split('@')
    visitor_key = sides[0].split(' ')[0]
    host_key = sides[1].split(' ')[0]
    participant_key = row['participant_full_name'].split(' ')[0]

    if host_key == visitor_key:
        return None
    if participant_key == host_key:
        return None
    if participant_key == visitor_key:
        return row['decimal_odds']
    return 'error'
    
def determine_opening_home_odds(row):
    """Return ``opening_decimal_odds`` when the row is for the home side.

    The event string has the form ``'<away>@<home>'``. Sides are identified by
    comparing the first word of each half with the first word of
    ``participant_full_name``.

    Returns:
        ``row['opening_decimal_odds']`` for a home-side row; ``None`` for an
        away-side row or when both sides share the same first word;
        ``'error'`` when the participant matches neither side.
    """
    sides = row['event'].split('@')
    visitor_key = sides[0].split(' ')[0]
    host_key = sides[1].split(' ')[0]
    participant_key = row['participant_full_name'].split(' ')[0]

    if host_key == visitor_key:
        return None
    if participant_key == host_key:
        return row['opening_decimal_odds']
    return None if participant_key == visitor_key else 'error'
    
    
def determine_opening_home_spread(row):
    """Return ``opening_spread_total`` when the row is for the home side.

    The event string has the form ``'<away>@<home>'``. Sides are identified by
    comparing the first word of each half with the first word of
    ``participant_full_name``.

    Returns:
        ``row['opening_spread_total']`` for a home-side row; ``None`` for an
        away-side row or when both sides share the same first word;
        ``'error'`` when the participant matches neither side.
    """
    sides = row['event'].split('@')
    visitor_key = sides[0].split(' ')[0]
    host_key = sides[1].split(' ')[0]
    participant_key = row['participant_full_name'].split(' ')[0]

    if host_key == visitor_key:
        return None
    if participant_key == host_key:
        return row['opening_spread_total']
    return None if participant_key == visitor_key else 'error'
    
def determine_home_spread(row):
    """Return ``spread_total`` when the row is for the home side.

    The event string has the form ``'<away>@<home>'``. Sides are identified by
    comparing the first word of each half with the first word of
    ``participant_full_name``.

    Returns:
        ``row['spread_total']`` for a home-side row; ``None`` for an away-side
        row or when both sides share the same first word; ``'error'`` when the
        participant matches neither side.
    """
    sides = row['event'].split('@')
    visitor_key = sides[0].split(' ')[0]
    host_key = sides[1].split(' ')[0]
    participant_key = row['participant_full_name'].split(' ')[0]

    if host_key == visitor_key:
        return None
    if participant_key == host_key:
        return row['spread_total']
    return None if participant_key == visitor_key else 'error'
    
    
def determine_opening_away_odds(row):
    """Return ``opening_decimal_odds`` when the row is for the away side.

    The event string has the form ``'<away>@<home>'``. Sides are identified by
    comparing the first word of each half with the first word of
    ``participant_full_name``.

    Returns:
        ``row['opening_decimal_odds']`` for an away-side row; ``None`` for a
        home-side row or when both sides share the same first word;
        ``'error'`` when the participant matches neither side.
    """
    sides = row['event'].split('@')
    visitor_key = sides[0].split(' ')[0]
    host_key = sides[1].split(' ')[0]
    participant_key = row['participant_full_name'].split(' ')[0]

    if host_key == visitor_key:
        return None
    if participant_key == host_key:
        return None
    if participant_key == visitor_key:
        return row['opening_decimal_odds']
    return 'error'
def determine_opening_away_spread(row):
    """Return ``opening_spread_total`` when the row is for the away side.

    The event string has the form ``'<away>@<home>'``. Sides are identified by
    comparing the first word of each half with the first word of
    ``participant_full_name``.

    Returns:
        ``row['opening_spread_total']`` for an away-side row; ``None`` for a
        home-side row or when both sides share the same first word;
        ``'error'`` when the participant matches neither side.
    """
    sides = row['event'].split('@')
    visitor_key = sides[0].split(' ')[0]
    host_key = sides[1].split(' ')[0]
    participant_key = row['participant_full_name'].split(' ')[0]

    if host_key == visitor_key:
        return None
    if participant_key == host_key:
        return None
    if participant_key == visitor_key:
        return row['opening_spread_total']
    return 'error'
    
def determine_away_spread(row):
    """Return ``spread_total`` when the row is for the away side.

    The event string has the form ``'<away>@<home>'``. Sides are identified by
    comparing the first word of each half with the first word of
    ``participant_full_name``.

    Returns:
        ``row['spread_total']`` for an away-side row; ``None`` for a home-side
        row or when both sides share the same first word; ``'error'`` when the
        participant matches neither side.
    """
    sides = row['event'].split('@')
    visitor_key = sides[0].split(' ')[0]
    host_key = sides[1].split(' ')[0]
    participant_key = row['participant_full_name'].split(' ')[0]

    if host_key == visitor_key:
        return None
    if participant_key == host_key:
        return None
    if participant_key == visitor_key:
        return row['spread_total']
    return 'error'

    
def determine_over_opening_total(row):
    """Return ``opening_spread_total`` when the row is the 'over' side.

    Returns:
        ``row['opening_spread_total']`` when ``row['participant']`` is
        ``'over'``; ``None`` when it is ``'under'``; ``'error'`` otherwise.
    """
    side = row['participant']

    if side == 'over':
        return row['opening_spread_total']
    return None if side == 'under' else 'error'
def determine_under_opening_total(row):
    """Return ``opening_spread_total`` when the row is the 'under' side.

    Returns:
        ``row['opening_spread_total']`` when ``row['participant']`` is
        ``'under'``; ``None`` when it is ``'over'``; ``'error'`` otherwise.
    """
    side = row['participant']

    if side == 'over':
        return None
    if side == 'under':
        return row['opening_spread_total']
    return 'error'
    
def determine_over_total(row):
    """Return ``spread_total`` when the row is the 'over' side.

    Returns:
        ``row['spread_total']`` when ``row['participant']`` is ``'over'``;
        ``None`` when it is ``'under'``; ``'error'`` otherwise.
    """
    side = row['participant']

    if side == 'over':
        return row['spread_total']
    return None if side == 'under' else 'error'
def determine_under_total(row):
    """Return ``spread_total`` when the row is the 'under' side.

    Returns:
        ``row['spread_total']`` when ``row['participant']`` is ``'under'``;
        ``None`` when it is ``'over'``; ``'error'`` otherwise.
    """
    side = row['participant']

    if side == 'over':
        return None
    if side == 'under':
        return row['spread_total']
    return 'error'
# NOTE(review): these four helpers were empty `def` headers, which made the
# whole cell a syntax error. The bodies below follow the convention of the
# sibling over/under and odds helpers in this file (row['participant'] is
# 'over' or 'under'; odds come from 'decimal_odds' / 'opening_decimal_odds').
# Confirm this is the intended behavior.

def determine_opening_over_odds(row):
    """Return ``opening_decimal_odds`` when the row is the 'over' side.

    Returns:
        ``row['opening_decimal_odds']`` when ``row['participant']`` is
        ``'over'``; ``None`` when it is ``'under'``; ``'error'`` otherwise.
    """
    ou = row['participant']

    if ou=='over':
        return row['opening_decimal_odds']
    elif ou=='under':
        return None
    else:
        return 'error'

def determine_over_odds(row):
    """Return ``decimal_odds`` when the row is the 'over' side.

    Returns:
        ``row['decimal_odds']`` when ``row['participant']`` is ``'over'``;
        ``None`` when it is ``'under'``; ``'error'`` otherwise.
    """
    ou = row['participant']

    if ou=='over':
        return row['decimal_odds']
    elif ou=='under':
        return None
    else:
        return 'error'

def determine_opening_under_odds(row):
    """Return ``opening_decimal_odds`` when the row is the 'under' side.

    Returns:
        ``row['opening_decimal_odds']`` when ``row['participant']`` is
        ``'under'``; ``None`` when it is ``'over'``; ``'error'`` otherwise.
    """
    ou = row['participant']

    if ou=='over':
        return None
    elif ou=='under':
        return row['opening_decimal_odds']
    else:
        return 'error'

def determine_under_odds(row):
    """Return ``decimal_odds`` when the row is the 'under' side.

    Returns:
        ``row['decimal_odds']`` when ``row['participant']`` is ``'under'``;
        ``None`` when it is ``'over'``; ``'error'`` otherwise.
    """
    ou = row['participant']

    if ou=='over':
        return None
    elif ou=='under':
        return row['decimal_odds']
    else:
        return 'error'
    
################ for first 5 innings moneyline ####################    
    
    
# first_5_pickem['HomeTeam'] = first_5_pickem.apply(lambda row: determine_home_team(row),axis=1)
# first_5_pickem['AwayTeam'] = first_5_pickem.apply(lambda row: determine_away_team(row),axis=1)
# first_5_pickem['HomeOdds'] = first_5_pickem.apply(lambda row: determine_home_odds(row),axis=1)
# first_5_pickem['AwayOdds'] = first_5_pickem.apply(lambda row: determine_away_odds(row),axis=1)
# first_5_pickem['OpeningHomeOdds'] = first_5_pickem.apply(lambda row: determine_opening_home_odds(row),axis=1)
# first_5_pickem['OpeningAwayOdds'] = first_5_pickem.apply(lambda row: determine_opening_away_odds(row),axis=1)

# new_home = first_5_pickem[['event_id','GameDate','GameTime','HomeTeam','OpeningHomeOdds','HomeOdds']].copy().dropna()
# new_away = first_5_pickem[['event_id','GameDate','GameTime','AwayTeam','OpeningAwayOdds','AwayOdds']].copy().dropna()                          
# home_away = pd.merge(new_home,new_away,on=['event_id','GameDate','GameTime'])


# new_home = first_5_pickem[['event_id','GameDate','GameTime','HomeTeam','OpeningHomeOdds','HomeOdds']].copy().dropna()
# new_away = first_5_pickem[['event_id','GameDate','GameTime','AwayTeam','OpeningAwayOdds','AwayOdds']].copy().dropna()                          
# home_away = pd.merge(new_home,new_away,on=['event_id','GameDate','GameTime'])

# home_away = home_away.sort_values(by=['GameDate','event_id'])
# home_away_no_double_headers = home_away.drop_duplicates(subset=['GameDate','HomeTeam'],keep='first')


################ for first 5 innings spread ####################    

# first_5_spread['HomeTeam'] = first_5_spread.apply(lambda row: determine_home_team(row),axis=1)
# first_5_spread['AwayTeam'] = first_5_spread.apply(lambda row: determine_away_team(row),axis=1)
# first_5_spread['HomeOdds'] = first_5_spread.apply(lambda row: determine_home_odds(row),axis=1)
# first_5_spread['AwayOdds'] = first_5_spread.apply(lambda row: determine_away_odds(row),axis=1)
# first_5_spread['OpeningHomeOdds'] = first_5_spread.apply(lambda row: determine_opening_home_odds(row),axis=1)
# first_5_spread['OpeningAwayOdds'] = first_5_spread.apply(lambda row: determine_opening_away_odds(row),axis=1)
# first_5_spread['OpeningHomeSpread'] = first_5_spread.apply(lambda row: determine_opening_home_spread(row),axis=1)
# first_5_spread['OpeningAwaySpread'] = first_5_spread.apply(lambda row: determine_opening_away_spread(row),axis=1)
# first_5_spread['HomeSpread'] = first_5_spread.apply(lambda row: determine_home_spread(row),axis=1)
# first_5_spread['AwaySpread'] = first_5_spread.apply(lambda row: determine_away_spread(row),axis=1)


# new_home = first_5_spread[['event_id','GameDate','GameTime','HomeTeam','OpeningHomeOdds','HomeOdds','OpeningHomeSpread','HomeSpread']].copy().dropna()
# new_away = first_5_spread[['event_id','GameDate','GameTime','AwayTeam','OpeningAwayOdds','AwayOdds','OpeningAwaySpread','AwaySpread']].copy().dropna()                          
# home_away = pd.merge(new_home,new_away,on=['event_id','GameDate'])

# home_away = home_away.sort_values(by=['GameDate','event_id'])
# home_away_no_double_headers = home_away.drop_duplicates(subset=['GameDate','HomeTeam'],keep='first')



# home_away_no_double_headers = home_away_no_double_headers[home_away_no_double_headers.AwayOdds != 'error']

# # home_away.to_csv('home_away_pickem_odds_052121_thru_071021.csv')
# home_away_no_double_headers = home_away_no_double_headers.sort_values(by=['GameDate','HomeTeam']).reset_index()
# home_away_no_double_headers['HomePlusDate'] = home_away_no_double_headers['GameDate'].astype(str) + home_away_no_double_headers['HomeTeam']
# # home_away_no_double_headers['OpeningHomeOddsSpreadRatio'] = home_away_no_double_headers['OpeningHomeOdds']/home_away_no_double_headers['OpeningHomeSpread']
# # home_away_no_double_headers['HomeOddsSpreadRatio'] = home_away_no_double_headers['HomeOdds']/home_away_no_double_headers['HomeSpread']
# # home_away_no_double_headers['OpeningAwayOddsSpreadRatio'] = home_away_no_double_headers['OpeningAwayOdds']/home_away_no_double_headers['OpeningAwaySpread']
# # home_away_no_double_headers['AwayOddsSpreadRatio'] = home_away_no_double_headers['AwayOdds']/home_away_no_double_headers['AwaySpread']

################ for first 5 innings total ####################    


# Load the raw odds export and keep only the rows whose market_id is 398,
# ordered by timestamp and then by event.
df1 = pd.read_csv('big_odds_20190521_thru_20190929.csv')
first_5_total = df1[df1['market_id'] == 398].sort_values(by=['datetime', 'event_id'])
display(first_5_total)

# Reduce to the join key plus the total/odds columns and give them the
# CamelCase names used by the stats table.
# NOTE(review): 'participant' is not carried over, so if an event has both an
# over row and an under row they are indistinguishable after the merge below -
# confirm this is intended.
first_5_total = first_5_total[
    ['event_id', 'opening_spread_total', 'spread_total', 'opening_decimal_odds', 'decimal_odds']
].rename(
    columns={
        'opening_spread_total': 'OpeningTotal',
        'spread_total': 'Total',
        'opening_decimal_odds': 'OpeningTotalOdds',
        'decimal_odds': 'TotalOdds',
    }
)

# Attach the totals to the previously saved stats-plus-spreads table and drop
# every row that has a missing value in any column.
df2 = pd.read_csv('adv_stats_plus_spreads_052119_thru_092919_corrected.csv')
display(df2)

df2 = pd.merge_ordered(df2, first_5_total, on='event_id').dropna()
display(df2)

Unnamed: 0.1,Unnamed: 0,datetime,event,market,result,profit,sportsbook,participant,market_id,event_id,...,participant_full_name,participant_id,sportsbook_id,american_odds,decimal_odds,participant_score,sportsbook_alias,opening_spread_total,opening_american_odds,opening_decimal_odds
5416,5416,2019-05-21T09:24:57-05:00,New York@Baltimore,1st Half - American Total,W,87.0,5Dimes,over,398,3729258,...,,15143,3,-115,1.87,12,Sportbet,5.0,-115,1.87
5417,5417,2019-05-21T09:24:57-05:00,New York@Baltimore,1st Half - American Total,L,-100.0,5Dimes,under,398,3729258,...,,15144,3,-105,1.95,12,Sportbet,5.0,-105,1.95
5740,5740,2019-05-21T12:16:08-05:00,Los Angeles@Tampa Bay,1st Half - American Total,L,-100.0,5Dimes,over,398,3729339,...,,15143,3,100,2.00,3,Sportbet,4.0,105,2.05
5741,5741,2019-05-21T12:16:08-05:00,Los Angeles@Tampa Bay,1st Half - American Total,W,83.0,5Dimes,under,398,3729339,...,,15144,3,-120,1.83,3,Sportbet,4.0,-125,1.80
4156,4156,2019-05-21T15:24:15-05:00,Boston@Toronto,1st Half - American Total,W,105.0,5Dimes,over,398,3728945,...,,15143,3,105,2.05,6,Sportbet,5.0,100,2.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5081,5081,2019-09-28T18:15:54-05:00,Milwaukee@Colorado,1st Half - American Total,W,91.0,5Dimes,under,398,3729180,...,,15144,3,-110,1.91,1,Sportbet,7.5,-120,1.83
2356,2356,2019-09-28T20:03:54-05:00,Houston@Los Angeles,1st Half - American Total,L,-100.0,5Dimes,over,398,3728501,...,,15143,3,-110,1.91,3,Sportbet,5.0,100,2.00
2357,2357,2019-09-28T20:03:54-05:00,Houston@Los Angeles,1st Half - American Total,W,91.0,5Dimes,under,398,3728501,...,,15144,3,-110,1.91,3,Sportbet,5.0,-120,1.83
6484,6484,2019-09-28T20:06:12-05:00,Oakland@Seattle,1st Half - American Total,L,-100.0,5Dimes,over,398,3729516,...,,15143,3,-120,1.83,1,Sportbet,5.0,-105,1.95


Unnamed: 0.1,Unnamed: 0,index_x,HomeTeam_x,AwayTeam_x,GameDate_x,GameTime,HomeOPS,AwayOPS,HomewRCPlus,AwaywRCPlus,...,GameTime_y,AwayTeam_y,OpeningAwayOdds,AwayOdds,OpeningAwaySpread,AwaySpread,OpeningHomeOddsSpreadRatio,HomeOddsSpreadRatio,OpeningAwayOddsSpreadRatio,AwayOddsSpreadRatio
0,1,3.0,Baltimore Orioles,New York Yankees,2019-05-21,23:05:00,0.684444,0.812081,77.547067,112.642638,...,12:08:26,New York Yankees,1.67,1.61,-0.5,-0.5,4.60,4.90,-3.34,-3.22
1,2,2.0,Chicago Cubs,Philadelphia Phillies,2019-05-21,23:05:00,0.832134,0.748106,112.784011,95.336449,...,18:02:33,Philadelphia Phillies,1.83,1.74,0.5,0.5,-4.00,-4.30,3.66,3.48
2,3,0.0,Cleveland Indians,Oakland Athletics,2019-05-21,22:10:00,0.705041,0.739313,83.243812,100.581645,...,17:06:40,Oakland Athletics,1.83,1.77,0.5,0.5,-4.00,-4.20,3.66,3.54
3,4,7.0,Detroit Tigers,Miami Marlins,2019-05-21,23:10:00,0.664616,0.588098,70.105263,61.060000,...,17:09:34,Miami Marlins,1.74,1.65,0.5,0.5,-4.30,-4.70,3.48,3.30
4,5,9.0,Houston Astros,Chicago White Sox,2019-05-21,00:10:00,0.904353,0.714527,137.106117,88.751079,...,18:41:10,Chicago White Sox,2.70,2.85,0.5,0.5,-3.00,-2.88,5.40,5.70
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1522,1687,1556.0,Seattle Mariners,Oakland Athletics,2019-09-28,01:10:00,0.708298,0.863258,92.253247,130.966165,...,20:03:24,Oakland Athletics,1.83,1.91,-0.5,-0.5,4.00,3.82,-3.66,-3.82
1523,1688,1550.0,St. Louis Cardinals,Chicago Cubs,2019-09-28,23:15:00,0.756738,0.831891,99.833137,114.113978,...,17:13:16,Chicago Cubs,2.10,2.05,0.5,0.5,-3.54,-3.60,4.20,4.10
1524,1689,1552.0,Texas Rangers,New York Yankees,2019-09-28,00:05:00,0.760117,0.828818,89.687585,115.260937,...,09:44:11,New York Yankees,1.56,1.56,-0.5,-0.5,5.16,5.16,-3.12,-3.12
1525,1690,1545.0,Toronto Blue Jays,Tampa Bay Rays,2019-09-28,19:07:00,0.780490,0.843940,101.441606,126.158763,...,09:44:52,Tampa Bay Rays,1.71,1.71,-0.5,-0.5,4.40,4.40,-3.42,-3.42


Unnamed: 0.1,Unnamed: 0,index_x,HomeTeam_x,AwayTeam_x,GameDate_x,GameTime,HomeOPS,AwayOPS,HomewRCPlus,AwaywRCPlus,...,OpeningAwaySpread,AwaySpread,OpeningHomeOddsSpreadRatio,HomeOddsSpreadRatio,OpeningAwayOddsSpreadRatio,AwayOddsSpreadRatio,OpeningTotal,Total,OpeningTotalOdds,TotalOdds
0,1284.0,1176.0,Detroit Tigers,Cleveland Indians,2019-08-29,17:10:00,0.687834,0.810574,77.760000,108.295796,...,-0.5,-0.5,4.40,4.90,-3.42,-3.22,4.5,4.0,2.00,1.83
1,1284.0,1176.0,Detroit Tigers,Cleveland Indians,2019-08-29,17:10:00,0.687834,0.810574,77.760000,108.295796,...,-0.5,-0.5,4.40,4.90,-3.42,-3.22,4.5,4.0,1.83,2.00
2,385.0,347.0,Texas Rangers,Cleveland Indians,2019-06-18,00:05:00,0.723912,0.807953,80.167726,107.817272,...,-0.5,-0.5,3.54,3.36,-4.20,-4.56,6.0,5.5,2.00,1.83
3,385.0,347.0,Texas Rangers,Cleveland Indians,2019-06-18,00:05:00,0.723912,0.807953,80.167726,107.817272,...,-0.5,-0.5,3.54,3.36,-4.20,-4.56,6.0,5.5,1.83,2.00
4,117.0,112.0,Colorado Rockies,Arizona Diamondbacks,2019-05-29,00:40:00,0.869862,0.811858,110.008224,105.536585,...,-0.5,-0.5,3.60,3.60,-4.10,-4.10,6.0,6.0,1.83,1.91
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3361,765.0,693.0,Tampa Bay Rays,Chicago White Sox,2019-07-20,22:10:00,0.719474,0.769173,93.326301,103.806678,...,0.5,0.5,-3.74,-4.10,3.90,3.60,4.5,4.5,1.83,1.87
3362,1645.0,1519.0,San Diego Padres,Los Angeles Dodgers,2019-09-25,02:10:00,0.698667,0.811333,86.157468,110.842199,...,-0.5,-0.5,4.10,4.00,-3.60,-3.66,5.0,4.5,2.00,1.91
3363,1645.0,1519.0,San Diego Padres,Los Angeles Dodgers,2019-09-25,02:10:00,0.698667,0.811333,86.157468,110.842199,...,-0.5,-0.5,4.10,4.00,-3.60,-3.66,5.0,4.5,1.83,1.91
3366,352.0,319.0,Detroit Tigers,Cleveland Indians,2019-06-16,17:10:00,0.731302,0.763407,87.671096,96.572816,...,-0.5,-0.5,3.90,4.10,-3.74,-3.60,4.5,4.0,2.00,1.83


In [21]:
# Write the combined odds frame to disk (the DataFrame index is written too,
# since index=False is not passed).
# NOTE(review): `df` is not defined in this cell; it must already exist in the
# notebook session from an earlier cell - confirm which frame this is.
df.to_csv('big_odds_everything_20180521_thru_20180929.csv')

In [16]:
# Load the advanced stats export, order it by date and home team, and add a
# 'HomePlusDate' key (date string followed by home team name).
baseball_stats = (
    pd.read_csv('mlbdata_052118_092918_adv.csv')
    .sort_values(by=['GameDate', 'HomeTeam'])
    .reset_index()
    .assign(HomePlusDate=lambda frame: frame['GameDate'].astype(str) + frame['HomeTeam'])
)
baseball_stats

Unnamed: 0,index,HomeTeam,AwayTeam,GameDate,GameTime,HomeOPS,AwayOPS,HomewRCPlus,AwaywRCPlus,HomePitcherName,...,AwayPitcherGS,HomeFIP,AwayFIP,HomexFIP,AwayxFIP,HomeScoreAfter5,AwayScoreAfter5,HomeTotalScore,AwayTotalScore,HomePlusDate
0,346,Washington Nationals,New York Yankees,2018-05-15,23:05:00,0.641747,0.800546,68.129091,114.971154,Gio Gonzalez,...,4.0,4.84,6.40,4.34,3.76,3.0,3.0,5.0,3.0,2018-05-15Washington Nationals
1,6,Chicago White Sox,Baltimore Orioles,2018-05-21,00:10:00,0.782647,0.768364,112.218950,106.625990,Hector Santiago,...,5.0,6.26,5.71,6.06,4.81,1.0,2.0,2.0,3.0,2018-05-21Chicago White Sox
2,8,Los Angeles Dodgers,Colorado Rockies,2018-05-21,02:10:00,0.760405,0.792284,108.168360,99.401760,Walker Buehler,...,5.0,2.24,4.28,2.73,3.52,1.0,1.0,1.0,2.0,2018-05-21Los Angeles Dodgers
3,3,Milwaukee Brewers,Arizona Diamondbacks,2018-05-21,23:40:00,0.748855,0.599897,99.846924,61.242784,Chase Anderson,...,5.0,6.27,2.31,5.86,2.76,3.0,1.0,4.0,2.0,2018-05-21Milwaukee Brewers
4,5,Minnesota Twins,Detroit Tigers,2018-05-21,00:10:00,0.750052,0.756209,102.058968,102.593301,Jose Berrios,...,1.0,5.97,6.16,4.86,5.59,2.0,2.0,4.0,2.0,2018-05-21Minnesota Twins
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1556,1552,Philadelphia Phillies,Atlanta Braves,2018-09-29,23:05:00,0.682352,0.785181,84.520202,108.720497,Aaron Nola,...,5.0,5.31,2.94,2.84,3.74,0.0,0.0,3.0,0.0,2018-09-29Philadelphia Phillies
1557,1558,San Diego Padres,Arizona Diamondbacks,2018-09-29,00:40:00,0.715550,0.685843,94.774823,81.771368,Jacob Nix,...,4.0,6.49,5.65,5.74,5.22,3.0,4.0,4.0,5.0,2018-09-29San Diego Padres
1558,1549,San Francisco Giants,Los Angeles Dodgers,2018-09-29,20:05:00,0.642960,0.879029,73.706577,139.350217,Dereck Rodriguez,...,5.0,4.02,4.75,5.26,4.07,5.0,5.0,6.0,10.0,2018-09-29San Francisco Giants
1559,1560,Seattle Mariners,Texas Rangers,2018-09-29,01:10:00,0.742776,0.716065,110.014514,84.599327,James Paxton,...,3.0,5.43,7.69,2.86,4.94,1.0,1.0,4.0,1.0,2018-09-29Seattle Mariners


In [17]:
# Join the stats table to the de-duplicated odds table on the 'HomePlusDate'
# key and keep only rows with no missing values in any column.
test_b = pd.merge_ordered(
    baseball_stats,
    home_away_no_double_headers,
    on='HomePlusDate',
).dropna()

# Earlier output file name, kept for reference:
# test_b.to_csv('stats_plus_odds_052121_thru_071021_corrected.csv')
test_b.to_csv('adv_stats_plus_spreads_052118_thru_092918_corrected.csv')

test_b

Unnamed: 0,index_x,HomeTeam_x,AwayTeam_x,GameDate_x,GameTime,HomeOPS,AwayOPS,HomewRCPlus,AwaywRCPlus,HomePitcherName,...,OpeningHomeOdds,HomeOdds,OpeningHomeSpread,HomeSpread,GameTime_y,AwayTeam_y,OpeningAwayOdds,AwayOdds,OpeningAwaySpread,AwaySpread
1,6.0,Chicago White Sox,Baltimore Orioles,2018-05-21,00:10:00,0.782647,0.768364,112.218950,106.625990,Hector Santiago,...,1.95,1.67,6.5,0.5,09:45:45,Baltimore Orioles,1.87,2.3,6.5,-0.5
2,8.0,Los Angeles Dodgers,Colorado Rockies,2018-05-21,02:10:00,0.760405,0.792284,108.168360,99.401760,Walker Buehler,...,1.87,1.8,4.5,-0.5,19:35:18,Colorado Rockies,1.95,2.05,4.5,0.5
3,3.0,Milwaukee Brewers,Arizona Diamondbacks,2018-05-21,23:40:00,0.748855,0.599897,99.846924,61.242784,Chase Anderson,...,2.3,1.91,0,0.5,15:46:54,Arizona Diamondbacks,1.69,1.91,0,-0.5
4,5.0,Minnesota Twins,Detroit Tigers,2018-05-21,00:10:00,0.750052,0.756209,102.058968,102.593301,Jose Berrios,...,2.02,1.69,0,-0.5,09:45:54,Detroit Tigers,1.85,2.25,0,0.5
5,2.0,New York Mets,Miami Marlins,2018-05-21,23:10:00,0.738696,0.743139,104.647860,105.922674,Jason Vargas,...,2.6,2.15,0,-0.5,17:30:57,Miami Marlins,1.57,1.74,0,0.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1682,1538.0,New York Mets,Miami Marlins,2018-09-28,23:10:00,0.810867,0.655152,123.846554,82.024958,Corey Oswalt,...,2.05,1.95,4.5,-0.5,17:05:56,Miami Marlins,1.8,1.87,4.5,0.5
1683,1536.0,Philadelphia Phillies,Atlanta Braves,2018-09-28,23:05:00,0.668939,0.740907,80.331606,96.545682,Jerad Eickhoff,...,2.05,1.87,5,0.5,14:35:24,Atlanta Braves,1.8,1.95,5,-0.5
1684,1544.0,San Diego Padres,Arizona Diamondbacks,2018-09-28,02:10:00,0.752829,0.742224,104.932231,95.685575,Eric Lauer,...,1.8,1.91,0,0.5,08:40:27,Arizona Diamondbacks,2.1,1.91,0,-0.5
1686,1545.0,Seattle Mariners,Texas Rangers,2018-09-28,02:10:00,0.748711,0.691074,108.381737,78.990691,Wade LeBlanc,...,1.83,1.8,5,-0.5,08:41:08,Texas Rangers,2,2.05,5,0.5
