In [6]:
# Execute the shared project notebooks in this kernel, in order (imports, utilities, classes).
# NOTE(review): presumably these provide the names this notebook uses without defining them
# (e.g. `pd`, `np`, `os`, `json`, `requests`, `team_map`, `read_and_save_games`) — confirm.
# NOTE(review): absolute, machine-specific paths; the notebook will not run on another machine as-is.
%run "C:\Users\james\Documents\MLB\Code\U1. Imports.ipynb"
%run "C:\Users\james\Documents\MLB\Code\U2. Utilities.ipynb"
%run "C:\Users\james\Documents\MLB\Code\U3. Classes.ipynb"

# Root folder of the local database; later cells join "A08. Odds API" sub-paths onto it.
baseball_path = r'C:\Users\james\Documents\MLB\Database'




In [7]:
# Load the games table via the shared helper, then cast both score columns to integers.
# NOTE(review): presumably generate=False reads a previously saved table rather than rebuilding it — confirm in U2.
all_games_df = read_and_save_games(team_map, generate=False)
for score_col in ('away_score', 'home_score'):
    all_games_df[score_col] = all_games_df[score_col].astype('int')

In [13]:
# Distinct game start datetimes, in order of first appearance; one odds snapshot is pulled per entry.
unique_datetimes = [*all_games_df['game_datetime'].unique()]

In [18]:
import os
import warnings

# An api key is emailed to you when you sign up to a plan
# Get a free API key at https://api.the-odds-api.com/
# The key is read from the environment so the secret never lives in the notebook.
# Set ODDS_API_KEY before starting the kernel (or with `%env ODDS_API_KEY=...`).
# NOTE(review): a key was previously committed in this cell — treat it as leaked and rotate it.
API_KEY = os.environ.get('ODDS_API_KEY')
if not API_KEY:
    warnings.warn('ODDS_API_KEY is not set; requests to The Odds API will be rejected.')

# Sport key
# More info at https://the-odds-api.com/sports-odds-data/sports-apis.html
SPORT = 'baseball_mlb'

# Bookmaker regions
# uk | us | eu | au. Multiple can be specified if comma delimited.
# More info at https://the-odds-api.com/sports-odds-data/bookmaker-apis.html
REGIONS = 'us'

# Odds markets
# h2h | spreads | totals. Multiple can be specified if comma delimited
# More info at https://the-odds-api.com/sports-odds-data/betting-markets.html
# Note only featured markets (h2h, spreads, totals) are available with the historical odds endpoint.
# This is a single comma-delimited string, not a tuple.
MARKETS = 'h2h,spreads,totals'

# Odds format
# decimal | american
ODDS_FORMAT = 'american'

# Date format
# iso | unix
DATE_FORMAT = 'iso'

# Historical timestamp
# Must be in ISO8601 format
DATE = '2022-04-07T23:10:00Z'


# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
#
# Query bookmaker odds for live and upcoming games as they were at the specified DATE parameter.
# The usage quota cost = 10 x [number of markets specified] x [number of regions specified]
# For examples of usage quota costs, see https://the-odds-api.com/liveapi/guides/v4/#usage-quota-costs-3
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 


def extract_odds_api(API_KEY, SPORT, REGIONS, MARKETS, ODDS_FORMAT, DATE_FORMAT, DATE):
    """Fetch the historical odds snapshot for one timestamp from The Odds API.

    Parameters
    ----------
    API_KEY : str
        The Odds API key.
    SPORT : str
        Sport key inserted into the endpoint URL (e.g. 'baseball_mlb').
    REGIONS, MARKETS : str
        Comma-delimited bookmaker regions / odds markets.
    ODDS_FORMAT, DATE_FORMAT : str
        Passed through as `oddsFormat` / `dateFormat`.
    DATE : str
        ISO8601 timestamp of the snapshot to retrieve.

    Returns
    -------
    dict
        The decoded JSON body of the response.

    Raises
    ------
    RuntimeError
        If the API answers with a status code other than 200. (Previously the
        failure was only printed and the function then crashed with
        UnboundLocalError on `return odds_json`.)
    """
    odds_response = requests.get(
        f'https://api.the-odds-api.com/v4/historical/sports/{SPORT}/odds',
        params={
            'api_key': API_KEY,
            'regions': REGIONS,
            'markets': MARKETS,
            'oddsFormat': ODDS_FORMAT,
            'dateFormat': DATE_FORMAT,
            'date': DATE,
        },
        # Without a timeout a stalled connection would hang the download loop forever.
        timeout=30,
    )

    if odds_response.status_code != 200:
        raise RuntimeError(
            f'Failed to get odds: status_code {odds_response.status_code}, response body {odds_response.text}'
        )

    odds_json = odds_response.json()

    # The logging below is informational only, so use .get(): a missing key or
    # header must not throw away a response that already cost usage quota.
    print(f"Timestamp: {odds_json.get('timestamp')}")
    print(f"Previous available timestamp: {odds_json.get('previous_timestamp')}")
    print(f"Next available timestamp: {odds_json.get('next_timestamp')}")

    # Check the usage quota
    print('Remaining requests', odds_response.headers.get('x-requests-remaining'))
    print('Used requests', odds_response.headers.get('x-requests-used'))

    return odds_json

# Extract Data

In [71]:
# Download one raw odds snapshot per unique game datetime (this batch: entries 50-99).
raw_odds_dir = os.path.join(baseball_path, "A08. Odds API", "Raw")
os.makedirs(raw_odds_dir, exist_ok=True)

# The loop variable is deliberately not called `datetime`, so it cannot rebind that name in the kernel.
for game_datetime in unique_datetimes[50:100]:
    # Convert date to usable name (":" is not allowed in Windows file names)
    raw_path = os.path.join(raw_odds_dir, f"{game_datetime.replace(':', '')}.txt")

    # Every request costs usage quota; skip snapshots already on disk so the cell can be re-run safely.
    if os.path.exists(raw_path):
        continue

    # Extract odds
    odds_json = extract_odds_api(API_KEY, SPORT, REGIONS, MARKETS, ODDS_FORMAT, DATE_FORMAT, game_datetime)

    # Open the file in write mode and write the JSON data to it
    with open(raw_path, "w") as file:
        json.dump(odds_json, file)

Timestamp: 2022-04-12T01:35:00Z
Previous available timestamp: 2022-04-12T01:25:00Z
Next available timestamp: 2022-04-12T01:45:00Z
Remaining requests 18257
Used requests 1743
Timestamp: 2022-04-12T01:45:00Z
Previous available timestamp: 2022-04-12T01:35:00Z
Next available timestamp: 2022-04-12T01:55:00Z
Remaining requests 18227
Used requests 1773
Timestamp: 2022-04-12T17:05:00Z
Previous available timestamp: 2022-04-12T16:55:00Z
Next available timestamp: 2022-04-12T17:15:00Z
Remaining requests 18197
Used requests 1803
Timestamp: 2022-04-12T20:05:00Z
Previous available timestamp: 2022-04-12T19:55:00Z
Next available timestamp: 2022-04-12T20:15:00Z
Remaining requests 18167
Used requests 1833
Timestamp: 2022-04-12T20:05:00Z
Previous available timestamp: 2022-04-12T19:55:00Z
Next available timestamp: 2022-04-12T20:15:00Z
Remaining requests 18137
Used requests 1863
Timestamp: 2022-04-12T22:35:00Z
Previous available timestamp: 2022-04-12T22:25:00Z
Next available timestamp: 2022-04-12T22:45:00Z


# Clean Data

In [9]:
def read_json(datetime):
    """Load the saved odds snapshot for one game datetime.

    Parameters
    ----------
    datetime : str
        Game datetime string; ":" characters are stripped to form the file name
        "<datetime>.txt".

    Returns
    -------
    The decoded JSON content of the snapshot file.

    Raises
    ------
    FileNotFoundError
        If the snapshot exists in neither candidate folder.
    """
    # Format to match text file
    file_name = f"{datetime.replace(':', '')}.txt"
    odds_dir = os.path.join(baseball_path, "A08. Odds API")

    # The download cell writes into the "Raw" sub-folder, while this reader
    # historically looked directly in "A08. Odds API". Try the original location
    # first (unchanged behaviour where it works), then fall back to "Raw".
    candidate_paths = [
        os.path.join(odds_dir, file_name),
        os.path.join(odds_dir, "Raw", file_name),
    ]

    for path in candidate_paths:
        if os.path.isfile(path):
            # Open the file in read mode and load the JSON data
            with open(path, 'r') as file:
                return json.load(file)

    raise FileNotFoundError(f"No odds snapshot for {datetime!r}; looked in: {candidate_paths}")

In [67]:
def clean_json(json_data):
    """Flatten one odds snapshot into a wide table of lines per game and book.

    Parameters
    ----------
    json_data : dict
        Snapshot as returned by `read_json`; only `json_data['data']` is read.
        Each item supplies id, sport_key, commence_time, home_team, away_team and
        a list of bookmakers, each with markets ('h2h', 'spreads', 'totals') and
        their outcomes (name, price and, for spreads/totals, point).

    Returns
    -------
    pandas.DataFrame
        One row per (id, sport_key, book, commence_time, away_team, home_team,
        last_update) with the columns listed in `output_columns` below.
        Columns whose market is absent from the snapshot are filled with NaN
        (previously this raised KeyError); an empty snapshot gives an empty frame.

    Notes
    -----
    * `last_update` is taken per market and is part of the row key, so a book
      whose markets were updated at different times yields more than one row.
      NOTE(review): confirm whether one row per book is what callers expect.
    * `date` is derived from `commence_time` exactly as given in the snapshot.
      NOTE(review): the sample data is 'Z' (UTC) stamped, so late games may fall
      on the next calendar day — confirm this matches the games table.
    * Relies on the notebook-level `team_map` frame (columns FULLNAME and
      BASEBALLPRESSTEAM) to translate full team names to short codes.
    """
    output_columns = ['book', 'last_update', 'VisitorTeamShort', 'HomeTeamShort', 'Spread', 'OU',
                      'SpreadMoney1', 'SpreadMoney2', 'OuMoney1', 'OuMoney2', 'MLMoney1', 'MLMoney2',
                      'VisitorVegasRuns', 'HomeVegasRuns', 'EventDateTime', 'date']

    # Long format: one record per (game, book, market, outcome).
    records = []
    for game in json_data['data']:
        for bookmaker in game['bookmakers']:
            for market in bookmaker['markets']:
                bet = market['key']
                for outcome in market['outcomes']:
                    if bet == 'totals':
                        # Totals keep the outcome name as the side (column names below expect Over/Under).
                        side = outcome['name']
                    else:
                        # h2h / spreads outcomes are named after a team; label them Away/Home.
                        side = 'Away' if outcome['name'] == game['away_team'] else 'Home'

                    if bet in ('spreads', 'totals'):
                        point = outcome.get('point', np.nan)
                    else:
                        # Numeric placeholder: a "0" string here would make the whole
                        # point column (and so Spread / OU) a mixed str/float column.
                        point = 0

                    records.append({
                        'id': game['id'],
                        'sport_key': game['sport_key'],
                        'commence_time': game['commence_time'],
                        'away_team': game['away_team'],
                        'home_team': game['home_team'],
                        'book': bookmaker['key'],
                        'bet': bet,
                        'last_update': market['last_update'],
                        'side': side,
                        'price': outcome['price'],
                        'point': point,
                    })

    # Nothing to pivot: hand back an empty frame with the expected schema.
    if not records:
        return pd.DataFrame(columns=output_columns)

    long_df = pd.DataFrame.from_records(records)

    # Pivot the dataframe: one column per (value, bet, side) combination.
    pivot_df = long_df.pivot_table(index=['id', 'sport_key', 'book', 'commence_time', 'away_team', 'home_team', 'last_update'],
                                   columns=['bet', 'side'],
                                   values=['price', 'point'],
                                   aggfunc='first')

    # Flatten column names, e.g. ('price', 'h2h', 'Away') -> 'price_h2h_Away'
    pivot_df.columns = ['_'.join(col) for col in pivot_df.columns]

    # Reset index to make it flat
    wide_df = pivot_df.reset_index()

    # Translate full team names to short codes (left joins keep unmatched teams as NaN).
    team_lookup = team_map[['FULLNAME', 'BASEBALLPRESSTEAM']]
    for team_col, short_col in (('away_team', 'VisitorTeamShort'), ('home_team', 'HomeTeamShort')):
        wide_df = wide_df.merge(
            team_lookup.rename(columns={'FULLNAME': team_col, 'BASEBALLPRESSTEAM': short_col}),
            on=team_col,
            how='left',
        )

    wide_df = wide_df.rename(columns={
        'point_spreads_Home': 'Spread', 'point_totals_Over': 'OU',
        'price_spreads_Away': 'SpreadMoney1', 'price_spreads_Home': 'SpreadMoney2',
        'price_totals_Over': 'OuMoney1', 'price_totals_Under': 'OuMoney2',
        'price_h2h_Away': 'MLMoney1', 'price_h2h_Home': 'MLMoney2',
        'commence_time': 'EventDateTime',
    })
    wide_df['date'] = pd.to_datetime(wide_df['EventDateTime']).dt.strftime('%Y%m%d')

    # Maintaining to keep compatibility with Fantasy Labs. Could calculate on my own later.
    wide_df['VisitorVegasRuns'] = np.nan
    wide_df['HomeVegasRuns'] = np.nan

    # reindex (rather than plain column selection) adds any missing market column as NaN.
    return wide_df.reindex(columns=output_columns)

In [68]:
# Smoke-test the read -> clean pipeline on the first unique game datetime and preview the first rows.
json_data = read_json(unique_datetimes[0])
final_df = clean_json(json_data)
final_df.head()

Unnamed: 0,book,last_update,VisitorTeamShort,HomeTeamShort,Spread,OU,SpreadMoney1,SpreadMoney2,OuMoney1,OuMoney2,MLMoney1,MLMoney2,VisitorVegasRuns,HomeVegasRuns,EventDateTime,date
0,barstool,2022-04-07T18:10:42Z,NYM,WSH,1.5,9.0,132.0,-164.0,-114.0,-106.0,-120.0,102.0,,,2022-04-07T23:05:00Z,20220407
1,betfair,2022-04-07T18:10:27Z,NYM,WSH,,,,,,,-112.0,108.0,,,2022-04-07T23:05:00Z,20220407
2,betmgm,2022-04-07T18:09:50Z,NYM,WSH,1.5,9.0,135.0,-161.0,-120.0,100.0,-120.0,100.0,,,2022-04-07T23:05:00Z,20220407
3,betonlineag,2022-04-07T18:10:29Z,NYM,WSH,1.5,9.5,131.0,-154.0,-101.0,-119.0,-118.0,106.0,,,2022-04-07T23:05:00Z,20220407
4,betrivers,2022-04-07T18:10:04Z,NYM,WSH,1.5,9.0,132.0,-164.0,-114.0,-106.0,-120.0,104.0,,,2022-04-07T23:05:00Z,20220407


In [69]:
def closing_lines(all_games_df, date):
    """Collect one DraftKings line per game played on `date`.

    For every game on `date`, the odds snapshot saved under that game's
    `game_datetime` is loaded and cleaned, then narrowed to the game's away team
    and the "draftkings" book.

    Parameters
    ----------
    all_games_df : pandas.DataFrame
        Games table; the columns read here are `date`, `away_team` and
        `game_datetime`. `away_team` is matched against `team_map['BBREFTEAM']`.
    date : str
        Date to select, compared against the `date` column as a string literal
        (e.g. "20220410").

    Returns
    -------
    pandas.DataFrame
        The `clean_json` columns plus `file` (the snapshot file stem), at most one
        row per game. An empty DataFrame is returned when no game matches `date`
        (previously `pd.concat` raised ValueError on the empty list).
    """
    # Merge in BASEBALLPRESSTEAM for away team so we can identify matchups better.
    games_df = all_games_df.merge(team_map[['BBREFTEAM', 'BASEBALLPRESSTEAM']], left_on='away_team', right_on='BBREFTEAM', how='left')

    # Extract games from specified date
    games_df = games_df.query(f'date == "{date}"').reset_index(drop=True)

    # Several games can share a start time; parse each snapshot file only once.
    odds_by_datetime = {}
    per_game_lines = []

    # Loop over games
    for game_datetime, away_team in zip(games_df['game_datetime'], games_df['BASEBALLPRESSTEAM']):
        if game_datetime not in odds_by_datetime:
            # Read in relevant json data
            odds_by_datetime[game_datetime] = clean_json(read_json(game_datetime))
        odds_df = odds_by_datetime[game_datetime]

        # Only keep odds for that away team (should be their last update before start).
        # Select books: could do average or best (recent) odds. Will just do DK for now.
        is_this_game = (odds_df['VisitorTeamShort'] == away_team) & (odds_df['book'] == "draftkings")

        # Build a new frame instead of mutating a filtered slice in place
        # (the in-place version triggered SettingWithCopyWarning).
        game_line = (
            odds_df[is_this_game]
            # Avoid doubleheaders: keep a single row per away team
            .drop_duplicates('VisitorTeamShort', keep='first')
            # Add file
            .assign(file=game_datetime.replace(":", ""))
        )

        per_game_lines.append(game_line)

    if not per_game_lines:
        return pd.DataFrame()

    # Concatenate all together
    return pd.concat(per_game_lines, axis=0).reset_index(drop=True)


# Example run: closing lines for games whose `date` is "20220410".
# The bare name on the last line makes the notebook display the resulting frame.
daily_game_df = closing_lines(all_games_df, "20220410")

daily_game_df

Unnamed: 0,book,last_update,VisitorTeamShort,HomeTeamShort,Spread,OU,SpreadMoney1,SpreadMoney2,OuMoney1,OuMoney2,MLMoney1,MLMoney2,VisitorVegasRuns,HomeVegasRuns,EventDateTime,date,file
0,draftkings,2022-04-10T16:58:56Z,OAK,PHI,-1.5,8.5,100.0,-120.0,-105.0,-115.0,190.0,-233.0,,,2022-04-10T17:06:00Z,20220410,2022-04-10T170500Z
1,draftkings,2022-04-10T16:58:56Z,BAL,TB,-1.5,8.0,-130.0,110.0,-120.0,100.0,155.0,-179.0,,,2022-04-10T17:11:00Z,20220410,2022-04-10T171000Z
2,draftkings,2022-04-10T16:58:56Z,CWS,DET,1.5,8.5,120.0,-141.0,110.0,-130.0,-135.0,115.0,,,2022-04-10T17:11:00Z,20220410,2022-04-10T171000Z
3,draftkings,2022-04-10T17:29:21Z,NYM,WSH,1.5,9.5,115.0,-135.0,100.0,-120.0,-135.0,115.0,,,2022-04-10T17:36:00Z,20220410,2022-04-10T173500Z
4,draftkings,2022-04-10T17:29:21Z,CIN,ATL,-1.5,9.0,-135.0,115.0,-115.0,-105.0,150.0,-169.0,,,2022-04-10T17:36:00Z,20220410,2022-04-10T173500Z
5,draftkings,2022-04-10T17:29:21Z,TEX,TOR,-1.5,9.5,-110.0,-110.0,-105.0,-115.0,170.0,-200.0,,,2022-04-10T17:38:00Z,20220410,2022-04-10T173700Z
6,draftkings,2022-04-10T17:59:47Z,CLE,KC,1.5,9.0,160.0,-189.0,-105.0,-115.0,-105.0,-115.0,,,2022-04-10T18:10:00Z,20220410,2022-04-10T181000Z
7,draftkings,2022-04-10T17:59:47Z,SEA,MIN,-1.5,8.5,-161.0,140.0,-115.0,-105.0,120.0,-141.0,,,2022-04-10T18:10:00Z,20220410,2022-04-10T181000Z
8,draftkings,2022-04-10T18:09:07Z,PIT,STL,-1.5,8.5,-105.0,-115.0,-110.0,-110.0,185.0,-227.0,,,2022-04-10T18:16:00Z,20220410,2022-04-10T181500Z
9,draftkings,2022-04-10T18:09:07Z,MIL,CHC,1.5,7.5,135.0,-154.0,100.0,-120.0,-135.0,115.0,,,2022-04-10T18:21:00Z,20220410,2022-04-10T182000Z
