In [16]:

import os
import re
import requests
import sys
import os
import pandas as pd
import numpy as np
import tiktoken
from openai import AzureOpenAI
import time
from tqdm import tqdm
import matplotlib.pyplot as plt
import json
from pybaseball import *
import tiktoken
import ast


In [5]:
client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),  
    api_version="2024-02-01",
    azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
    )

deployment_name='infiniti-gpt4' 

def get_completion_from_messages(messages, 
                                 model="infiniti-gpt4", 
                                 temperature=0, 
                                 max_tokens=600):
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature, 
        max_tokens=max_tokens, 
    )
    return response.choices[0].message.content



def num_tokens_from_string(string: str, encoding_name: str) -> int:
    encoding = tiktoken.encoding_for_model(encoding_name)
    num_tokens = len(encoding.encode(string))
    return num_tokens

In [6]:

def get_player_id(last_name, first_name):
    """Gets the player MLBAM ID for the given player."""
    data = playerid_lookup(last_name, first_name, fuzzy=True)
    
    if data.empty:
        return json.dumps({"error": "Player not found"})
    
    data = data.head(1)
    player_info = {
        "name": data['name_first'].values[0] + ' ' + data['name_last'].values[0],
        "mlb_id": data["key_mlbam"].values[0],
        'fg_ig': data['key_fangraphs'].values[0]

    }
    return player_info


In [7]:
t = pybaseball.batting_stats(2024)
available_stats = list(t.columns)
available_stats = {
    'IDfg': 'Fangraphs Player ID',
    'Season': 'Season Year',
    'Name': 'Player Name',
    'Team': 'Team Name',
    'Age': 'Player Age',
    'G': 'Games Played',
    'AB': 'At Bats',
    'PA': 'Plate Appearances',
    'H': 'Hits',
    '1B': 'Singles',
    '2B': 'Doubles',
    '3B': 'Triples',
    'HR': 'Home Runs',
    'R': 'Runs Scored',
    'RBI': 'Runs Batted In',
    'BB': 'Walks',
    'IBB': 'Intentional Walks',
    'SO': 'Strikeouts',
    'HBP': 'Hit By Pitches',
    'SF': 'Sacrifice Flies',
    'SH': 'Sacrifice Bunts',
    'GDP': 'Grounded into Double Play',
    'SB': 'Stolen Bases',
    'CS': 'Caught Stealing',
    'AVG': 'Batting Average',
    'GB': 'Ground Balls',
    'FB': 'Fly Balls',
    'LD': 'Line Drives',
    'IFFB': 'Infield Fly Balls',
    'Pitches': 'Total Pitches Seen',
    'Balls': 'Total Balls Seen',
    'Strikes': 'Total Strikes Seen',
    'IFH': 'Infield Hits',
    'BU': 'Bunts',
    'BUH': 'Bunt Hits',
    'BB%': 'Walk Percentage',
    'K%': 'Strikeout Percentage',
    'BB/K': 'Walk to Strikeout Ratio',
    'OBP': 'On Base Percentage',
    'SLG': 'Slugging Percentage',
    'OPS': 'On Base Plus Slugging',
    'ISO': 'Isolated Power',
    'BABIP': 'Batting Average on Balls in Play',
    'GB/FB': 'Ground Ball to Fly Ball Ratio',
    'LD%': 'Line Drive Percentage',
    'GB%': 'Ground Ball Percentage',
    'FB%': 'Fly Ball Percentage',
    'IFFB%': 'Infield Fly Ball Percentage',
    'HR/FB': 'Home Run to Fly Ball Ratio',
    'IFH%': 'Infield Hit Percentage',
    'BUH%': 'Bunt Hit Percentage',
    'wOBA': 'Weighted On Base Average',
    'wRAA': 'Weighted Runs Above Average',
    'wRC': 'Weighted Runs Created',
    'Bat': 'Batting Runs Above Average',
    'Fld': 'Fielding Runs Above Average',
    'Rep': 'Replacement Runs',
    'Pos': 'Positional Runs Above Average',
    'RAR': 'Runs Above Replacement',
    'WAR': 'Wins Above Replacement',
    'Dol': 'Dollar Value of Performance',
    'Spd': 'Speed Score',
    'wRC+': 'Weighted Runs Created Plus',
    'WPA': 'Win Probability Added',
    '-WPA': 'Negative Win Probability Added',
    '+WPA': 'Positive Win Probability Added',
    'RE24': 'Run Expectancy 24 Base Out State',
    'REW': 'Run Expectancy Wins',
    'pLI': 'Average Leverage Index',
    'phLI': 'Average Leverage Index while Pinch Hitting',
    'PH': 'Pinch Hits',
    'WPA/LI': 'Situational Wins',
    'Clutch': 'Clutch Score',
    'FB% (Pitch)': 'Fastball Percentage',
    'FBv': 'Average Fastball Velocity',
    'SL%': 'Slider Percentage',
    'SLv': 'Average Slider Velocity',
    'CT%': 'Cutter Percentage',
    'CTv': 'Average Cutter Velocity',
    'CB%': 'Curveball Percentage',
    'CBv': 'Average Curveball Velocity',
    'CH%': 'Changeup Percentage',
    'CHv': 'Average Changeup Velocity',
    'SF%': 'Split-Finger Percentage',
    'SFv': 'Average Split-Finger Velocity',
    'KN%': 'Knuckleball Percentage',
    'KNv': 'Average Knuckleball Velocity',
    'XX%': 'Unidentified Pitch Percentage',
    'PO%': 'Pitch Out Percentage',
    'wFB': 'Weighted Fastball Runs',
    'wSL': 'Weighted Slider Runs',
    'wCT': 'Weighted Cutter Runs',
    'wCB': 'Weighted Curveball Runs',
    'wCH': 'Weighted Changeup Runs',
    'wSF': 'Weighted Split-Finger Runs',
    'wKN': 'Weighted Knuckleball Runs',
    'wFB/C': ' How well a player hits Fastballs per 100 pitches',
    'wSL/C': 'How well a player hits Sliders per 100 pitches',
    'wCT/C': 'How well a player hits Cutters per 100 pitches',
    'wCB/C': 'How well a player hits Curveballs per 100 pitches',
    'wCH/C': 'How well a player hits Changeups per 100 pitches',
    'wSF/C': 'How well a player hits Splitters per 100 pitches',
    'wKN/C': 'Weighted Knuckleball Runs per 100 pitches',
    'O-Swing%': 'Outside the Zone Swing Percentage',
    'Z-Swing%': 'Inside the Zone Swing Percentage',
    'Swing%': 'Swing Percentage',
    'O-Contact%': 'Outside the Zone Contact Percentage',
    'Z-Contact%': 'Inside the Zone Contact Percentage',
    'Contact%': 'Contact Percentage',
    'Zone%': 'Zone Percentage',
    'F-Strike%': 'First Pitch Strike Percentage',
    'SwStr%': 'Swinging Strike Percentage',
    'BsR': 'Base Running Runs Above Average',
    'FA% (sc)': 'Four Seam Fastball Percentage (sc)',
    'FT% (sc)': 'Two Seam Fastball Percentage (sc)',
    'FC% (sc)': 'Cutter Percentage (sc)',
    'FS% (sc)': 'Split Finger Percentage (sc)',
    'FO% (sc)': 'Forkball Percentage (sc)',
    'SI% (sc)': 'Sinker Percentage (sc)',
    'SL% (sc)': 'Slider Percentage (sc)',
    'CU% (sc)': 'Curveball Percentage (sc)',
    'KC% (sc)': 'Knuckle Curve Percentage (sc)',
    'EP% (sc)': 'Eephus Percentage (sc)',
    'CH% (sc)': 'Changeup Percentage (sc)',
    'SC% (sc)': 'Screwball Percentage (sc)',
    'KN% (sc)': 'Knuckleball Percentage (sc)',
    'UN% (sc)': 'Unknown Pitch Percentage (sc)',
    'vFA (sc)': 'Four Seam Fastball Velocity (sc)',
    'vFT (sc)': 'Two Seam Fastball Velocity (sc)',
    'vFC (sc)': 'Cutter Velocity (sc)',
    'vFS (sc)': 'Split Finger Velocity (sc)',
    'vFO (sc)': 'Forkball Velocity (sc)',
    'vSI (sc)': 'Sinker Velocity (sc)',
    'vSL (sc)': 'Slider Velocity (sc)',
    'vCU (sc)': 'Curveball Velocity (sc)',
    'vKC (sc)': 'Knuckle Curve Velocity (sc)',
    'vEP (sc)': 'Eephus Velocity (sc)',
    'vCH (sc)': 'Changeup Velocity (sc)',
    'vSC (sc)': 'Screwball Velocity (sc)',
    'vKN (sc)': 'Knuckleball Velocity (sc)',
    'FA-X (sc)': 'Four Seam Fastball Horizontal Movement (sc)',
    'FT-X (sc)': 'Two Seam Fastball Horizontal Movement (sc)',
    'FC-X (sc)': 'Cutter Horizontal Movement (sc)',
    'FS-X (sc)': 'Split Finger Horizontal Movement (sc)',
    'FO-X (sc)': 'Forkball Horizontal Movement (sc)',
    'SI-X (sc)': 'Sinker Horizontal Movement (sc)',
    'SL-X (sc)': 'Slider Horizontal Movement (sc)',
    'CU-X (sc)': 'Curveball Horizontal Movement (sc)',
    'KC-X (sc)': 'Knuckle Curve Horizontal Movement (sc)',
    'EP-X (sc)': 'Eephus Horizontal Movement (sc)',
    'CH-X (sc)': 'Changeup Horizontal Movement (sc)',
    'SC-X (sc)': 'Screwball Horizontal Movement (sc)',
    'KN-X (sc)': 'Knuckleball Horizontal Movement (sc)',
    'FA-Z (sc)': 'Four Seam Fastball Vertical Movement (sc)',
    'FT-Z (sc)': 'Two Seam Fastball Vertical Movement (sc)',
    'FC-Z (sc)': 'Cutter Vertical Movement (sc)',
    'FS-Z (sc)': 'Split Finger Vertical Movement (sc)',
    'FO-Z (sc)': 'Forkball Vertical Movement (sc)',
    'SI-Z (sc)': 'Sinker Vertical Movement (sc)',
    'SL-Z (sc)': 'Slider Vertical Movement (sc)',
    'CU-Z (sc)': 'Curveball Vertical Movement (sc)',
    'KC-Z (sc)': 'Knuckle Curve Vertical Movement (sc)',
    'EP-Z (sc)': 'Eephus Vertical Movement (sc)',
    'CH-Z (sc)': 'Changeup Vertical Movement (sc)',
    'SC-Z (sc)': 'Screwball Vertical Movement (sc)',
    'KN-Z (sc)': 'Knuckleball Vertical Movement (sc)',
    'wFA (sc)': 'Weighted Four Seam Fastball Runs (sc)',
    'wFT (sc)': 'Weighted Two Seam Fastball Runs (sc)',
    'wFC (sc)': 'Weighted Cutter Runs (sc)',
    'wFS (sc)': 'Weighted Split Finger Runs (sc)',
    'wFO (sc)': 'Weighted Forkball Runs (sc)',
    'wSI (sc)': 'Weighted Sinker Runs (sc)',
    'wSL (sc)': 'Weighted Slider Runs (sc)',
    'wCU (sc)': 'Weighted Curveball Runs (sc)',
    'wKC (sc)': 'Weighted Knuckle Curve Runs (sc)',
    'wEP (sc)': 'Weighted Eephus Runs (sc)',
    'wCH (sc)': 'Weighted Changeup Runs (sc)',
    'wSC (sc)': 'Weighted Screwball Runs (sc)',
    'wKN (sc)': 'Weighted Knuckleball Runs (sc)',
    'wFA/C (sc)': 'Weighted Four Seam Fastball Runs per 100 pitches (sc)',
    'wFT/C (sc)': 'Weighted Two Seam Fastball Runs per 100 pitches (sc)',
    'wFC/C (sc)': 'Weighted Cutter Runs per 100 pitches (sc)',
    'wFS/C (sc)': 'Weighted Split Finger Runs per 100 pitches (sc)',
    'wFO/C (sc)': 'Weighted Forkball Runs per 100 pitches (sc)',
    'wSI/C (sc)': 'Weighted Sinker Runs per 100 pitches (sc)',
    'wSL/C (sc)': 'Weighted Slider Runs per 100 pitches (sc)',
    'wCU/C (sc)': 'Weighted Curveball Runs per 100 pitches (sc)',
    'wKC/C (sc)': 'Weighted Knuckle Curve Runs per 100 pitches (sc)',
    'wEP/C (sc)': 'Weighted Eephus Runs per 100 pitches (sc)',
    'wCH/C (sc)': 'Weighted Changeup Runs per 100 pitches (sc)',
    'wSC/C (sc)': 'Weighted Screwball Runs per 100 pitches (sc)',
    'wKN/C (sc)': 'Weighted Knuckleball Runs per 100 pitches (sc)',
    'O-Swing% (sc)': 'Outside the Zone Swing Percentage (sc)',
    'Z-Swing% (sc)': 'Inside the Zone Swing Percentage (sc)',
    'Swing% (sc)': 'Swing Percentage (sc)',
    'O-Contact% (sc)': 'Outside the Zone Contact Percentage (sc)',
    'Z-Contact% (sc)': 'Inside the Zone Contact Percentage (sc)',
    'Contact% (sc)': 'Contact Percentage (sc)',
    'Zone% (sc)': 'Zone Percentage (sc)',
    'Pace': 'Average Time Between Pitches',
    'Def': 'Defensive Runs Above Average',
    'wSB': 'Weighted Stolen Base Runs Above Average',
    'UBR': 'Ultimate Base Running',
    'Age Rng': 'Age Range',
    'Off': 'Offensive Runs Above Average',
    'Lg': 'League Adjustment',
    'wGDP': 'Weighted Grounded Into Double Play Runs',
    'Pull%': 'Pull Percentage',
    'Cent%': 'Center Percentage',
    'Oppo%': 'Opposite Field Percentage',
    'Soft%': 'Soft Contact Percentage',
    'Med%': 'Medium Contact Percentage',
    'Hard%': 'Hard Contact Percentage',
    'TTO%': 'Three True Outcomes Percentage',
    'CH% (pi)': 'Changeup Percentage (pi)',
    'CS% (pi)': 'Caught Stealing Percentage (pi)',
    'CU% (pi)': 'Curveball Percentage (pi)',
    'FA% (pi)': 'Four Seam Fastball Percentage (pi)',
    'FC% (pi)': 'Cutter Percentage (pi)',
    'FS% (pi)': 'Split Finger Percentage (pi)',
    'KN% (pi)': 'Knuckleball Percentage (pi)',
    'SB% (pi)': 'Stolen Base Percentage (pi)',
    'SI% (pi)': 'Sinker Percentage (pi)',
    'SL% (pi)': 'Slider Percentage (pi)',
    'XX% (pi)': 'Unidentified Pitch Percentage (pi)',
    'vCH (pi)': 'Changeup Velocity (pi)',
    'vCS (pi)': 'Caught Stealing Velocity (pi)',
    'vCU (pi)': 'Curveball Velocity (pi)',
    'vFA (pi)': 'Four Seam Fastball Velocity (pi)',
    'vFC (pi)': 'Cutter Velocity (pi)',
    'vFS (pi)': 'Split Finger Velocity (pi)',
    'vKN (pi)': 'Knuckleball Velocity (pi)',
    'vSB (pi)': 'Stolen Base Velocity (pi)',
    'vSI (pi)': 'Sinker Velocity (pi)',
    'vSL (pi)': 'Slider Velocity (pi)',
    'vXX (pi)': 'Unidentified Pitch Velocity (pi)',
    'CH-X (pi)': 'Changeup Horizontal Movement (pi)',
    'CS-X (pi)': 'Caught Stealing Horizontal Movement (pi)',
    'CU-X (pi)': 'Curveball Horizontal Movement (pi)',
    'FA-X (pi)': 'Four Seam Fastball Horizontal Movement (pi)',
    'FC-X (pi)': 'Cutter Horizontal Movement (pi)',
    'FS-X (pi)': 'Split Finger Horizontal Movement (pi)',
    'KN-X (pi)': 'Knuckleball Horizontal Movement (pi)',
    'SB-X (pi)': 'Stolen Base Horizontal Movement (pi)',
    'SI-X (pi)': 'Sinker Horizontal Movement (pi)',
    'SL-X (pi)': 'Slider Horizontal Movement (pi)',
    'XX-X (pi)': 'Unidentified Pitch Horizontal Movement (pi)',
    'CH-Z (pi)': 'Changeup Vertical Movement (pi)',
    'CS-Z (pi)': 'Caught Stealing Vertical Movement (pi)',
    'CU-Z (pi)': 'Curveball Vertical Movement (pi)',
    'FA-Z (pi)': 'Four Seam Fastball Vertical Movement (pi)',
    'FC-Z (pi)': 'Cutter Vertical Movement (pi)',
    'FS-Z (pi)': 'Split Finger Vertical Movement (pi)',
    'KN-Z (pi)': 'Knuckleball Vertical Movement (pi)',
    'SB-Z (pi)': 'Stolen Base Vertical Movement (pi)',
    'SI-Z (pi)': 'Sinker Vertical Movement (pi)',
    'SL-Z (pi)': 'Slider Vertical Movement (pi)',
    'XX-Z (pi)': 'Unidentified Pitch Vertical Movement (pi)',
    'wCH (pi)': 'Weighted Changeup Runs (pi)',
    'wCS (pi)': 'Weighted Caught Stealing Runs (pi)',
    'wCU (pi)': 'Weighted Curveball Runs (pi)',
    'wFA (pi)': 'Weighted Four Seam Fastball Runs (pi)',
    'wFC (pi)': 'Weighted Cutter Runs (pi)',
    'wFS (pi)': 'Weighted Split Finger Runs (pi)',
    'wKN (pi)': 'Weighted Knuckleball Runs (pi)',
    'wSB (pi)': 'Weighted Stolen Base Runs (pi)',
    'wSI (pi)': 'Weighted Sinker Runs (pi)',
    'wSL (pi)': 'Weighted Slider Runs (pi)',
    'wXX (pi)': 'Weighted Unidentified Pitch Runs (pi)',
    'wCH/C (pi)': 'Weighted Changeup Runs per 100 pitches (pi)',
    'wCS/C (pi)': 'Weighted Caught Stealing Runs per 100 pitches (pi)',
    'wCU/C (pi)': 'Weighted Curveball Runs per 100 pitches (pi)',
    'wFA/C (pi)': 'Weighted Four Seam Fastball Runs per 100 pitches (pi)',
    'wFC/C (pi)': 'Weighted Cutter Runs per 100 pitches (pi)',
    'wFS/C (pi)': 'Weighted Split Finger Runs per 100 pitches (pi)',
    'wKN/C (pi)': 'Weighted Knuckleball Runs per 100 pitches (pi)',
    'wSB/C (pi)': 'Weighted Stolen Base Runs per 100 pitches (pi)',
    'wSI/C (pi)': 'Weighted Sinker Runs per 100 pitches (pi)',
    'wSL/C (pi)': 'Weighted Slider Runs per 100 pitches (pi)',
    'wXX/C (pi)': 'Weighted Unidentified Pitch Runs per 100 pitches (pi)',
    'O-Swing% (pi)': 'Outside the Zone Swing Percentage (pi)',
    'Z-Swing% (pi)': 'Inside the Zone Swing Percentage (pi)',
    'Swing% (pi)': 'Swing Percentage (pi)',
    'O-Contact% (pi)': 'Outside the Zone Contact Percentage (pi)',
    'Z-Contact% (pi)': 'Inside the Zone Contact Percentage (pi)',
    'Contact% (pi)': 'Contact Percentage (pi)',
    'Zone% (pi)': 'Zone Percentage (pi)',
    'Pace (pi)': 'Average Time Between Pitches (pi)',
    'FRM': 'Framing Runs',
    'AVG+': 'Batting Average Plus',
    'BB%+': 'Walk Percentage Plus',
    'K%+': 'Strikeout Percentage Plus',
    'OBP+': 'On Base Percentage Plus',
    'SLG+': 'Slugging Percentage Plus',
    'ISO+': 'Isolated Power Plus',
    'BABIP+': 'Batting Average on Balls in Play Plus',
    'LD+%': 'Line Drive Percentage Plus',
    'GB%+': 'Ground Ball Percentage Plus',
    'FB%+': 'Fly Ball Percentage Plus',
    'HR/FB%+': 'Home Run to Fly Ball Percentage Plus',
    'Pull%+': 'Pull Percentage Plus',
    'Cent%+': 'Center Percentage Plus',
    'Oppo%+': 'Opposite Field Percentage Plus',
    'Soft%+': 'Soft Contact Percentage Plus',
    'Med%+': 'Medium Contact Percentage Plus',
    'Hard%+': 'Hard Contact Percentage Plus',
    'EV': 'Exit Velocity',
    'LA': 'Launch Angle',
    'Barrels': 'Number of Barrels',
    'Barrel%': 'Barrel Percentage',
    'maxEV': 'Maximum Exit Velocity',
    'HardHit': 'Number of Hard Hit Balls',
    'HardHit%': 'Hard Hit Percentage',
    'Events': 'Number of Events',
    'CStr%': 'Called Strike Percentage',
    'CSW%': 'Called Strikes and Whiffs Percentage',
    'xBA': 'Expected Batting Average',
    'xSLG': 'Expected Slugging Percentage',
    'xwOBA': 'Expected Weighted On Base Average',
    'L-WAR': 'Location Adjusted WAR'
}

In [8]:
def get_player_info(content):

    completion = client.chat.completions.create(
        model="infiniti-gpt4",
        messages=[{"role": "user", "content": content}],
        functions=[
            {
                "name": "get_player_id",
                "description": "Get the player fangraphs and MLBAM ID for a given player",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "last_name": {
                            "type": "string",
                            "description": "The player's last name",
                        },
                        "first_name": {
                            "type": "string",
                            "description": "The player's first name",
                        },
                    },
                    "required": ["last_name", "first_name"],
                },
            }
        ],
        function_call="auto",
    )

    # Accessing the function call arguments in the new format
    reply_content = completion.choices[0].message
    print(reply_content)
    
    if reply_content.function_call and reply_content.function_call.arguments:
        function_call_arguments = reply_content.function_call.arguments
        print(function_call_arguments)
        
        # Parsing the function call arguments
        funcs = json.loads(function_call_arguments)
        
        # Calling the get_player_id function with parsed arguments
        player_info = get_player_id(funcs['last_name'], funcs['first_name'])
        
        return player_info
    else:
        return {"error": "No function call arguments found."}


In [9]:
def get_player_stats(fg_id, year, stats):
    """
    Get specific stats for a given player in a given year.
    
    Args:
    fg_id (int): The Fangraphs player ID.
    year (int): The year of the stats.
    stats (list or str): The stat or list of stats to retrieve.
    
    Returns:
    dict: A dictionary with the player's name and the requested stats.
    """
    # Ensure stats is a list
    if isinstance(stats, str):
        stats = [stats]
    
    # Get the batting stats for the given year
    df = pybaseball.batting_stats(year)
    
    # Find the player's row by player ID
    player_row = df[df['IDfg'] == fg_id]
    
    if player_row.empty:
        return {"error": "Player not found"}
    
    # Extract the player's name and the requested stats
    player_stats = {
        "name": player_row.iloc[0]['Name'],
        "year": year
    }
    
    for stat in stats:
        if stat in player_row.columns:
            player_stats[stat] = player_row.iloc[0][stat]
        else:
            player_stats[stat] = "Stat not found"
    
    return player_stats

In [10]:

def map_suggested_to_actual_stats(suggested_stats, available_stats):
    """
    Uses a GPT model to map suggested stats to actual stats.

    Args:
    suggested_stats (list of str): The list of suggested stats from the model.
    available_stats (dict): The dictionary of available stats with explanations.

    Returns:
    list of str: The list of mapped stats that correspond to the actual stats.
    """
    # Convert available_stats dictionary to a list of strings for the prompt
    available_stats_list = [f"{key}: {value}" for key, value in available_stats.items()]

    # System message with instructions for the GPT model
    system_message = f"""
    You are provided with a list of suggested statistics and a dictionary of available statistics with explanations from a baseball dataset.
    
    Your task is to evaluate each suggested statistic one at a time and map it to the most relevant actual statistic from the available stats. 

    - Suggested Stats: {suggested_stats}
    - Available Stats: {available_stats_list}
    
    For each suggested statistic:
    1. Consider the meaning and context of the suggested statistic.
    2. Evaluate the available statistics and find the best match based on semantic meaning and relevance.
    3. Map the suggested statistic to the most appropriate actual statistic.
    
    Focus on finding the best fit for each suggested statistic to ensure accurate and meaningful mappings. Provide the final mappings in a pythion list, similiar to ['Stat1', 'Stat2']

    YOUR FINAL MAPPINGS MUST BE IN THE FORMAT OF A PYTHON LIST: ['Stat 1', 'Stat 2', ...], NOTHING ELSE, ONLY A SINGLE PYTHON LIST, ONLY INCLUDE THE STAT KEYS
    """

    messages = [{"role": "system", "content": system_message}]
    response = get_completion_from_messages(messages)
    response = response.strip()
        # Process the response to extract the actual stat abbreviation
    
    return  response


def get_completion_from_messages(messages, 
                                 model="infiniti-gpt4",
                                 temperature=0, 
                                 max_tokens=600):
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature, 
        max_tokens=max_tokens, 
    )
    return response.choices[0].message.content

In [25]:
def get_player_stats_from_message(content):
    """
    Main function to handle content message, identify parameters, call the necessary functions,
    and return the player's stats along with token counts and costs.
    """
    total_input_tokens = num_tokens_from_string(content, "gpt-4")
    
    # Step 1: Call the first function to get the player ID
    completion = client.chat.completions.create(
        model="infiniti-gpt4",
        messages=[{"role": "user", "content": content}],
        functions=[
            {
                "name": "get_player_id",
                "description": "Get the player MLBAM ID for a given player",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "last_name": {
                            "type": "string",
                            "description": "The player's last name",
                        },
                        "first_name": {
                            "type": "string",
                            "description": "The player's first name",
                        },
                    },
                    "required": ["last_name", "first_name"],
                },
            },
            {
                "name": "get_player_stats",
                "description": "Get the player stats for a given player and year. The model should interpret the user's request to determine the necessary stats.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "fg_id": {
                            "type": "integer",
                            "description": "The Fangraphs player ID",
                        },
                        "year": {
                            "type": "integer",
                            "description": "The year of the stats",
                        },
                        "stats": {
                            "type": "array",
                            "items": {
                                "type": "string"
                            },
                            "description": "This is the list of stats we are interested in receiving. You must interpret the user's request and create a list of either one or multiple statistics using your knowledge of baseball to identify what those stats are.",
                        },
                    },
                    "required": ["fg_id", "year", "stats"],
                },
            }
        ],
        function_call="auto",
    )

    # Extract the response from the first function call
    message_content = completion.choices[0].message.content
    function_call_arguments = json.loads(completion.choices[0].message.function_call.arguments)
    output_tokens_1 = num_tokens_from_string(message_content, "gpt-4")

    # Check if the function call is to get_player_id
    if completion.choices[0].message.function_call.name == "get_player_id":
        player_info = get_player_id(function_call_arguments['last_name'], function_call_arguments['first_name'])
        if "error" in player_info:
            return json.dumps(player_info)

        # Ensure all values in player_info are native Python types
        player_info = {k: int(v) if isinstance(v, (np.integer, np.int64, np.int32)) else v for k, v in player_info.items()}

        # Call the second function to get the player stats using the player ID
        completion = client.chat.completions.create(
            model="infiniti-gpt4",
            messages=[
                {"role": "user", "content": content},
                {"role": "assistant", "content": None, "function_call": {
                    "name": "get_player_id",
                    "arguments": json.dumps(function_call_arguments)
                }},
                {"role": "function", "name": "get_player_id", "content": json.dumps(player_info)}
            ],
            functions=[
                {
                    "name": "get_player_id",
                    "description": "Get the player MLBAM ID for a given player",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "last_name": {
                                "type": "string",
                                "description": "The player's last name",
                            },
                            "first_name": {
                                "type": "string",
                                "description": "The player's first name",
                            },
                        },
                        "required": ["last_name", "first_name"],
                    },
                },
                {
                    "name": "get_player_stats",
                    "description": "Get the player stats for a given player and year. The model should interpret the user's request to determine the necessary stats.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "fg_id": {
                                "type": "integer",
                                "description": "The Fangraphs player ID",
                            },
                            "year": {
                                "type": "integer",
                                "description": "The year of the stats",
                            },
                            "stats": {
                                "type": "array",
                                "items": {
                                    "type": "string"
                                },
                                "description": "This is the list of stats we are interested in receiving. You must interpret the user's request and create a list of either one or multiple statistics using your knowledge of baseball to identify what those stats are.",
                            },
                        },
                        "required": ["fg_id", "year", "stats"],
                    },
                }
            ],
            function_call="auto",
        )

        # Extract the response from the second function call
        message_content = completion.choices[0].message.content
        function_call_arguments = json.loads(completion.choices[0].message.function_call.arguments)
        output_tokens_2 = num_tokens_from_string(message_content, "gpt-4")

        # Use the second set of function_call_arguments generated by the second calling
        fg_id = function_call_arguments['fg_id']
        year = function_call_arguments['year']
        suggested = function_call_arguments['stats']

        stats = map_suggested_to_actual_stats(suggested, available_stats)
        stats = ast.literal_eval(stats)

        # Get player stats
        player_stats = get_player_stats(fg_id, year, stats)
        player_stats = {k: int(v) if isinstance(v, (np.integer, np.int64, np.int32)) else v for k, v in player_stats.items()}
        output_tokens_3 = num_tokens_from_string(json.dumps(player_stats), "gpt-4")

        final_output_tokens = output_tokens_1 + output_tokens_2 + output_tokens_3
        print(f"Input tokens: {total_input_tokens}")
        print(f"Input Cost: ${(total_input_tokens / 1000) * 0.06}")
        print(f"Output tokens: {final_output_tokens}")
        print(f"Output Cost: ${(final_output_tokens / 1000) * 0.12}")

        return json.dumps(player_stats, indent=4)

In [26]:
content_message = "What was Gunnar Hendersons Zone Contact Percentage, and the average fastball velocity he faced in 2023?"
result = get_player_stats_from_message(content_message)
print(f'Prompt: {content_message}')
print(f'Output: {result}')

TypeError: expected string or buffer

In [34]:
def get_player_stats_from_message(content):
    """
    Main function to handle content message, identify parameters, call the necessary functions,
    and return the player's stats.
    """
    # Step 1: Call the first function to get the player ID
    completion = client.chat.completions.create(
        model="infiniti-gpt4",
        messages=[{"role": "user", "content": content}],
        functions=[
            {
                "name": "get_player_id",
                "description": "Get the player MLBAM ID for a given player",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "last_name": {
                            "type": "string",
                            "description": "The player's last name",
                        },
                        "first_name": {
                            "type": "string",
                            "description": "The player's first name",
                        },
                    },
                    "required": ["last_name", "first_name"],
                },
            },
            {
                "name": "get_player_stats",
                "description": "Get the player stats for a given player and year. The model should interpret the user's request to determine the necessary stats.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "fg_id": {
                            "type": "integer",
                            "description": "The Fangraphs player ID",
                        },
                        "year": {
                            "type": "integer",
                            "description": "The year of the stats",
                        },
                        "stats": {
                            "type": "array",
                            "items": {
                                "type": "string"
                            },
                            "description": "This is the list of stats we are interested in receiving. You must interpret the user's request and create a list of either one or multiple statistics using your knowledge of baseball to identify what those stats are.",
                        },
                    },
                    "required": ["fg_id", "year", "stats"],
                },
            }
        ],
        function_call="auto",
    )

    # Extract the response from the first function call
    reply_content = completion.choices[0].message
    completion_tokens_1 = completion.usage.completion_tokens
    prompt_tokens_1 = completion.usage.prompt_tokens
    function_call_arguments = json.loads(reply_content.function_call.arguments)
    

    # Check if the function call is to get_player_id
    if reply_content.function_call.name == "get_player_id":
        player_info = get_player_id(function_call_arguments['last_name'], function_call_arguments['first_name'])
        if "error" in player_info:
            return json.dumps(player_info)

        # Ensure all values in player_info are native Python types
        player_info = {k: int(v) if isinstance(v, (np.integer, np.int64, np.int32)) else v for k, v in player_info.items()}

        # Call the second function to get the player stats using the player ID
        completion = client.chat.completions.create(
            model="infiniti-gpt4",
            messages=[
                {"role": "user", "content": content},
                {"role": "assistant", "content": None, "function_call": {
                    "name": "get_player_id",
                    "arguments": json.dumps(function_call_arguments)
                }},
                {"role": "function", "name": "get_player_id", "content": json.dumps(player_info)}
            ],
            functions=[
                {
                    "name": "get_player_id",
                    "description": "Get the player MLBAM ID for a given player",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "last_name": {
                                "type": "string",
                                "description": "The player's last name",
                            },
                            "first_name": {
                                "type": "string",
                                "description": "The player's first name",
                            },
                        },
                        "required": ["last_name", "first_name"],
                    },
                },
                {
                    "name": "get_player_stats",
                    "description": "Get the player stats for a given player and year. The model should interpret the user's request to determine the necessary stats.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "fg_id": {
                                "type": "integer",
                                "description": "The Fangraphs player ID",
                            },
                            "year": {
                                "type": "integer",
                                "description": "The year of the stats",
                            },
                            "stats": {
                                "type": "array",
                                "items": {
                                    "type": "string"
                                },
                                "description": "This is the list of stats we are interested in receiving. You must interpret the user's request and create a list of either one or multiple statistics using your knowledge of baseball to identify what those stats are.",
                            },
                        },
                        "required": ["fg_id", "year", "stats"],
                    },
                }
            ],
            function_call="auto",
        )

        # Extract the response from the second function call
        reply_content = completion.choices[0].message
        completion_tokens_2 = completion.usage.completion_tokens
        prompt_tokens_2 = completion.usage.prompt_tokens
        function_call_arguments = json.loads(reply_content.function_call.arguments)

        # Use the second set of function_call_arguments generated by the second calling
        fg_id = function_call_arguments['fg_id']
        year = function_call_arguments['year']
        suggested  = function_call_arguments['stats']
        

        stats = map_suggested_to_actual_stats(suggested, available_stats)
        stats = ast.literal_eval(stats)
        total_completion_tokens = completion_tokens_1 + completion_tokens_2
        total_prompt_tokens  = prompt_tokens_1 + prompt_tokens_2
        # Get player stats
        player_stats = get_player_stats(fg_id, year, stats)
        player_stats= {k: int(v) if isinstance(v, (np.integer, np.int64, np.int32)) else v for k, v in player_stats.items()}


        print(f"Total Prompt Tokens: {total_prompt_tokens}")
        print(f"Total Prompt Cost: {(total_prompt_tokens /1000) * 0.12}")

        print(f"Total Completion Tokens: {total_completion_tokens }")
        print(f"Total Completion Cost ($): {(total_completion_tokens /1000) * 0.12 }")
        return json.dumps(player_stats, indent=4)

# Example usage
content_message = "What was Bo Bichette's strikeout rate and games played in 2022?"
result = get_player_stats_from_message(content_message)
print(result)


Total Prompt Tokens: 453
Total Prompt Cost: 0.05436
Total Completion Tokens: 54
Total Completion Cost ($): 0.00648
{
    "name": "Bo Bichette",
    "year": 2022,
    "K%": 0.222,
    "G": 159
}


In [29]:
content_message = "What was Gunnar Hendersons Zone Contact Percentage, and the average fastball velocity he faced in 2023?"
result = get_player_stats_from_message(content_message)
print(f'Prompt: {content_message}')
print(f'Output: {result}')

Prompt: What was Gunnar Hendersons Zone Contact Percentage, and the average fastball velocity he faced in 2023?
Output: {
    "name": "Gunnar Henderson",
    "year": 2023,
    "Z-Contact%": 0.8270000000000001,
    "FBv": 94.2
}


In [30]:
content_message = "How well did Gunnar Henderson hit Sliders and splitters in 2023?"
result = get_player_stats_from_message(content_message)
print(f'Prompt: {content_message}')
print(f'Output: {result}')

Prompt: How well did Gunnar Henderson hit Sliders and splitters in 2023?
Output: {
    "name": "Gunnar Henderson",
    "year": 2023,
    "wSL/C": 0.77,
    "wSF/C": -0.06
}
