In [1]:
# Import packages
import os
import json
import statsapi
import datetime
from datetime import timedelta, datetime
import pandas as pd
from phi.assistant import Assistant
from phi.llm.groq import Groq

In [2]:
# Groq credential comes from the environment so it is never hard-coded in the notebook.
# The value is None when GROQ_API_KEY is not set.
api_key = os.environ.get('GROQ_API_KEY')

In [3]:
def _groq_llm(model_id: str):
    """Build a Groq LLM handle for `model_id` using the notebook-level `api_key`."""
    return Groq(model=model_id, api_key=api_key)


# One handle per Groq-hosted model; the agent definitions below pick from these.
llm_llama70b = _groq_llm("llama3-70b-8192")
llm_llama8b  = _groq_llm("llama3-groq-8b-8192-tool-use-preview")
llm_gemma2   = _groq_llm("gemma2-9b-it")
llm_mixtral  = _groq_llm("mixtral-8x7b-32768")

In [4]:
def get_game_info(game_date: str, team_name: str) -> str:
    """Gets high-level information on an MLB game.

    Params:
    game_date: The date of the game of interest, in the form "yyyy-mm-dd".
    team_name: MLB team name. Both full name (e.g. "New York Yankees") or nickname ("Yankees") are valid. If multiple teams are mentioned, use the first one

    Returns:
    A JSON object with the keys game_id, home_team, home_score, away_team, away_score, winning_team and series_status. If no game on that date matches the team, a JSON object with a single "error" key is returned instead.
    """
    sched = statsapi.schedule(start_date=game_date, end_date=game_date)

    # Literal, case-insensitive substring match against each game's summary text.
    # The team name is supplied by an LLM/user, so it must not be interpreted as a
    # regular expression (a name such as "Yankees[" would otherwise raise).
    needle = team_name.lower()
    game = next(
        (g for g in sched if needle in str(g.get("summary") or "").lower()),
        None,
    )

    if game is None:
        # Covers both "no games that day" and "team did not play": report it as data
        # so the calling agent can relay the problem instead of hitting an IndexError.
        return json.dumps(
            {"error": f'No MLB game found for "{team_name}" on {game_date}.'}
        )

    # When several games match (e.g. a doubleheader) the first one is used.
    game_info = {
        "game_id": str(game["game_id"]),
        "home_team": game["home_name"],
        "home_score": game["home_score"],
        "away_team": game["away_name"],
        "away_score": game["away_score"],
        # A schedule entry may lack a winner (presumably until the game is final —
        # verify against statsapi); emit JSON null rather than failing or writing NaN.
        "winning_team": game.get("winning_team"),
        "series_status": game.get("series_status"),
    }

    return json.dumps(game_info)


def get_batting_stats(game_id: str) -> str:
    """Gets player boxscore batting stats for a particular MLB game

    Params:
    game_id: The 6-digit ID of the game
    """
    # NOTE(review): this function is registered as an agent tool, so the docstring is
    # presumably forwarded to the LLM as the tool description — wording kept as-is.
    box = statsapi.boxscore_data(game_id)

    # playerInfo is a mapping of player key -> attributes; transpose so that each
    # player becomes one row (the original key ends up in an "index" column).
    roster = pd.DataFrame(box['playerInfo']).T.reset_index()

    # One frame per side: skip the first entry of each batter list (assumed to be a
    # header row — TODO confirm against statsapi) and tag rows with the team name.
    sides = (('awayBatters', 'away'), ('homeBatters', 'home'))
    lineups = [
        pd.DataFrame(box[batters_key])
        .iloc[1:]
        .assign(team_name=box['teamInfo'][side]['teamName'])
        for batters_key, side in sides
    ]

    # Inner join on the boxscore display name to attach each player's fullName.
    # NOTE(review): two players sharing a boxscoreName would yield duplicate rows.
    joined = pd.concat(lineups).merge(roster, left_on='name', right_on='boxscoreName')

    wanted = ['team_name', 'fullName', 'position', 'ab', 'r', 'h', 'hr', 'rbi', 'bb', 'sb']
    return json.dumps(joined[wanted].to_dict(orient='records'))


def get_pitching_stats(game_id: str) -> str:
    """Gets player boxscore pitching stats for a particular MLB game

    Params:
    game_id: The 6-digit ID of the game
    """
    # NOTE(review): this function is registered as an agent tool, so the docstring is
    # presumably forwarded to the LLM as the tool description — wording kept as-is.
    box = statsapi.boxscore_data(game_id)

    # playerInfo is a mapping of player key -> attributes; transpose so that each
    # player becomes one row (the original key ends up in an "index" column).
    roster = pd.DataFrame(box['playerInfo']).T.reset_index()

    # One frame per side: skip the first entry of each pitcher list (assumed to be a
    # header row — TODO confirm against statsapi) and tag rows with the team name.
    sides = (('awayPitchers', 'away'), ('homePitchers', 'home'))
    staffs = [
        pd.DataFrame(box[pitchers_key])
        .iloc[1:]
        .assign(team_name=box['teamInfo'][side]['teamName'])
        for pitchers_key, side in sides
    ]

    # Inner join on the boxscore display name to attach each player's fullName.
    # NOTE(review): two players sharing a boxscoreName would yield duplicate rows.
    joined = pd.concat(staffs).merge(roster, left_on='name', right_on='boxscoreName')

    wanted = ['team_name', 'fullName', 'ip', 'h', 'r', 'er', 'bb', 'k', 'note']
    return json.dumps(joined[wanted].to_dict(orient='records'))

In [5]:
# Fallback game date (yesterday) for questions that do not mention a date.
# It is evaluated once, when this cell runs, and baked into the prompt below.
default_date = (datetime.now() - timedelta(days=1)).date()

# Prompt steps for the researcher: extract team/date, call the tool, then echo the
# tool's fields in a fixed "key: value" layout for the downstream agents.
_researcher_instructions = [
    f"Parse the Team and date (use {default_date} if user does not specify) from the user question.",
    "Pass to your get_game_info tool",
    """
        Respond in the following format:
            game_id: game_id
            home_team: home_team
            home_score: home_score
            away_team: away_team
            away_score: away_score
            winning_team: winning_team
            series_status: series_status
        """,
]

# First-stage agent: resolves the user question to one concrete game via get_game_info.
mlb_researcher = Assistant(
    llm=llm_mixtral,
    description="An detailed accurate MLB researcher extracts game information from the user question",
    instructions=_researcher_instructions,
    tools=[get_game_info],
)

# Second-stage agent: given the researcher's game information, fetches the batting
# boxscore through get_batting_stats and writes an analysis of it.
mlb_batting_statistician = Assistant(
    llm=llm_mixtral,
    description="An industrious MLB Statistician analyzing player boxscore stats for the relevant game",
    instructions=[
        "Given information about a MLB game, retrieve ONLY boxscore player batting stats for the game identified by the MLB Researcher",
        # get_batting_stats returns per-player game totals only (ab, r, h, hr, rbi, bb, sb).
        # Asking for inning-by-inning summaries would force the model to invent them.
        "Your analysis should be at least 500 words long and must use ONLY the per-player game totals returned by your tool. Do NOT describe inning-by-inning events, because that data is not provided",
    ],
    tools=[get_batting_stats],
)

# Second-stage agent: given the researcher's game information, fetches the pitching
# boxscore through get_pitching_stats and writes an analysis of it.
mlb_pitching_statistician = Assistant(
    llm=llm_mixtral,
    description="An industrious MLB Statistician analyzing player boxscore stats for the relevant game",
    instructions=[
        "Given information about a MLB game, retrieve ONLY boxscore player pitching stats for a specific game",
        # get_pitching_stats returns per-player game totals only (ip, h, r, er, bb, k, note).
        # Asking for inning-by-inning summaries would force the model to invent them.
        "Your analysis should be at least 500 words long and must use ONLY the per-player game totals returned by your tool. Do NOT describe inning-by-inning events, because that data is not provided",
    ],
    tools=[get_pitching_stats],
)

# Writing brief for the Llama-based writer: the long rule block comes first,
# followed by the length requirement.
_llama_writer_instructions = [
        """
            Write a game recap article using the provided game information and stats.
            Key instructions:
            - Include things like final score, top performers and winning/losing pitcher.
            - Use ONLY the provided data and DO NOT make up any information, such as specific innings when events occurred, that isn't explicitly from the provided input.
            - Do not print the box score
        """,
        "Your recap from the stats should be at least 1000 words. Impress your readers!!!",
]

# Third-stage agent (no tools): drafts a recap from the text it is handed.
mlb_writer_llama = Assistant(
    llm=llm_llama70b,
    description="An experienced, honest, and industrious writer who does not make things up",
    instructions=_llama_writer_instructions,
)

def _generic_recap_writer(llm):
    """Build a tool-less recap writer on `llm` with the short, shared writing brief."""
    # A fresh instructions list is created per call so the two writers share no state.
    return Assistant(
        llm=llm,
        description="An experienced and honest writer who does not make things up",
        instructions=["Write a detailed game recap article using the provided game information and stats"],
    )


# Two additional third-stage writers that differ only in the underlying model.
mlb_writer_gemma = _generic_recap_writer(llm_gemma2)

mlb_writer_mixtral = _generic_recap_writer(llm_mixtral)

# Final-stage agent (no tools): receives the writers' drafts and produces one article.
_editor_persona = "An experienced editor that excels at taking the best parts of multiple texts to create the best final product"

mlb_editor = Assistant(
    llm=llm_llama70b,
    description=_editor_persona,
    instructions=["Edit recap articles to create the best final product."],
)

In [6]:
# The question that drives the whole pipeline; edit this to recap a different game.
user_prompt = "write a recap of the Yankees game on July 14, 2024"

In [7]:
# Stage 1 — the researcher resolves the question to one game and reports its
# game_id, teams, scores, winning team and series status.
game_information = mlb_researcher.run(user_prompt, stream=False)

# Stage 2 — each statistician looks up its own slice of the boxscore for that game.
batting_stats  = mlb_batting_statistician.run(game_information, stream=False)
pitching_stats = mlb_pitching_statistician.run(game_information, stream=False)

# Stage 3 — the writers draft recaps. The researcher's game information is passed
# along with the stats: the writers are instructed to report the final score using
# only provided data, and the boxscore tools do not return the score or the winner.
stats = (
    "Statistical summaries for the game:\n\n"
    f"Game information:\n{game_information}\n\n"
    f"Batting stats:\n{batting_stats}\n\n"
    f"Pitching stats:\n{pitching_stats}"
)
llama_writer   = mlb_writer_llama.run(stats, stream=False)
gemma_writer   = mlb_writer_gemma.run(stats, stream=False)
mixtral_writer = mlb_writer_mixtral.run(stats, stream=False)

# Stage 4 — the editor merges the three drafts into the final article.
editor_inputs = [llama_writer, gemma_writer, mixtral_writer]
editor = mlb_editor.run("\n".join(editor_inputs), stream=False)

print(editor)

Here is a rewritten and edited version of the articles, combining the best parts to create a cohesive and engaging final product:

**Orioles Edge Yankees in Thrilling 6-5 Victory**

In a nail-biting affair, the Baltimore Orioles outlasted the New York Yankees 6-5 on July 14, 2024, at home, showcasing their grit and determination in a back-and-forth battle.

The Yankees drew first blood in the top of the 1st inning, courtesy of a single by shortstop Anthony Volpe and a two-run homer from center fielder Trent Grisham, putting the visitors up 2-0. However, the Orioles responded swiftly, with Gunnar Henderson's solo shot in the bottom of the 1st reducing the deficit to 2-1.

The seesaw battle continued in the top of the 2nd, as Ben Rice's solo homer restored the Yankees' two-run cushion, only for the Orioles to bridge the gap once more. Singles from Anthony Santander and Jorge Mateo set the stage for Cedric Mullins' RBI single, bringing the score to 3-2 in favor of the Yankees.

Despite op