# F1 Race Report Generator

Two-agent system with memory:
- **Agent 1**: Data Collection (FastF1)
- **Agent 2**: Report Generation (Gemini)
- **Memory**: Store and retrieve race reports

## 1. Setup

In [None]:
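# Install pinned dependencies. python-dotenv is NOT installed here: it is optional and only
# used for local development (install it yourself if you keep settings in a .env file).
# NOTE(review): confirm these pins actually provide vertexai.Client and
# google.adk.memory.VertexAiMemoryBankService, which later cells rely on.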
%pip install -q google-cloud-aiplatform==1.75.0 google-adk==0.1.5 fastf1==3.4.5 pandas==2.2.3 nest-asyncio==1.6.0

## 2. Imports & Configuration

In [None]:
import os
import json
from datetime import datetime
from typing import Dict, List, Optional, Any
import pandas as pd
import fastf1
import vertexai
import nest_asyncio
from vertexai.generative_models import GenerativeModel

# Configuration - supports both Kaggle and local environments.
# Kaggle: credentials come from Kaggle Secrets (see KAGGLE_SUBMISSION.md).
# Local:  settings come from a .env file or plain environment variables.

_gcp_credentials_path = '/tmp/gcp_credentials.json'
_kaggle_secrets_loaded = False

# Try to load the service-account key from Kaggle Secrets (only works inside Kaggle)
try:
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    gcp_creds = user_secrets.get_secret("GCP_SERVICE_ACCOUNT")

    # Create the key file owner-only (0o600), and export its path only after
    # the write succeeded so the variable never points at a missing file.
    _creds_fd = os.open(_gcp_credentials_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(_creds_fd, 'w') as f:
        f.write(gcp_creds)
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = _gcp_credentials_path
    _kaggle_secrets_loaded = True
    print("✓ Loaded GCP credentials from Kaggle Secrets")
except ImportError:
    # Not running on Kaggle; fall through to the local configuration below.
    pass
except Exception as exc:
    # Best effort: a missing/unreadable secret must not stop the notebook,
    # but say why so the fallback is not a silent surprise.
    print(f"Kaggle Secrets unavailable ({exc}); falling back to local configuration")

# Fallback: local .env file or plain environment variables
if not _kaggle_secrets_loaded:
    try:
        from dotenv import load_dotenv
        load_dotenv()
        print("✓ Loaded configuration from .env file")
    except Exception:
        # python-dotenv is optional; without it only real environment variables apply.
        print("ℹ Using default configuration (set GCP_PROJECT_ID and GCP_LOCATION if needed)")

# Project configuration (environment variables override the built-in defaults)
PROJECT_ID = os.getenv('GCP_PROJECT_ID', 'gen-lang-client-0467867580')
LOCATION = os.getenv('GCP_LOCATION', 'us-central1')
MODEL_NAME = 'gemini-2.5-flash'

# Initialize Vertex AI
vertexai.init(project=PROJECT_ID, location=LOCATION)
# Patch asyncio so run_until_complete() can be used while the notebook's loop is running
nest_asyncio.apply()
# FastF1 refuses a cache directory that does not exist, so create it first
os.makedirs('f1_cache', exist_ok=True)
fastf1.Cache.enable_cache('f1_cache')

print("✓ Environment configured")
print(f"  Project: {PROJECT_ID}")
print(f"  Location: {LOCATION}")
print(f"  Model: {MODEL_NAME}")


## 3. Agent Engine Setup


In [None]:
# Connect to Vertex AI; the Agent Engine resolved below is used for the Memory Bank
client = vertexai.Client(project=PROJECT_ID, location=LOCATION)

# Reuse the first Agent Engine the project already has; create one only if none exist
try:
    agent_engines = list(client.agent_engines.list())
    if not agent_engines:
        agent_engine = client.agent_engines.create()
        print(f"Created new Agent Engine: {agent_engine.api_resource.name}")
    else:
        agent_engine = agent_engines[0]
        print(f"Using existing Agent Engine: {agent_engine.api_resource.name}")

    # The engine ID is the last path segment of the resource name
    agent_engine_id = agent_engine.api_resource.name.rsplit("/", 1)[-1]
    print(f"Engine ID: {agent_engine_id}")
except Exception as engine_error:
    # Report the failure, then re-raise: later cells cannot work without an engine ID
    print(f"Error initializing Agent Engine: {engine_error}")
    raise


## 4. Memory Service (Vertex AI Memory Bank)


In [None]:
import asyncio
from google.adk.memory import VertexAiMemoryBankService

class MemoryService:
    """Persistent storage for race reports using Vertex AI Memory Bank + local backup.

    Every report is kept in an in-memory cache that is mirrored to a local JSON
    file, and the same entry is forwarded to Vertex AI Memory Bank. Lookups by
    ID and listings are answered from the cache; searches try Memory Bank first
    and fall back to the cache.

    A cache entry has the shape ``{"data": <report dict>, "timestamp": <ISO string>}``.
    """

    # App name / user ID scope shared by Memory Bank writes and searches.
    _SCOPE = "f1_report_system"

    def __init__(self, project: str, location: str, agent_engine_id: str, backup_file: str = "f1_reports_backup.json"):
        """Create the Memory Bank client and warm the cache from the backup file.

        Args:
            project: GCP project ID passed to the Memory Bank service.
            location: GCP region passed to the Memory Bank service.
            agent_engine_id: ID of the Agent Engine that hosts the Memory Bank.
            backup_file: Path of the local JSON mirror of the cache.
        """
        self._service = VertexAiMemoryBankService(
            project=project,
            location=location,
            agent_engine_id=agent_engine_id
        )
        self._cache: Dict[str, Dict[str, Any]] = {}  # race_id -> entry
        self._backup_file = backup_file
        self._load_from_backup()  # Load from local backup first

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _get_event_loop() -> asyncio.AbstractEventLoop:
        """Return the running loop, else the current loop, else a freshly installed one."""
        try:
            return asyncio.get_running_loop()
        except RuntimeError:
            pass
        try:
            return asyncio.get_event_loop()
        except RuntimeError:
            # No usable loop in this thread (e.g. after asyncio.run()): install a new one.
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            return loop

    @staticmethod
    def _report_background_failure(task: asyncio.Future) -> None:
        """Done-callback that surfaces an exception raised by a scheduled Memory Bank write."""
        if task.cancelled():
            return
        error = task.exception()
        if error is not None:
            print(f"Error storing in Memory Bank: {error}")

    @staticmethod
    def _is_valid_entry(entry: Any) -> bool:
        """Return True when *entry* has the ``data`` dict and ``timestamp`` the cache relies on."""
        return (
            isinstance(entry, dict)
            and isinstance(entry.get('data'), dict)
            and 'timestamp' in entry
        )

    @staticmethod
    def _summarize(race_id: str, entry: Dict[str, Any], default_name: str = '') -> Dict[str, Any]:
        """Build the summary dict (race_id, gp_name, timestamp) returned by searches and listings."""
        gp_name = entry['data'].get('race_data', {}).get('gp_info', {}).get('name', default_name)
        return {
            "race_id": race_id,
            "gp_name": gp_name,
            "timestamp": entry['timestamp']
        }

    # ------------------------------------------------------------- local backup

    def _load_from_backup(self) -> None:
        """Load reports from the local JSON backup file; problems are reported, never raised."""
        try:
            if not os.path.exists(self._backup_file):
                print("No local backup found, starting fresh")
                return
            with open(self._backup_file, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers both malformed JSON and undecodable bytes.
            print(f"Error loading backup: {e}")
            return

        if not isinstance(loaded, dict):
            # Anything but a JSON object would break every cache lookup later on.
            print(f"Error loading backup: expected a JSON object, got {type(loaded).__name__}")
            return

        self._cache = loaded
        print(f"Loaded {len(self._cache)} report(s) from local backup")

    def _save_to_backup(self) -> None:
        """Write the cache to the backup file atomically; problems are reported, never raised.

        The data is serialized to a sibling temporary file and then moved over
        the real file, so a failed dump (for example a non-serializable value)
        cannot truncate or corrupt the previous backup.
        """
        tmp_path = f"{self._backup_file}.tmp"
        try:
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(self._cache, f, indent=2, ensure_ascii=False)
            os.replace(tmp_path, self._backup_file)
        except Exception as e:
            print(f"Error saving backup: {e}")
            try:
                os.remove(tmp_path)
            except OSError:
                pass  # temp file was never created or is already gone

    # ------------------------------------------------------------------- writes

    def add_session_to_memory(self, race_id: str, report_data: Dict[str, Any]) -> None:
        """Store a race report in the cache, the local backup and Memory Bank.

        The cache and backup are updated first, so the report survives locally
        even if the Memory Bank write fails. When an event loop is already
        running the Memory Bank write is scheduled in the background and its
        failure is printed; otherwise it is run to completion here.

        Args:
            race_id: Key under which the report is stored.
            report_data: The full report dictionary.

        Raises:
            Exception: Whatever the synchronous part of the operation raised,
                re-raised after being printed.
        """
        try:
            entry = {
                "data": report_data,
                "timestamp": datetime.now().isoformat()
            }

            # Store in local cache and persist immediately
            self._cache[race_id] = entry
            self._save_to_backup()

            # Store in Memory Bank (async operation wrapped in sync)
            loop = self._get_event_loop()
            if loop.is_running():
                task = loop.create_task(self._async_add_session(race_id, entry))
                # Without this callback a failed background write would go unnoticed.
                task.add_done_callback(self._report_background_failure)
            else:
                loop.run_until_complete(self._async_add_session(race_id, entry))
        except Exception as e:
            print(f"Error storing in Memory Bank: {e}")
            raise

    async def _async_add_session(self, race_id: str, entry: Dict[str, Any]) -> None:
        """Async helper that wraps *entry* in an ADK Session and hands it to Memory Bank."""
        from google.adk.sessions import Session

        # The ADK Session model identifies a session by `id` + `app_name` + `user_id`
        # and carries free-form data in `state`.
        # NOTE(review): Memory Bank may derive memories from session events rather
        # than from state - confirm that a state-only session is actually persisted.
        session = Session(
            id=race_id,
            app_name=self._SCOPE,
            user_id=self._SCOPE,
            state=entry
        )
        await self._service.add_session_to_memory(session)

    # ------------------------------------------------------------------ queries

    def _search_local_cache(self, query: str) -> List[Dict[str, Any]]:
        """Search cached reports by case-insensitive substring of race_id or GP name."""
        needle = query.lower()
        matches = []
        for race_id, entry in self._cache.items():
            summary = self._summarize(race_id, entry)
            if needle in race_id.lower() or needle in summary["gp_name"].lower():
                matches.append(summary)
        return matches

    async def _async_search_memory_vertex(self, query: str) -> List[Dict[str, Any]]:
        """Search Vertex AI Memory Bank and map the hits onto cache-style summaries.

        Returns an empty list when the search itself fails or when no hit can
        be tied to a race ID.

        NOTE(review): the attribute names probed on the response (memories,
        session, metadata, ...) are guesses at its shape - confirm them against
        the installed google-adk version.
        """
        try:
            response = await self._service.search_memory(
                query=query,
                app_name=self._SCOPE,
                user_id=self._SCOPE
            )
        except Exception as e:
            print(f"Vertex AI Memory Bank search error: {e}")
            return []

        memories = getattr(response, "memories", None) or getattr(response, "Memories", None) or []
        results = []
        for mem in memories:
            # Try to get a session_id / race_id from the memory result
            race_id = getattr(mem, "session_id", None)
            entry = None

            # Some implementations may nest a Session object
            session_obj = getattr(mem, "session", None)
            if session_obj is not None:
                race_id = (
                    race_id
                    or getattr(session_obj, "session_id", None)
                    or getattr(session_obj, "id", None)
                )
                for attr in ("metadata", "state"):
                    candidate = getattr(session_obj, attr, None)
                    if isinstance(candidate, dict):
                        entry = candidate
                        break

            # Fallback: some implementations may expose metadata directly
            if entry is None:
                metadata = getattr(mem, "metadata", None)
                if isinstance(metadata, dict):
                    entry = metadata

            if not race_id:
                # Without a race_id we can't map cleanly; skip this memory
                continue

            if race_id in self._cache:
                # Prefer our local cache copy when available
                entry = self._cache[race_id]
            elif self._is_valid_entry(entry):
                # Learned from Vertex: remember it, but only if it has the expected shape
                self._cache[race_id] = entry
            else:
                continue

            results.append(self._summarize(race_id, entry))

        return results

    def search_memory(self, query: str) -> List[Dict[str, Any]]:
        """Search stored reports, preferring Vertex AI Memory Bank and falling back to local cache."""
        search = self._async_search_memory_vertex(query)
        try:
            vertex_results = self._get_event_loop().run_until_complete(search)
            if vertex_results:
                return vertex_results
        except Exception as e:
            # Close the coroutine in case it was never started (e.g. loop already running).
            search.close()
            print(f"Vertex search failed, using local cache: {e}")

        # Fallback: local cache search
        return self._search_local_cache(query)

    def get_report(self, race_id: str) -> Optional[Dict[str, Any]]:
        """Return the cached entry for *race_id*, or None when it is unknown."""
        return self._cache.get(race_id)

    def list_all(self) -> List[Dict[str, Any]]:
        """Return a summary of every cached report (GP name defaults to 'Unknown')."""
        return [
            self._summarize(race_id, entry, default_name='Unknown')
            for race_id, entry in self._cache.items()
        ]

# Build the shared memory service (Vertex AI Memory Bank mirrored to a local backup file)
memory = MemoryService(PROJECT_ID, LOCATION, agent_engine_id)
print("Memory service initialized (Vertex AI Memory Bank + local backup)")


In [None]:
# F1 2025 calendar, keyed by championship round number.
# The round numbers must follow the 2025 season order (Australia opens the season,
# Bahrain is round 4): a round resolved here by GP name is passed to FastF1 as-is.
F1_2025_CALENDAR = {
    1: {"name": "Australian Grand Prix", "circuit": "Albert Park Circuit"},
    2: {"name": "Chinese Grand Prix", "circuit": "Shanghai International Circuit"},
    3: {"name": "Japanese Grand Prix", "circuit": "Suzuka International Racing Course"},
    4: {"name": "Bahrain Grand Prix", "circuit": "Bahrain International Circuit"},
    5: {"name": "Saudi Arabian Grand Prix", "circuit": "Jeddah Corniche Circuit"},
    6: {"name": "Miami Grand Prix", "circuit": "Miami International Autodrome"},
    7: {"name": "Emilia Romagna Grand Prix", "circuit": "Autodromo Enzo e Dino Ferrari"},
    8: {"name": "Monaco Grand Prix", "circuit": "Circuit de Monaco"},
    9: {"name": "Spanish Grand Prix", "circuit": "Circuit de Barcelona-Catalunya"},
    10: {"name": "Canadian Grand Prix", "circuit": "Circuit Gilles Villeneuve"},
    11: {"name": "Austrian Grand Prix", "circuit": "Red Bull Ring"},
    12: {"name": "British Grand Prix", "circuit": "Silverstone Circuit"},
    13: {"name": "Belgian Grand Prix", "circuit": "Circuit de Spa-Francorchamps"},
    14: {"name": "Hungarian Grand Prix", "circuit": "Hungaroring"},
    15: {"name": "Dutch Grand Prix", "circuit": "Circuit Zandvoort"},
    16: {"name": "Italian Grand Prix", "circuit": "Autodromo Nazionale di Monza"},
    17: {"name": "Azerbaijan Grand Prix", "circuit": "Baku City Circuit"},
    18: {"name": "Singapore Grand Prix", "circuit": "Marina Bay Street Circuit"},
    19: {"name": "United States Grand Prix", "circuit": "Circuit of the Americas"},
    20: {"name": "Mexico City Grand Prix", "circuit": "Autódromo Hermanos Rodríguez"},
    21: {"name": "São Paulo Grand Prix", "circuit": "Autódromo José Carlos Pace"},
    22: {"name": "Las Vegas Grand Prix", "circuit": "Las Vegas Street Circuit"},
    23: {"name": "Qatar Grand Prix", "circuit": "Lusail International Circuit"},
    24: {"name": "Abu Dhabi Grand Prix", "circuit": "Yas Marina Circuit"},
}

print(f"Calendar loaded: {len(F1_2025_CALENDAR)} races")

## 5. Agent 1: Data Collection

In [None]:
class DataCollectionAgent:
    """Validates input and collects F1 race data.

    User input (a round number or part of a Grand Prix name) is resolved to a
    round number through the calendar, and the race results for that round are
    then loaded through FastF1.
    """

    def __init__(self, calendar: Dict[int, Dict[str, str]], year: int = 2025):
        """Store the calendar (round -> {"name", "circuit"}) and the default season."""
        self.calendar = calendar
        self.year = year

    def validate_input(self, user_input: str) -> Optional[int]:
        """Validate and convert user input to round number.

        Args:
            user_input: A round number, or a case-insensitive fragment of a
                Grand Prix name from the calendar.

        Returns:
            The matching round number (the first match for a name fragment),
            or None when the input is blank or matches nothing.
        """
        user_input = user_input.strip()
        if not user_input:
            # '' is a substring of every name, so blank input would otherwise
            # silently select the first race of the calendar.
            return None

        # Try parsing as round number
        try:
            round_num = int(user_input)
        except ValueError:
            pass
        else:
            return round_num if round_num in self.calendar else None

        # Try matching GP name
        user_lower = user_input.lower()
        for round_num, info in self.calendar.items():
            if user_lower in info['name'].lower():
                return round_num

        return None

    @staticmethod
    def _to_int(value: Any) -> Optional[int]:
        """Convert a results cell to int; None when it is missing, blank or non-numeric."""
        try:
            if pd.notna(value) and str(value).strip():
                return int(value)
        except (ValueError, TypeError):
            pass
        return None

    def _circuit_for(self, event_name: str, round_num: int) -> str:
        """Return the calendar circuit for *event_name*, else the one listed for *round_num*.

        Matching on the event name keeps the circuit consistent with the race
        that was actually loaded, even when the calendar numbering differs from
        the season being queried (e.g. after falling back to the previous year).
        """
        for info in self.calendar.values():
            if info['name'] == event_name:
                return info['circuit']
        return self.calendar[round_num]['circuit']

    def collect_race_data(self, round_num: int, year: Optional[int] = None) -> Optional[Dict[str, Any]]:
        """Collect comprehensive race data.

        Args:
            round_num: Championship round to load.
            year: Season to load; defaults to the agent's configured year.

        Returns:
            A dict with race_id, year, round, gp_info, podium and final_results,
            or None when the data could not be loaded. If loading fails for the
            configured year (2024 or later), the same round of the previous
            year is tried once.
        """
        if year is None:
            year = self.year

        try:
            print(f"Collecting data for Round {round_num} ({year})...")

            # Get event and session
            event = fastf1.get_event(year, round_num)
            session = fastf1.get_session(year, round_num, "R")
            session.load()

            # Process results
            drivers_results = []
            for _, row in session.results.iterrows():
                # Position fallback chain: Position -> ClassifiedPosition -> running order
                position = self._to_int(row.get('Position'))
                if position is None:
                    position = self._to_int(row.get('ClassifiedPosition'))
                if position is None and str(row.get('Status')) == 'Finished':
                    # For finished drivers without position, use order in dataframe (usually sorted)
                    position = sum(1 for d in drivers_results if d['position'] is not None) + 1

                drivers_results.append({
                    "position": position,
                    "full_name": str(row['FullName']) if pd.notna(row['FullName']) else None,
                    "team": str(row['TeamName']) if pd.notna(row['TeamName']) else None,
                    "grid_position": self._to_int(row.get('GridPosition')),
                    "time": str(row['Time']) if pd.notna(row['Time']) else None,
                    "points": float(row['Points']) if pd.notna(row['Points']) else 0.0,
                })

            # If no positions were found, assign based on results order (FastF1 usually sorts by finish)
            if all(r['position'] is None for r in drivers_results):
                print("No position data from Ergast, using results order")
                for idx, driver in enumerate(drivers_results):
                    driver['position'] = idx + 1

            # Get podium and key stats
            podium = sorted(
                (r for r in drivers_results if r['position'] in [1, 2, 3]),
                key=lambda x: x['position'],
            )

            # Compile data
            race_data = {
                "race_id": f"{year}_R{round_num}",
                "year": year,
                "round": round_num,
                "gp_info": {
                    "name": event.EventName,
                    "country": event.Country,
                    "circuit": self._circuit_for(event.EventName, round_num),
                },
                "podium": podium,
                "final_results": [r for r in drivers_results if r['position'] is not None]
            }

            print(f"Data collected: {event.EventName} ({year})")
            print(f"Podium finishers: {len(podium)}")
            return race_data

        except Exception as e:
            # Fallback to previous year if current year fails. Only the configured
            # year triggers it, so the retry happens at most once.
            if year >= 2024 and year == self.year:
                # Include the cause so a genuine bug is not mistaken for missing data
                print(f"{year} data unavailable ({e}), trying {year-1}...")
                return self.collect_race_data(round_num, year=year-1)
            print(f"Error: {e}")
            return None

    def run(self, user_input: str) -> Optional[Dict[str, Any]]:
        """Main execution: resolve *user_input* to a round and collect its race data."""
        round_num = self.validate_input(user_input)
        if not round_num:
            print(f"Invalid input: '{user_input}'")
            return None
        return self.collect_race_data(round_num)

# Data-collection agent bound to the season calendar (default season: 2025)
agent1 = DataCollectionAgent(calendar=F1_2025_CALENDAR)
print("Agent 1 initialized")

## 6. Agent 2: Report Generation

In [None]:
class ReportGenerationAgent:
    """Generates social media reports from race data."""

    def __init__(self, model_name: str = 'gemini-2.5-flash'):
        """Create the generative model used to write the posts."""
        self.model = GenerativeModel(model_name)

    @staticmethod
    def _describe_start(grid_position: Optional[int]) -> str:
        """Render a driver's starting slot for the prompt.

        A missing grid position must not reach the model as "PNone", and a
        value of 0 is how Ergast-style results encode a pit-lane start.
        """
        if grid_position is None:
            return "Grid position unavailable"
        if grid_position == 0:
            return "Started from the pit lane"
        return f"Started P{grid_position}"

    def _build_prompt(self, race_data: Dict[str, Any]) -> str:
        """Assemble the model prompt from the race header and the three podium finishers."""
        gp_info = race_data['gp_info']
        podium_lines = "\n".join(
            f"{place}: {driver['full_name']} ({driver['team']}) - "
            f"{self._describe_start(driver['grid_position'])}"
            for place, driver in zip(("1st", "2nd", "3rd"), race_data['podium'])
        )
        return (
            "Create an Instagram post for this F1 race:\n"
            "\n"
            f"RACE: {gp_info['name']} ({race_data['year']})\n"
            f"CIRCUIT: {gp_info['circuit']}\n"
            "\n"
            "PODIUM:\n"
            f"{podium_lines}\n"
            "\n"
            "Write an engaging social media post that tells the race story and "
            "highlights the key moments. Don't generate images, just text."
        )

    def generate_report(self, race_data: Dict[str, Any]) -> Optional[str]:
        """Generate social media post.

        Args:
            race_data: Race dictionary with at least 'gp_info', 'year' and a
                'podium' list of three drivers.

        Returns:
            The generated post with surrounding whitespace removed, or None
            when the podium is incomplete or generation fails for any reason.
        """
        try:
            gp_info = race_data['gp_info']
            podium = race_data['podium']

            # Validate podium data
            if len(podium) < 3:
                print(f"Incomplete podium data: only {len(podium)} finisher(s)")
                return None

            print(f"Generating report for {gp_info['name']}...")

            response = self.model.generate_content(
                self._build_prompt(race_data),
                generation_config={
                    "max_output_tokens": 2048,
                    "temperature": 0.5,
                }
            )

            # Read .text once: accessing it can raise when the response has no usable text
            text = response.text
            print(f"Report generated ({len(text)} chars)")
            return text.strip()

        except Exception as e:
            # Best effort: any failure (missing keys, API error, blocked response) yields None
            print(f"Error: {e}")
            return None

    def run(self, race_data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Main execution: wrap the generated post, the source data and a timestamp in one dict.

        Returns None when *race_data* is empty or no post could be generated.
        """
        if not race_data:
            return None

        social_media_post = self.generate_report(race_data)
        if not social_media_post:
            return None

        return {
            "race_id": race_data['race_id'],
            "race_data": race_data,
            "social_media_post": social_media_post,
            "timestamp": datetime.now().isoformat()
        }

# Report-generation agent using the configured model name
agent2 = ReportGenerationAgent(model_name=MODEL_NAME)
print("Agent 2 initialized")

## 7. Workflow: Generate & Store Reports

In [None]:
def generate_f1_report(race_input: str) -> Optional[Dict[str, Any]]:
    """Complete workflow: collect data → generate report → store in memory.

    Args:
        race_input: Round number or Grand Prix name, as typed by the user.

    Returns:
        The full report dict produced by the report agent, or None when data
        collection or report generation failed. A storage failure does not
        discard the report: it is reported and the report is still shown and
        returned.
    """

    # 1. Collect race data
    race_data = agent1.run(race_input)
    if not race_data:
        return None

    # 2. Generate report
    full_report = agent2.run(race_data)
    if not full_report:
        return None

    # 3. Store in memory (INGEST). The post already exists at this point, so a
    #    storage error is downgraded to a warning instead of losing the result.
    stored = True
    try:
        memory.add_session_to_memory(full_report['race_id'], full_report)
    except Exception as exc:
        stored = False
        print(f"Warning: storing report {full_report['race_id']} failed: {exc}")

    # Display result
    print("\n" + "="*70)
    print("SOCIAL MEDIA POST")
    print("="*70)
    print(f"\n{full_report['social_media_post']}\n")
    print("="*70)
    if stored:
        print(f"Stored as: {full_report['race_id']}")

    return full_report

print("Workflow ready")

## 8. Memory Operations

In [None]:
# Search memory (RETRIEVE)
def search_reports(query: str):
    """Look up stored reports matching *query*, print one line per hit, and return the hits."""
    matches = memory.search_memory(query)
    if not matches:
        print(f"No reports found for '{query}'")
        return matches

    print(f"Found {len(matches)} report(s):")
    for match in matches:
        print(f"  - {match['race_id']}: {match['gp_name']} ({match['timestamp']})")
    return matches

# List all reports
def list_reports():
    """Print a one-line summary of every stored report and return the summaries."""
    summaries = memory.list_all()
    if not summaries:
        print("No reports stored yet")
        return summaries

    print(f"Stored reports ({len(summaries)}):")
    for summary in summaries:
        print(f"  - {summary['race_id']}: {summary['gp_name']}")
    return summaries

# Get specific report
def get_report(race_id: str):
    """Fetch the stored entry for *race_id*, print its GP name and timestamp, and return it.

    Returns None (after printing a notice) when no such report is stored.
    """
    stored_entry = memory.get_report(race_id)
    if not stored_entry:
        print(f"Report '{race_id}' not found")
        return None

    gp_name = stored_entry['data']['race_data']['gp_info']['name']
    print(f"Retrieved: {race_id}")
    print(f"GP: {gp_name}")
    print(f"Stored: {stored_entry['timestamp']}")
    return stored_entry

print("Memory operations ready")

## 9. Search & Retrieve from Memory


In [None]:
# Print a summary line for every report currently held by the memory service
list_reports()

# Search stored reports for "Bahrain" and print the matches
search_reports("Bahrain")

# Retrieve one report by its race ID (IDs have the form "<year>_R<round>")
report = get_report("2025_R1")


In [None]:
# Search stored reports using a full Grand Prix name as the query
search_reports("Australian Grand Prix")

In [None]:
# Interactive mode: prompt for a race, then run the full workflow on the answer.
# `report` is None when the input is invalid or any workflow step fails.
race_input = input("Enter race (round number or GP name): ")
report = generate_f1_report(race_input)


## 10. Demonstration: Full System Workflow


In [None]:
# Demonstration: generate reports for several races, then summarize what is stored
print("="*70)
print("F1 REPORT SYSTEM - DEMONSTRATION")
print("="*70)
print("\nThis demonstrates the complete two-agent system:\n")
print("1. Agent 1: Data Collection (FastF1)")
print("2. Agent 2: Report Generation (Gemini 2.5 Flash)")
print("3. Memory: Persistent Storage (Vertex AI Memory Bank)\n")
print("="*70)

# Test with multiple races (each entry is matched against the calendar's GP names)
demo_races = ["Monaco", "Saudi Arabia", "Japan"]

for i, race in enumerate(demo_races, 1):
    print(f"\n[{i}/{len(demo_races)}] Processing: {race}")
    print("-" * 70)

    # generate_f1_report returns None on any failure, so one bad race does not stop the loop
    report = generate_f1_report(race)
    if report:
        print(f"✅ Success! Report ID: {report['race_id']}")
        print(f"📊 GP: {report['race_data']['gp_info']['name']}")
        print(f"📝 Post length: {len(report['social_media_post'])} characters")
    else:
        print(f"❌ Failed to generate report for {race}")

print("\n" + "="*70)
print("SYSTEM SUMMARY")
print("="*70)
all_reports = list_reports()
print(f"\n✅ Total reports stored: {len(all_reports)}")
print("✅ Memory system: Vertex AI Memory Bank + Local Backup")
print("✅ Agents: Data Collection + Report Generation")
print("="*70)


In [None]:
# Print and return the summaries of all stored reports
list_reports()
