# 🏎️ GR Cup Analytics - Additional Data Processing

This notebook handles new telemetry data integration with existing baselines.

## What This Notebook Does:
- ✅ Processes new telemetry sessions
- ✅ Compares new data with existing baselines
- ✅ Updates performance benchmarks
- ✅ Generates driver improvement reports
- ✅ Uploads new data to live dashboard

## Use Cases:
- Adding new practice/qualifying/race sessions
- Integrating new driver data
- Updating performance baselines
- Generating progress reports

---

## 📋 Step 1: Setup and Configuration

In [None]:
import sys
import pandas as pd
import numpy as np
from pathlib import Path
import json
import boto3
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Add project root to path
sys.path.append(str(Path.cwd()))

from scripts.new_data_processor import NewDataProcessor
from src.data_processing.data_cleaner import GRCupDataCleaner

# Print a banner with the current local time.
print("🏎️ GR Cup Analytics - Additional Data Processing")
print("=" * 55)
print(f"Timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print()

# Initialize processor
processor = NewDataProcessor()

# Check existing baselines
baseline_dir = Path("data/baselines")

# Bind the name up front so it exists (as an empty list) even when the
# baseline directory is missing; otherwise any later reference would raise
# NameError.
existing_baselines = []

if baseline_dir.exists():
    # glob() yields entries in filesystem order; sort for a stable listing.
    existing_baselines = sorted(baseline_dir.glob("*_baseline_metrics.json"))
    print(f"📊 Found {len(existing_baselines)} existing baselines:")
    for baseline_file in existing_baselines:
        # The track id is the file stem with the fixed suffix stripped.
        track_id = baseline_file.stem.replace('_baseline_metrics', '')
        print(f"  ✅ {track_id}")
else:
    print("⚠️ No existing baselines found. Run core deployment notebook first.")

print()

## 📤 Step 2: Upload New Telemetry Data

In [None]:
# Configuration for new data file
NEW_DATA_FILE = "path/to/your/new/telemetry.csv"  # Update this path
SESSION_TYPE = "PRACTICE"  # PRACTICE, QUALIFYING, or RACE
SESSION_DATE = datetime.now().strftime("%Y%m%d")


def track_id_label(df):
    """Return the first ``track_id`` value of *df*, or ``'Not specified'``.

    The fallback is used when the frame has no ``track_id`` column or has
    no rows at all (e.g. a header-only CSV), where ``.iloc[0]`` would
    otherwise raise ``IndexError``.
    """
    if 'track_id' not in df.columns or df.empty:
        return 'Not specified'
    return df['track_id'].iloc[0]


print("📤 New Telemetry Data Upload Configuration")
print("=" * 45)
print(f"Data File: {NEW_DATA_FILE}")
print(f"Session Type: {SESSION_TYPE}")
print(f"Session Date: {SESSION_DATE}")
print()

# Check if file exists. is_file() (rather than exists()) also rejects a
# directory path, which would otherwise fail inside pd.read_csv.
data_path = Path(NEW_DATA_FILE)
if data_path.is_file():
    print("✅ New data file found")

    # Preview the data: only the first rows are read here.
    preview_df = pd.read_csv(data_path, nrows=5)
    print(f"📊 Data preview ({len(preview_df)} rows shown):")
    print(preview_df.head())
    print()

    # Check data format: every one of these columns must be present.
    required_columns = [
        'vehicle_id', 'timestamp', 'lap', 'Speed', 'pbrake_f',
        'ath', 'Steering_Angle', 'accx_can', 'accy_can', 'nmotor', 'Gear',
    ]

    missing_columns = [col for col in required_columns if col not in preview_df.columns]
    if missing_columns:
        print(f"❌ Missing required columns: {missing_columns}")
        print("Please ensure your data file has all required columns.")
    else:
        print("✅ Data format validation passed")

        # Get basic stats; the full file is loaded only after validation passes.
        full_df = pd.read_csv(data_path)
        print("📈 Data Statistics:")
        print(f"  Total Records: {len(full_df):,}")
        print(f"  Unique Drivers: {full_df['vehicle_id'].nunique()}")
        print(f"  Total Laps: {full_df['lap'].nunique()}")
        print(f"  Track ID: {track_id_label(full_df)}")

else:
    print(f"❌ Data file not found: {NEW_DATA_FILE}")
    print("Please update the NEW_DATA_FILE path above to point to your telemetry CSV file.")
    print()
    print("📋 Required CSV format:")
    print("vehicle_id,timestamp,meta_time,lap,Speed,pbrake_f,ath,Steering_Angle,accx_can,accy_can,nmotor,Gear,track_name,track_id")
    print("GR86-002-015,1761847091636,1761847091636,1,158.02,0.0,77.00,-0.16,-0.84,0.45,6900.42,5,Barber Motorsports Park,BMP")