# 🤖 LLM Analysis of VAR Incidents

**DS 112 Final Project**: VAR Fairness Audit - LLM Extension

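This notebook uses Google Gemini to analyze VAR (Video Assistant Referee) incident descriptions. For each incident the model labels the decision FAIR or CONTROVERSIAL, and those labels are then compared with whether the referee's decision was actually overturned.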

In [None]:
# Install required packages for Colab
!pip install google-generativeai transformers torch seaborn matplotlib pandas scikit-learn plotly

In [None]:
import os
import re
import time
import warnings

import google.generativeai as genai
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
# Silence every Python warning for the rest of the session so cell output stays
# tidy. NOTE(review): this also hides deprecation notices from the libraries
# imported above — consider narrowing the filter while debugging.
warnings.filterwarnings('ignore')

# Global plot styling applied to every figure drawn below: a matplotlib style
# sheet plus seaborn's "husl" colour palette.
# NOTE(review): 'seaborn-v0_8' presumably requires a matplotlib release that
# ships a style under that name — confirm against the installed version.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

## 📊 Data Loading

In [None]:
# Load the combined VAR data
# Upload var_combined.csv to Colab first
DATA_PATH = 'var_combined.csv'

# Columns the later cells index by name. Checking them here fails fast with a
# clear message instead of a KeyError after the (slow, billable) LLM calls.
REQUIRED_COLUMNS = ['Team', 'Description', 'Decision']

df = pd.read_csv(DATA_PATH)

missing_columns = [col for col in REQUIRED_COLUMNS if col not in df.columns]
if missing_columns:
    raise ValueError(
        f"{DATA_PATH} is missing required column(s): {missing_columns}. "
        f"Found columns: {list(df.columns)}"
    )

print(f"Dataset shape: {df.shape}")
print(f"Columns: {list(df.columns)}")
df.head()

## 🧠 Google Gemini Setup

In [None]:
# Configure Google Gemini
# In Colab: open the 🔑 "Secrets" panel in the left sidebar, add a secret and
# enable notebook access for it.
# Name: GEMINI_API_KEY, Value: your_api_key_here
# Outside Colab: export GEMINI_API_KEY as an environment variable instead.
# Avoid pasting the key into the notebook source — it would be saved and shared
# together with the notebook.

api_key = None
colab_error = None

try:
    # Imported lazily because the module only exists inside Colab.
    from google.colab import userdata
    api_key = userdata.get('GEMINI_API_KEY')
except Exception as e:
    # Reached when not running in Colab (import fails) or when the secret
    # lookup itself fails; remember the reason and try the environment next.
    colab_error = e

if not api_key:
    api_key = os.environ.get('GEMINI_API_KEY')

if api_key:
    genai.configure(api_key=api_key)
    # NOTE: this only stores the key; an invalid key is not detected until the
    # first request is made.
    print("✅ Gemini API configured successfully")
else:
    print("⚠️ Please add GEMINI_API_KEY to Colab secrets")
    if colab_error is not None:
        print(f"Error: {colab_error}")

In [None]:
def analyze_var_incident(description, actual_decision, model_name='gemini-pro'):
    """Ask Gemini for a fairness assessment of a single VAR incident.

    Args:
        description: Text describing the incident; interpolated into the prompt.
        actual_decision: The decision that was actually taken. It is shown to
            the model as part of the prompt, so the model judges the decision
            rather than predicting it blind.
        model_name: Identifier passed to ``genai.GenerativeModel``. Defaults to
            'gemini-pro' for backward compatibility.
            NOTE(review): hosted model ids change over time — confirm this one
            is still served, or pass a current id.

    Returns:
        str: The raw response text. The prompt requests ``FAIRNESS:``,
        ``CONFIDENCE:`` and ``REASONING:`` lines, but the model is not
        guaranteed to comply. If anything raises while calling the API (or
        reading the response text), the exception is not propagated; a string
        of the form ``"Error: <message>"`` is returned instead.
    """
    prompt = f"""
    You are an expert football referee analyst. Analyze this VAR incident:
    
    Incident: {description}
    Actual Decision: {actual_decision}
    
    Please provide:
    1. Fairness Assessment: FAIR or CONTROVERSIAL (one word)
    2. Confidence: 1-10 scale
    3. Brief reasoning
    
    Format:
    FAIRNESS: [FAIR/CONTROVERSIAL]
    CONFIDENCE: [1-10]
    REASONING: [explanation]
    """

    try:
        model = genai.GenerativeModel(model_name)
        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        # Deliberate best-effort: callers receive a string either way and can
        # detect failures by the "Error: " prefix.
        return f"Error: {str(e)}"

# Smoke test: run the analysis on the first row, provided the dataset has one
if df.shape[0] > 0:
    first_incident = df.iloc[0]
    sample_analysis = analyze_var_incident(
        first_incident['Description'],
        first_incident['Decision'],
    )
    print("🔍 Sample LLM Analysis:", sample_analysis, sep="\n")

## 🤖 Batch Analysis

In [None]:
# Analyze subset of incidents
SAMPLE_SIZE = 50              # upper bound on incidents sent to the LLM
REQUEST_DELAY_SECONDS = 0.5   # small pause between consecutive API calls
DEFAULT_FAIRNESS = 'FAIR'     # used when no FAIRNESS: line is found
DEFAULT_CONFIDENCE = 5        # used when no usable CONFIDENCE: value is found


def _parse_llm_response(analysis):
    """Extract ``(fairness, confidence)`` from the raw LLM response text.

    Args:
        analysis: Response text, scanned line by line (case-insensitively) for
            ``FAIRNESS:`` and ``CONFIDENCE:`` labels.

    Returns:
        tuple: ``(fairness, confidence)`` where ``fairness`` is 'FAIR' or
        'CONTROVERSIAL' and ``confidence`` is an int clamped to 1..10. Missing
        or unparseable fields fall back to the defaults defined above.
    """
    fairness = DEFAULT_FAIRNESS
    confidence = DEFAULT_CONFIDENCE

    for line in analysis.split('\n'):
        upper_line = line.upper()
        if 'FAIRNESS:' in upper_line:
            fairness = 'CONTROVERSIAL' if 'CONTROVERSIAL' in upper_line else 'FAIR'
        elif 'CONFIDENCE:' in upper_line:
            # Look only at the text after the label and take the FIRST number.
            # Joining every digit on the line would read "8/10" as 810 and
            # "2. CONFIDENCE: 7" as 27, both of which clamp to 10.
            value_text = upper_line.split('CONFIDENCE:', 1)[1]
            match = re.search(r'\d+', value_text)
            if match:
                confidence = max(1, min(10, int(match.group())))
            else:
                confidence = DEFAULT_CONFIDENCE

    return fairness, confidence


sample_size = min(SAMPLE_SIZE, len(df))
df_sample = df.head(sample_size).copy()

llm_predictions = []
llm_confidence = []
failed_calls = 0

print(f"Analyzing {sample_size} incidents...")

# Count positions with enumerate so progress does not depend on the index labels
for position, (_, row) in enumerate(df_sample.iterrows()):
    if position % 10 == 0:
        print(f"Progress: {position}/{sample_size}")

    analysis = analyze_var_incident(row['Description'], row['Decision'])

    # analyze_var_incident reports failures as an "Error: ..." string; such a
    # response parses to the defaults, so keep track of how often it happens.
    if analysis.startswith('Error:'):
        failed_calls += 1

    fairness, confidence = _parse_llm_response(analysis)
    llm_predictions.append(fairness)
    llm_confidence.append(confidence)

    # Small delay
    time.sleep(REQUEST_DELAY_SECONDS)

df_sample['LLM_Prediction'] = llm_predictions
df_sample['LLM_Confidence'] = llm_confidence

if failed_calls:
    print(
        f"⚠️ {failed_calls}/{sample_size} API calls failed and were recorded as "
        f"{DEFAULT_FAIRNESS} with confidence {DEFAULT_CONFIDENCE}; "
        "the metrics below include these placeholders."
    )

print("✅ Analysis complete!")

## 📊 Results Analysis

In [None]:
# Create binary variables
# 1 = decision was overturned / LLM labelled the decision CONTROVERSIAL
df_sample['Actual_Overturned'] = (df_sample['Decision'] == 'Overturned').astype(int)
df_sample['LLM_Controversial'] = (df_sample['LLM_Prediction'] == 'CONTROVERSIAL').astype(int)

# Calculate accuracy
# NOTE(review): "accuracy" here is the rate at which CONTROVERSIAL coincides
# with Overturned (and FAIR with everything else); read it as agreement between
# the two labels.
accuracy = accuracy_score(df_sample['Actual_Overturned'], df_sample['LLM_Controversial'])
print(f"🎯 LLM Accuracy: {accuracy:.3f}")

# Classification report
print("\n📋 Classification Report:")
print(classification_report(df_sample['Actual_Overturned'], df_sample['LLM_Controversial']))

# Show sample predictions
PREVIEW_ROWS = 5
PREVIEW_CHARS = 80

print("\n🔍 Sample Results:")
for _, row in df_sample.head(PREVIEW_ROWS).iterrows():
    # A missing description is read from CSV as NaN (a float), which cannot be
    # sliced; normalise to text before truncating.
    description = row['Description']
    description = '' if pd.isna(description) else str(description)
    if len(description) > PREVIEW_CHARS:
        description = description[:PREVIEW_CHARS] + '...'

    print(f"Team: {row['Team']}")
    print(f"Actual: {row['Decision']} | LLM: {row['LLM_Prediction']} (Conf: {row['LLM_Confidence']})")
    print(f"Description: {description}")
    print("-" * 50)

In [None]:
# Confusion matrix
# labels=[0, 1] pins the matrix to 2x2 even when one class never occurs in the
# sample; without it the matrix collapses to 1x1 and no longer matches the two
# tick labels below.
cm = confusion_matrix(
    df_sample['Actual_Overturned'],
    df_sample['LLM_Controversial'],
    labels=[0, 1],
)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=['LLM: Fair', 'LLM: Controversial'],
            yticklabels=['Actual: Upheld', 'Actual: Overturned'],
            ax=ax)
ax.set_title('🤖 LLM vs Referee Decisions')
ax.set_ylabel('Actual Decision')
ax.set_xlabel('LLM Prediction')
plt.show()

# Confidence distribution (left) and share of each LLM label (right)
fig, (ax_hist, ax_pie) = plt.subplots(1, 2, figsize=(10, 5))

# Confidence is an integer score in 1..10, so use one bin centred on each
# integer instead of 10 equal-width bins over whatever range was observed.
confidence_bins = np.arange(0.5, 11.5, 1)
ax_hist.hist(df_sample['LLM_Confidence'], bins=confidence_bins, alpha=0.7)
ax_hist.set_title('LLM Confidence Distribution')
ax_hist.set_xlabel('Confidence Score')
ax_hist.set_ylabel('Number of incidents')

pred_counts = df_sample['LLM_Prediction'].value_counts()
ax_pie.pie(pred_counts.values, labels=pred_counts.index, autopct='%1.1f%%')
ax_pie.set_title('LLM Predictions')

fig.tight_layout()
plt.show()

## 📋 Summary

In [None]:
# Plain-text recap of the run: dataset counts, LLM metrics and a verdict line.
RULE = "=" * 60

print(RULE)
print("🏁 VAR FAIRNESS AUDIT - LLM ANALYSIS SUMMARY")
print(RULE)

print("\n📊 Dataset Statistics:")
print(f"• Incidents analyzed: {len(df_sample)}")
# Both flag columns are reported the same way: count and share of 1s
for label, column in (
    ("Actual overturns", 'Actual_Overturned'),
    ("LLM controversial", 'LLM_Controversial'),
):
    flags = df_sample[column]
    print(f"• {label}: {flags.sum()} ({flags.mean():.1%})")

print("\n🤖 LLM Performance:")
print(f"• Accuracy: {accuracy:.1%}")
mean_confidence = df_sample['LLM_Confidence'].mean()
print(f"• Avg confidence: {mean_confidence:.1f}/10")

# Thresholds are checked from highest to lowest; the first one exceeded wins
agreement_levels = (
    (0.7, "• ✅ Good agreement with referees"),
    (0.5, "• ⚠️ Moderate agreement with referees"),
)
verdict = next(
    (message for threshold, message in agreement_levels if accuracy > threshold),
    "• ❌ Poor agreement with referees",
)
print(verdict)

print("\n✨ Analysis demonstrates AI potential in sports fairness auditing!")
print(RULE)