# VAR Fairness Audit: Statistical Analysis

**DS 112 Final Project**

This notebook performs statistical tests and ML modeling to detect potential bias.

In [None]:
# Install required packages
!pip install pandas numpy matplotlib seaborn plotly scikit-learn scipy

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Plot appearance: start from matplotlib's stock style, then set the default figure size
plt.style.use('default')
plt.rcParams.update({'figure.figsize': (10, 6)})

In [None]:
# Build the derived columns that the later analysis cells look for
print("Creating derived features...")

# 1. decision_favorable is derived from the (IncidentType, Decision) pair
def determine_favorability(incident_type, decision):
    """Score one VAR review as favorable (1), unfavorable (0) or neutral (0.5).

    An upheld 'Penalty' / 'Goal Review' and an overturned 'Red Card' /
    'Offside' / 'Handball' both score 1; the two opposite pairings score 0.
    Any incident type or decision outside those vocabularies scores 0.5.
    """
    good_when_upheld = incident_type in ('Penalty', 'Goal Review')
    good_when_overturned = incident_type in ('Red Card', 'Offside', 'Handball')
    was_upheld = decision in ('Upheld',)
    was_overturned = decision in ('Overturned',)

    # Anything outside the known vocabularies cannot be scored either way
    if not (good_when_upheld or good_when_overturned):
        return 0.5  # Neutral
    if not (was_upheld or was_overturned):
        return 0.5  # Neutral

    # Favorable exactly when the outcome matches the helpful direction:
    # upheld for penalty/goal reviews, overturned for cards/offside/handball
    return 1 if good_when_upheld == was_upheld else 0

# 3. Clean up TimeInMatch column to extract numeric minutes
# Some TimeInMatch values might have format like '45+2'' or '90+3''
# (defined before the guarded section below so that section can use it)
def extract_minutes(time_str, default=45):
    """Return the base match minute encoded in a TimeInMatch value.

    Stoppage time is collapsed onto the base minute, so "45+2'" gives 45.

    Args:
        time_str: Raw value; strings such as "67", "67'" or "90+3'" and plain
            numbers are accepted.
        default: Minute returned for missing or unparseable values (45, the
            middle of the game, unless overridden).

    Returns:
        int: The parsed base minute, or ``default``.
    """
    if pd.isna(time_str):
        return default

    # Drop surrounding whitespace and any trailing minute mark, then keep only
    # the part before a stoppage-time "+".
    text = str(time_str).strip().strip("'\u2019\u2032").strip()
    base_minute = text.split('+', 1)[0].strip()

    try:
        return int(base_minute)
    except ValueError:
        pass
    try:
        # Numbers stored as floats stringify as "67.0", which int() rejects
        return int(float(base_minute))
    except (ValueError, OverflowError):
        return default


# The steps below modify `df`, which is created by the "Load and Prepare Data"
# cell. Skip with a message (as the other cells do) rather than raising a
# NameError when this cell is executed before the data has been loaded.
if globals().get('df') is not None:
    # Apply the function to create decision_favorable column
    df['decision_favorable'] = df.apply(lambda row: determine_favorability(row['IncidentType'], row['Decision']), axis=1)

    # 2. Create team_tier column based on Rank
    # Five-place bands. The last edge stretches to the largest rank present so
    # that ranks above 20 land in 'Bottom Tier' instead of becoming NaN.
    rank_bins = [0, 5, 10, 15, max(20, df['Rank'].max())]
    rank_labels = ['Top Tier', 'Upper Mid', 'Lower Mid', 'Bottom Tier']

    # Create team_tier column
    df['team_tier'] = pd.cut(df['Rank'], bins=rank_bins, labels=rank_labels, include_lowest=True)

    # Apply the function if TimeInMatch is not already numeric
    if not pd.api.types.is_numeric_dtype(df['TimeInMatch']):
        df['TimeInMatch'] = df['TimeInMatch'].apply(extract_minutes)

    print("✅ Derived features created successfully")
else:
    print("❌ Cannot create derived features: `df` is not loaded yet.")
    print("Run the 'Load and Prepare Data' cell first, then re-run this cell.")

## Load and Prepare Data

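First, let's load the combined dataset and prepare it for statistical analysis. Note that the derived-features cell above operates on `df`, so run this loading cell before it (or re-run that cell after loading).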

In [None]:
# Output file from data extraction notebook
COMBINED_FILE = 'var_combined.csv'

# `df` is always bound once this cell finishes (None on failure), because the
# later cells branch on `df is not None`.
df = None

# Load the combined dataset
try:
    df = pd.read_csv(COMBINED_FILE)
    print(f"✅ Successfully loaded dataset from {COMBINED_FILE}")
    print(f"Dataset shape: {df.shape} (rows, columns)")
    print("\nFirst 5 rows:")
    display(df.head())

    # Display column information
    print("\nColumn information:")
    df.info()

except FileNotFoundError:
    print(f"❌ Error: Could not find {COMBINED_FILE}")
    print("Attempting to load and merge original datasets...")

    try:
        # Try to load original datasets
        var_incidents = pd.read_csv('VAR_Incidents_Stats.csv')
        team_stats = pd.read_csv('VAR_Team_Stats.csv')

        # Merge datasets: keep every incident row, attach team stats by 'Team'
        df = pd.merge(var_incidents, team_stats, on='Team', how='left')
        print("✅ Successfully loaded and merged original datasets")
        print(f"Dataset shape: {df.shape} (rows, columns)")
        print("\nFirst 5 rows:")
        display(df.head())

    except FileNotFoundError:
        print("❌ Error: Could not find original data files either.")
        print("Please make sure VAR_Incidents_Stats.csv and VAR_Team_Stats.csv are available.")
        df = None
    except Exception as e:
        # An exception raised inside this fallback is NOT seen by the outer
        # `except Exception` clause, so handle it here (e.g. unreadable CSV or
        # a missing 'Team' column) to keep `df` well-defined.
        print(f"❌ Error loading data: {str(e)}")
        df = None
    else:
        # Save combined dataset for future use. A failed write must not throw
        # away the merged frame, so it is reported separately.
        try:
            df.to_csv(COMBINED_FILE, index=False)
            print(f"✅ Saved merged dataset to {COMBINED_FILE}")
        except OSError as e:
            print(f"⚠️ Could not save merged dataset to {COMBINED_FILE}: {e}")
except Exception as e:
    print(f"❌ Error loading data: {str(e)}")
    df = None

## Check Required Features

Before proceeding with analysis, let's verify that we have all the necessary features.

In [None]:
if df is not None:
    # Columns every analysis below relies on. Only the keys are checked; the
    # values are human-readable descriptions.
    required_features = {
        'Team': 'Team name',
        'IncidentType': 'Type of VAR decision',
        'TimeInMatch': 'Minute in the match when decision occurred'
    }

    # Team stats features we expect to have.
    # NOTE(review): descriptions follow the column names — confirm against the
    # data extraction notebook.
    team_stats_features = {
        'Rank': 'Team ranking in the league',
        'Goals_For': 'Goals scored by the team',
        'Fouls_Per_Game': 'Average fouls per game',
        'Wins': 'Number of wins'
    }

    # Check for required features
    missing_required = [feat for feat in required_features if feat not in df.columns]
    if missing_required:
        print(f"❌ Missing required features: {', '.join(missing_required)}")
        print("Some analyses may not be possible without these features.")
    else:
        print("✅ All required basic features are present")

    # Check for team stats features
    missing_team_stats = [feat for feat in team_stats_features if feat not in df.columns]
    if missing_team_stats:
        print(f"⚠️ Missing team stats features: {', '.join(missing_team_stats)}")
        print("Creating dummy features for analysis...")

        # NOTE(review): everything created in this branch is a placeholder, not
        # real data. The non-rank placeholders are pure functions of Rank, so
        # they add no independent information to the model.
        if 'Rank' not in df.columns:
            # Create dummy rank based on alphabetical order of team names
            team_ranks = {team: i+1 for i, team in enumerate(sorted(df['Team'].unique()))}
            df['Rank'] = df['Team'].map(team_ranks)
            print("  ✓ Created dummy 'Rank' feature")

        # 'Rank' is guaranteed to exist from here on (present or just created),
        # so each remaining placeholder is simply scale / Rank.
        dummy_scales = {'Goals_For': 100000000, 'Fouls_Per_Game': 50000, 'Wins': 100}
        for column, scale in dummy_scales.items():
            if column not in df.columns:
                df[column] = scale / df['Rank']
                print(f"  ✓ Created dummy '{column}' feature")
    else:
        print("✅ All team stats features are present")

    # Create decision_favorable if it doesn't exist
    if 'decision_favorable' not in df.columns and 'IncidentType' in df.columns:
        print("Creating 'decision_favorable' feature...")
        # Outcome labels that can be scored from IncidentType alone
        favorable_decisions = ['penalty_awarded', 'goal_allowed', 'red_card_to_opponent']
        unfavorable_decisions = ['penalty_overturned', 'goal_disallowed', 'red_card_to_team']

        # Private name on purpose: a cell-level `def determine_favorability`
        # here would overwrite the two-argument function of the same name
        # defined in the derived-features cell.
        def _favorability_from_label(decision):
            if decision in favorable_decisions:
                return 1  # Favorable
            elif decision in unfavorable_decisions:
                return 0  # Unfavorable
            else:
                return 0.5  # Neutral

        favorability = df['IncidentType'].apply(_favorability_from_label)

        # Rows the label vocabulary could not score may still be scorable from
        # the (IncidentType, Decision) pair when the two-argument scorer from
        # the derived-features cell has been defined in this kernel.
        # NOTE(review): assumes a global `determine_favorability(incident_type,
        # decision)` — confirm the cell defining it has been run.
        pair_scorer = globals().get('determine_favorability')
        if 'Decision' in df.columns and callable(pair_scorer):
            unresolved = favorability == 0.5
            if unresolved.any():
                favorability.loc[unresolved] = [
                    pair_scorer(incident, decision)
                    for incident, decision in zip(
                        df.loc[unresolved, 'IncidentType'],
                        df.loc[unresolved, 'Decision'],
                    )
                ]

        df['decision_favorable'] = favorability
        print("  ✓ Created 'decision_favorable' feature")

    # Create team tier if it doesn't exist
    if 'team_tier' not in df.columns and 'Rank' in df.columns:
        print("Creating 'team_tier' feature...")
        # NOTE(review): quartiles are taken over the Rank column as stored in
        # `df`; the derived-features cell uses fixed rank bands instead, so
        # tiers may differ depending on which cell created them — confirm
        # which definition is intended.
        df['team_tier'] = pd.qcut(df['Rank'], q=4, labels=['Top Tier', 'Upper Mid', 'Lower Mid', 'Bottom Tier'])
        print("  ✓ Created 'team_tier' feature")

    print("\nDataset ready for analysis with all necessary features!")
else:
    print("❌ Cannot proceed with analysis without data.")

## Statistical Tests

Let's perform some statistical tests to check for potential bias in VAR decisions.

In [None]:
# Import statistical libraries
from scipy import stats
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

if df is not None and 'team_tier' in df.columns and 'decision_favorable' in df.columns:
    # Chi-square test of independence between team tier and favorable decisions
    print("Performing Chi-square test of independence...")
    contingency = pd.crosstab(df['team_tier'], df['decision_favorable'])
    print("\nContingency Table (Team Tier vs. Decision Favorability):")
    print(contingency)

    if contingency.shape[0] < 2 or contingency.shape[1] < 2:
        # With a single tier or a single outcome value there is nothing to
        # compare; the test would be degenerate or fail outright.
        print("\n⚠️ Need at least two team tiers and two decision outcomes for a chi-square test; skipping.")
    else:
        chi2, p, dof, expected = stats.chi2_contingency(contingency)
        print(f"\nChi-square statistic: {chi2:.4f}")
        print(f"p-value: {p:.4f}")
        print(f"Degrees of freedom: {dof}")

        # Interpret the result
        alpha = 0.05
        print(f"\nSignificance level: {alpha}")
        if p < alpha:
            print("Conclusion: Reject the null hypothesis.")
            print("There is a statistically significant relationship between team tier and favorable VAR decisions.")
        else:
            print("Conclusion: Fail to reject the null hypothesis.")
            print("There is no statistically significant relationship between team tier and favorable VAR decisions.")

        print("\nExpected frequencies (if no relationship):")
        print(pd.DataFrame(expected, index=contingency.index, columns=contingency.columns))

        # Common validity guideline for the chi-square approximation
        if (expected < 5).any():
            print("\n⚠️ Some expected frequencies are below 5; the chi-square approximation may be unreliable.")
else:
    print("❌ Cannot perform chi-square test without 'team_tier' and 'decision_favorable' features.")

## Logistic Regression Model

Let's build a model to predict favorable VAR decisions based on team characteristics.

In [None]:
if df is not None and 'decision_favorable' in df.columns:
    # Identify available features for modeling
    potential_features = ['Rank', 'Goals_For', 'Fouls_Per_Game', 'Wins']
    available_features = [f for f in potential_features if f in df.columns]

    # decision_favorable uses 0.5 for neutral outcomes. A target containing 0.5
    # is treated as continuous and rejected by the classifier, so the model is
    # fitted on the clearly favorable (1) / unfavorable (0) rows only.
    scored_rows = df[df['decision_favorable'].isin([0, 1])]

    if len(available_features) == 0:
        print("❌ No suitable features available for modeling.")
    elif scored_rows['decision_favorable'].nunique() < 2:
        print("❌ Cannot build logistic regression model: need both favorable and unfavorable decisions.")
    else:
        print(f"Building logistic regression model using features: {', '.join(available_features)}")

        excluded = len(df) - len(scored_rows)
        if excluded:
            print(f"Excluding {excluded} neutral/unscored decisions from the model.")

        # Prepare features and target variable
        X = scored_rows[available_features]
        y = scored_rows['decision_favorable'].astype(int)

        # Handle any remaining NaN values
        X = X.fillna(X.median())

        # Split data into train and test sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

        # Train logistic regression model
        model = LogisticRegression(max_iter=1000)
        model.fit(X_train, y_train)

        # Evaluate the model
        y_pred = model.predict(X_test)
        print("\nModel Evaluation:")
        print(classification_report(y_test, y_pred))

        # Display confusion matrix
        conf_matrix = confusion_matrix(y_test, y_pred)
        print("\nConfusion Matrix:")
        print(conf_matrix)

        # Get feature coefficients (binary target, so coef_ has a single row).
        # NOTE(review): features are not standardized, so coefficient sizes
        # reflect each feature's units and are not directly comparable.
        coefs = pd.DataFrame({
            'Feature': X.columns,
            'Coefficient': model.coef_[0]
        })
        coefs = coefs.sort_values('Coefficient', ascending=False)

        print("\nFeature Importance:")
        print(coefs)

        # Plot coefficients
        plt.figure(figsize=(10, 6))
        sns.barplot(x='Coefficient', y='Feature', data=coefs)
        plt.title('Feature Importance for Predicting Favorable VAR Decisions')
        plt.axvline(x=0, color='black', linestyle='--')
        plt.tight_layout()
        plt.show()

        # Interpret the results
        print("\nInterpretation:")
        for feature, coef in zip(coefs['Feature'], coefs['Coefficient']):
            if coef > 0:
                print(f"- As {feature} increases, the likelihood of favorable VAR decisions increases")
            else:
                print(f"- As {feature} increases, the likelihood of favorable VAR decisions decreases")

        # Calculate odds ratios for better interpretation
        coefs['Odds_Ratio'] = np.exp(coefs['Coefficient'])
        print("\nOdds Ratios:")
        print(coefs[['Feature', 'Coefficient', 'Odds_Ratio']])
else:
    print("❌ Cannot build logistic regression model without 'decision_favorable' feature.")

## Additional Statistical Tests

Let's perform some additional tests to further investigate potential bias.

In [None]:
def _compare_favorable_rates(name_a, sample_a, name_b, sample_b, contrast, alpha=0.05):
    """Print the mean of two samples and a Welch t-test comparing them.

    Args:
        name_a, name_b: Labels used in the printed rate lines.
        sample_a, sample_b: pandas Series of decision_favorable values.
        contrast: Phrase completing "... between <contrast>." in the conclusion.
        alpha: Significance level for the reject / fail-to-reject message.
    """
    sample_a = sample_a.dropna()
    sample_b = sample_b.dropna()

    # Calculate mean favorable decision rate for each group (neutral 0.5
    # values, if present, are part of the mean)
    print(f"{name_a} favorable decision rate: {sample_a.mean():.2f}")
    print(f"{name_b} favorable decision rate: {sample_b.mean():.2f}")

    # A t-test on an empty or single-element group yields NaN, which would
    # otherwise be reported as "fail to reject".
    if len(sample_a) < 2 or len(sample_b) < 2:
        print(f"\n⚠️ Not enough observations ({len(sample_a)} vs {len(sample_b)}) for a t-test; skipping.")
        return

    # Perform t-test (Welch: no equal-variance assumption)
    t_stat, p_val = stats.ttest_ind(sample_a, sample_b, equal_var=False)
    print(f"\nIndependent t-test results:")
    print(f"t-statistic: {t_stat:.4f}")
    print(f"p-value: {p_val:.4f}")

    # Interpret the result
    if np.isnan(p_val):
        # e.g. both groups have zero variance
        print("Conclusion: The test is inconclusive (the p-value could not be computed).")
    elif p_val < alpha:
        print("Conclusion: Reject the null hypothesis.")
        print(f"There is a statistically significant difference in favorable decision rates between {contrast}.")
    else:
        print("Conclusion: Fail to reject the null hypothesis.")
        print(f"There is no statistically significant difference in favorable decision rates between {contrast}.")


if df is not None:
    # 1. Test if top tier teams get more favorable decisions than bottom tier teams
    if 'team_tier' in df.columns and 'decision_favorable' in df.columns:
        print("Comparing favorable decisions between top and bottom tier teams...")

        # Filter for top and bottom tier teams
        top_tier = df[df['team_tier'] == 'Top Tier']['decision_favorable']
        bottom_tier = df[df['team_tier'] == 'Bottom Tier']['decision_favorable']

        _compare_favorable_rates(
            "Top tier teams", top_tier,
            "Bottom tier teams", bottom_tier,
            "top and bottom tier teams",
        )

    # 2. Test if decision patterns change in late game situations
    if 'TimeInMatch' in df.columns and 'decision_favorable' in df.columns:
        print("\nAnalyzing decision patterns in different match periods...")

        # Compare on a numeric view of the column so that values which were not
        # converted to minutes are left out instead of raising on `<=`.
        minutes = pd.to_numeric(df['TimeInMatch'], errors='coerce')
        unparsed = int(minutes.isna().sum()) - int(df['TimeInMatch'].isna().sum())
        if unparsed > 0:
            print(f"⚠️ {unparsed} TimeInMatch values are not numeric and are excluded from this comparison.")

        # Define early and late game
        early_game = df.loc[minutes <= 45, 'decision_favorable']
        late_game = df.loc[minutes > 75, 'decision_favorable']

        _compare_favorable_rates(
            "Early game", early_game,
            "Late game", late_game,
            "early and late game situations",
        )
else:
    print("❌ Cannot perform additional tests without data.")

## Conclusions and Recommendations

Based on our statistical analysis, let's summarize the key findings and provide recommendations.

In [None]:
if df is not None:
    print("VAR Fairness Audit: Key Findings")
    print("===============================\n")

    # 1. Overall decision distribution
    if 'IncidentType' in df.columns:
        decision_counts = df['IncidentType'].value_counts()
        print("1. Decision Distribution:")
        print(f"   - Total VAR decisions analyzed: {len(df)}")
        print(f"   - Most common decision: {decision_counts.index[0]} ({decision_counts.iloc[0]} occurrences, {decision_counts.iloc[0]/len(df)*100:.1f}%)")

    # 2. Team tier analysis
    if 'team_tier' in df.columns and 'decision_favorable' in df.columns:
        # observed=True: when team_tier is categorical, tiers with no rows are
        # left out instead of being listed with a NaN rate.
        tier_favor = df.groupby('team_tier', observed=True)['decision_favorable'].mean().sort_values(ascending=False)
        print("\n2. Team Tier Analysis:")
        print("   Favorable Decision Rates by Team Tier:")
        for tier, rate in tier_favor.items():
            print(f"   - {tier}: {rate:.1%}")

        # Calculate the difference between top and bottom tiers
        if 'Top Tier' in tier_favor.index and 'Bottom Tier' in tier_favor.index:
            diff = tier_favor['Top Tier'] - tier_favor['Bottom Tier']
            print(f"   - Difference between Top and Bottom tiers: {diff:.1%}")

    # 3. Predictive modeling results
    if 'Rank' in df.columns and 'decision_favorable' in df.columns:
        print("\n3. Predictive Modeling:")
        print("   The most influential factors for favorable VAR decisions:")

        # This assumes the logistic regression was run above (it defines `coefs`)
        try:
            # Rank by absolute size: a strongly negative coefficient is as
            # influential as a strongly positive one.
            by_magnitude = coefs['Coefficient'].abs().sort_values(ascending=False).index
            strongest = coefs.loc[by_magnitude, ['Feature', 'Coefficient']].head(2)
            for feature, coef in strongest.itertuples(index=False):
                direction = "positive" if coef > 0 else "negative"
                print(f"   - {feature}: {direction} relationship (coefficient: {coef:.4f})")
        except (NameError, KeyError):
            # `coefs` is missing when the model cell was skipped or could not fit
            print("   Unable to display model coefficients.")

    # 4. Recommendations
    print("\n4. Recommendations:")
    print("   Based on the statistical analysis, we recommend:")
    print("   - Implement blind review processes where VAR officials don't know team identities")
    print("   - Establish clear, objective criteria for different types of VAR decisions")
    print("   - Conduct regular audits of VAR decisions to identify and address potential bias")
    print("   - Increase transparency by publishing detailed explanations of VAR decisions")

    # 5. Limitations
    print("\n5. Limitations of this Analysis:")
    print("   - Limited sample size may affect statistical power")
    print("   - Correlation doesn't imply causation; other factors may explain observed patterns")
    print("   - Decision favorability is subjective and may not capture all nuances")
    print("   - Data quality issues may affect the reliability of findings")
else:
    print("❌ Cannot generate conclusions without data.")