# GVSA Division Analysis

This notebook provides a deep-dive analysis of a single division in a given season.

## Usage
1. Set the `target_division_name` and `target_season` variables below
2. Run all cells to see comprehensive analysis including:
   - Division standings and rankings
   - Team performance comparisons
   - Distribution analysis of metrics
   - Visualizations of team performance
   - Summary statistics

## Example
The default example analyzes "U11 Boys 5th Division" in Fall 2025, but you can change these variables to analyze any division.


## Setup

In [None]:
# Import required libraries
import sys
from pathlib import Path

# Database and ORM
from pony.orm import db_session, select, count
from models import db, Season, Division, Team, TeamSeason, Match, Club
from db_pony import GVSA_Database

# Data analysis
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set up plotting
# `%matplotlib inline` is an IPython magic, not plain Python: it makes figures
# render inside the notebook output, so this cell only runs under
# IPython/Jupyter.
%matplotlib inline
# Apply a Matplotlib style sheet and a seaborn colour palette.
# NOTE(review): the 'seaborn-v0_8-darkgrid' style name presumably needs a
# recent Matplotlib release - confirm against the installed version.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# Confirmation that the cell ran to completion
print("Libraries imported successfully")


## Connect to Database


In [None]:
# Connect to the GVSA SQLite database through Pony ORM.
#
# This cell is safe to re-run: a Pony Database object raises if it is bound
# or mapped a second time, so both steps are skipped when they have already
# happened in this kernel session.
db_path = "/projects/gvsa_scrape/gvsa_data2.db"

if getattr(db, 'provider', None) is None:
    # Fail early with a readable message; create_db=False below means a
    # missing file is never created on our behalf.
    if not Path(db_path).exists():
        raise FileNotFoundError(f"Database file not found: {db_path}")
    db.bind(provider='sqlite', filename=db_path, create_db=False)
else:
    # Cell re-run: the existing binding is kept (it cannot be replaced).
    # Dropping the cached connection is best-effort, so a failure here is
    # reported rather than raised.
    try:
        db.disconnect()
    except Exception as exc:
        print(f"Note: could not disconnect cleanly ({exc})")
    print("Database already bound in this kernel; reusing the existing binding "
          "(restart the kernel to switch database files)")

# Entity-to-table mapping may only be generated once per Database object.
if getattr(db, 'schema', None) is None:
    db.generate_mapping(create_tables=False)

# Initialize database connection
gvsa_db = GVSA_Database(db_path)

print(f"Connected to database: {db_path}")
print(f"Database file exists: {Path(db_path).exists()}")

# Verify the connection with a trivial query
with db_session:
    season_count = count(s for s in Season)
    print(f"Seasons in database: {season_count}")


## Configuration

In [None]:
# Analysis target. Edit these two values, then re-run the cells below.
#   target_division_name - division to analyze
#   target_season        - season to analyze
target_division_name, target_season = (
    "U11 Boys 5th Division",
    "Fall 2025",
)

# Echo the selection so the notebook output records what was analyzed
print(
    f"Target division: '{target_division_name}'",
    f"Target season: {target_season}",
    sep="\n",
)


## Find Target Division



In [None]:
# Locate the division to analyze for the configured name and season.
#
# Names left in the notebook namespace, on every path:
#   target_division - the selected Division entity, or None if nothing matched
#   team_seasons    - list of TeamSeason rows for that division ([] if none)
target_division = None
team_seasons = []

with db_session:
    # Case-insensitive substring match on the division name, plus a
    # substring match on the season name.
    divisions = list(select(
        d for d in Division
        if target_division_name.lower() in d.division_name.lower()
        and target_season in d.season.season_name
    ))

    if divisions:
        target_division = divisions[0]
        print(f"Found division: {target_division.division_name}")
        print(f"Season: {target_division.season.season_name}")
        if len(divisions) > 1:
            # Make the implicit "first match wins" choice visible.
            print(f"Note: {len(divisions)} divisions matched; using the first one")
    else:
        print(f"Warning: Could not find division '{target_division_name}' in season '{target_season}'")
        print("\nSearching for similar divisions...")
        # Broader search: any of the first three words of the requested name.
        # The word filter runs in Python, because a generator over a plain
        # Python list inside select() is not something Pony can turn into SQL.
        search_terms = target_division_name.lower().split()[:3]
        season_divisions = select(
            d for d in Division
            if target_season in d.season.season_name
        )
        similar_divisions = [
            d for d in season_divisions
            if any(term in d.division_name.lower() for term in search_terms)
        ]
        if similar_divisions:
            print(f"Found {len(similar_divisions)} similar divisions:")
            for div in similar_divisions:
                print(f"  - {div.division_name} ({div.season.season_name})")
            target_division = similar_divisions[0]
            print(f"\nUsing: {target_division.division_name}")
        else:
            print("No similar divisions found. Please verify the division name and season.")

    # Load the team rows for whichever division was selected.
    if target_division is not None:
        team_seasons = list(select(ts for ts in TeamSeason if ts.division == target_division))
        print(f"Teams in division: {len(team_seasons)}")


## Load Division Data


In [None]:
# Build `df_division`: one row per team in the selected division, ordered by
# points and then goal differential (both descending), with a 1-based `rank`.
# When no division was selected, `df_division` is an empty DataFrame.

# TeamSeason attributes copied into the DataFrame, in column order
team_fields = [
    'team_name', 'wins', 'losses', 'ties', 'points',
    'goals_for', 'goals_against', 'goal_differential',
]

if target_division is not None:
    with db_session:
        team_rows = select(ts for ts in TeamSeason if ts.division == target_division)
        division_data = [
            {field: getattr(ts, field) for field in team_fields}
            for ts in team_rows
        ]

    # Passing `columns` keeps the expected columns even when the division has
    # no teams, so the sort below cannot fail on a missing column.
    df_division = pd.DataFrame(division_data, columns=team_fields)

    # Sort by points (descending), then goal differential
    df_division = df_division.sort_values(['points', 'goal_differential'], ascending=[False, False])
    df_division['rank'] = range(1, len(df_division) + 1)

    print(f"Loaded {len(df_division)} teams for {target_division.division_name}")
    print(f"\nDataFrame shape: {df_division.shape}")
    print(f"\nColumns: {list(df_division.columns)}")

    if len(df_division) > 0:
        # Printed explicitly: a bare expression nested inside an if/with
        # block is not shown as cell output.
        print("\nFirst rows:")
        print(df_division.head(10).to_string(index=False))
    else:
        print("\nWarning: the selected division has no team records")
else:
    df_division = pd.DataFrame()
    print("No division data available")


## Division Standings


In [None]:
# Print the complete standings table for the selected division
if len(df_division) == 0:
    print("No division data available")
else:
    rule = "=" * 80

    # Header: division and season
    print(rule)
    print(f"Division Standings: {target_division.division_name}")
    print(f"Season: {target_division.season.season_name}")
    print(rule)

    # Columns shown, in display order
    standings_cols = [
        'rank', 'team_name', 'points',
        'wins', 'losses', 'ties',
        'goals_for', 'goals_against', 'goal_differential',
    ]
    standings_table = df_division[standings_cols]
    print(standings_table.to_string(index=False))


## Distribution Analysis


In [None]:
# Histograms of the four headline metrics, one per panel of a 2x2 grid
if len(df_division) == 0:
    print("No division data available for visualization")
else:
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    # One entry per panel:
    # (axes, column, title, x-label, extra hist kwargs, draw a zero marker?)
    panels = [
        (axes[0, 0], 'points', 'Points Distribution', 'Points',
         {}, False),
        (axes[0, 1], 'goal_differential', 'Goal Differential Distribution', 'Goal Differential',
         {'color': 'orange'}, True),
        (axes[1, 0], 'goals_for', 'Goals For Distribution', 'Goals For',
         {'color': 'green'}, False),
        (axes[1, 1], 'goals_against', 'Goals Against Distribution', 'Goals Against',
         {'color': 'red'}, False),
    ]

    for ax, column, title, xlabel, hist_kwargs, mark_zero in panels:
        ax.hist(df_division[column], bins=20, edgecolor='black', alpha=0.7, **hist_kwargs)
        ax.set_title(title)
        ax.set_xlabel(xlabel)
        ax.set_ylabel('Number of Teams')
        ax.grid(True, alpha=0.3)
        if mark_zero:
            # Dashed line at zero separates negative from positive values
            ax.axvline(x=0, color='r', linestyle='--', alpha=0.5)

    plt.suptitle(
        f'Division Performance Distributions - {target_division.division_name}',
        fontsize=14,
        fontweight='bold',
    )
    plt.tight_layout()
    plt.show()


## Standings Visualization


In [None]:
# Horizontal bar charts of points and goal differential, one bar per team,
# drawn in the DataFrame's current row order
if len(df_division) == 0:
    print("No division data available for visualization")
else:
    fig, axes = plt.subplots(2, 1, figsize=(14, 10))

    team_positions = range(len(df_division))
    team_labels = df_division['team_name'].values

    # One entry per chart:
    # (axes, column, x-label, title, extra barh kwargs, draw a zero marker?)
    charts = [
        (axes[0], 'points', 'Points', 'Points by Team',
         {}, False),
        (axes[1], 'goal_differential', 'Goal Differential', 'Goal Differential by Team',
         {'color': 'orange'}, True),
    ]

    for ax, column, xlabel, title, bar_kwargs, mark_zero in charts:
        ax.barh(team_positions, df_division[column].values, **bar_kwargs)
        ax.set_yticks(team_positions)
        ax.set_yticklabels(team_labels, fontsize=8)
        ax.set_xlabel(xlabel)
        ax.set_title(title)
        if mark_zero:
            # Dashed reference line at zero
            ax.axvline(x=0, color='r', linestyle='--', alpha=0.5)
        ax.invert_yaxis()  # first row of the DataFrame is drawn at the top
        ax.grid(True, alpha=0.3, axis='x')

    plt.suptitle(
        f'Division Standings Visualization - {target_division.division_name}',
        fontsize=14,
        fontweight='bold',
    )
    plt.tight_layout()
    plt.show()


## Summary Statistics


In [None]:
# Descriptive statistics for the numeric columns, followed by the first and
# last five rows of the standings
if len(df_division) == 0:
    print("No division data available")
else:
    rule = "=" * 80
    numeric_cols = ['wins', 'losses', 'ties', 'points', 'goals_for', 'goals_against', 'goal_differential']
    brief_cols = ['rank', 'team_name', 'points', 'wins', 'losses', 'ties', 'goal_differential']

    print(rule)
    print(f"Division Summary Statistics: {target_division.division_name}")
    print(rule)
    print(df_division[numeric_cols].describe())

    # With fewer than ten teams the two tables overlap
    top_teams = df_division.head(5)[brief_cols]
    bottom_teams = df_division.tail(5)[brief_cols]

    for heading, table in (("Top 5 Teams:", top_teams), ("Bottom 5 Teams:", bottom_teams)):
        print("\n" + rule)
        print(heading)
        print(rule)
        print(table.to_string(index=False))
