# GVSA Club Analysis

This notebook provides deep dive analysis for a specific club.

## Usage
1. Set the `target_club_name` variable below
2. Run all cells to see comprehensive analysis including:
   - Club overview and team count
   - Performance across seasons
   - Team statistics aggregation
   - Visualizations of club performance trends
   - Division participation analysis

## Example
The default example analyzes "West Coast United", but you can change the variable to analyze any club.


## Import Libraries

In [None]:
# Import required libraries
import sys
from pathlib import Path
from typing import Dict, List, Any

# Database and ORM
from pony.orm import db_session, select, count, sum as db_sum
from models import db, Season, Division, Team, TeamSeason, Match, Club
from db_pony import GVSA_Database

# Data analysis
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set up plotting for the rest of the notebook.
# IPython magic (not plain Python): selects the inline matplotlib backend so
# figures are rendered in the notebook output.
%matplotlib inline
# Apply a matplotlib style sheet and a seaborn colour palette globally; both
# calls change process-wide defaults, so figures created by later cells pick
# them up.
# NOTE(review): the 'seaborn-v0_8-*' style names are version-dependent in
# matplotlib — confirm the installed version provides this one.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# Confirmation that the import/setup cell ran to completion.
print("Libraries imported successfully")


## Connect to Database


In [None]:
# Connect to the database
db_path = "/projects/gvsa_scrape/gvsa_data2.db"

# This cell must be safe to re-run in the same kernel. Pony raises if bind()
# or generate_mapping() is called a second time on the same Database object,
# so each step is performed only when it has not been done yet.
if getattr(db, 'provider', None) is None:
    # First run in this kernel: bind to the existing SQLite file
    # (create_db=False means a missing file is an error, not a new database).
    db.bind(provider='sqlite', filename=db_path, create_db=False)
else:
    # Already bound (cell re-run): reuse the binding and just drop any stale
    # connection. Changing db_path requires a kernel restart.
    print("Database already bound; reusing existing binding "
          "(restart the kernel to change db_path)")
    try:
        db.disconnect()
    except Exception as exc:
        # Best-effort cleanup: report the problem instead of hiding it.
        print(f"Warning: could not disconnect existing connection: {exc}")

if getattr(db, 'schema', None) is None:
    # Map entities onto the existing tables without creating any.
    db.generate_mapping(create_tables=False)

# Initialize database connection
# NOTE(review): what GVSA_Database does with db_path is not visible here —
# confirm it does not try to bind `db` a second time.
gvsa_db = GVSA_Database(db_path)

print(f"Connected to database: {db_path}")
print(f"Database file exists: {Path(db_path).exists()}")

# Verify connection with a trivial query
with db_session:
    season_count = count(s for s in Season)
    print(f"Seasons in database: {season_count}")


## Configure Target Club

**Modify this variable to analyze a different club:**


In [None]:
# Configuration: Set your target club here
# The club lookup cell first tries an exact match on Club.name and, if that
# finds nothing, falls back to a case-insensitive substring match, so a
# partial name is accepted as well.
target_club_name = "West Coast United"  # Change this to analyze a different club

# Echo the chosen value so the notebook output records which club was analyzed.
print(f"Target club: '{target_club_name}'")


## Find Target Club


In [None]:
# Resolve target_club_name to a Club entity and report its size.
# Leaves `target_club` set to the matched Club, or None when nothing matches.
with db_session:
    # An exact name match takes priority
    clubs = list(select(c for c in Club if c.name == target_club_name))

    # Otherwise fall back to a case-insensitive substring search
    if not clubs:
        clubs = list(select(c for c in Club if target_club_name.lower() in c.name.lower()))

    if not clubs:
        # Nothing matched: list some club names to help pick a valid one
        print(f"Warning: Could not find club '{target_club_name}'")
        print("\nAvailable clubs (first 20):")
        all_clubs = list(select(c for c in Club).order_by(Club.name).limit(20))
        for club in all_clubs:
            print(f"  - {club.name}")
        target_club = None
    else:
        # When several clubs match, the first one returned is used
        target_club = clubs[0]
        print(f"Found club: {target_club.name}")

        # Every team registered under this club
        club_teams = list(select(t for t in Team if t.club == target_club))
        print(f"Total teams: {len(club_teams)}")

        # Flatten each team's season entries into a single list
        team_seasons_list = [ts for team in club_teams for ts in team.seasons]
        print(f"Total team seasons: {len(team_seasons_list)}")

        # Distinct seasons reached through each entry's division
        seasons_set = {ts.division.season for ts in team_seasons_list}
        print(f"Seasons active: {len(seasons_set)}")


## Club Statistics by Season

In [None]:
# Build `stats_by_season`: one record of club-wide totals per season, keyed by
# "<season_name> (<year_season>)", then print a season-by-season report.
if not target_club:
    stats_by_season = {}
    print("No club data available")
else:
    with db_session:
        stats_by_season = {}

        # Accumulate running totals across every team season of the club
        for team in target_club.teams:
            for team_season in team.seasons:
                division = team_season.division
                season = division.season
                season_key = f"{season.season_name} ({season.year_season})"

                bucket = stats_by_season.get(season_key)
                if bucket is None:
                    # First entry seen for this season: start from zeroed totals
                    bucket = stats_by_season[season_key] = {
                        'season': season,
                        'teams': 0,
                        'total_wins': 0,
                        'total_losses': 0,
                        'total_ties': 0,
                        'total_points': 0,
                        'total_goals_for': 0,
                        'total_goals_against': 0,
                        'divisions': set()
                    }

                bucket['teams'] += 1
                bucket['total_wins'] += team_season.wins
                bucket['total_losses'] += team_season.losses
                bucket['total_ties'] += team_season.ties
                bucket['total_points'] += team_season.points
                bucket['total_goals_for'] += team_season.goals_for
                bucket['total_goals_against'] += team_season.goals_against
                bucket['divisions'].add(division.division_name)

        # Finalize each record: division set -> list, plus the win percentage
        for bucket in stats_by_season.values():
            bucket['divisions'] = list(bucket['divisions'])
            total_games = bucket['total_wins'] + bucket['total_losses'] + bucket['total_ties']
            # max(1, ...) keeps a season with no games at 0% instead of dividing by zero
            bucket['win_percentage'] = bucket['total_wins'] / max(1, total_games) * 100

        # Overall header
        rule = "=" * 80
        print(rule)
        print(f"Club Statistics Summary: {target_club.name}")
        print(rule)
        print(f"Total teams: {len(target_club.teams)}")
        print(f"Seasons active: {len(stats_by_season)}")
        print()

        # One paragraph per season, ordered by the season key string
        for season_key in sorted(stats_by_season):
            stats = stats_by_season[season_key]
            report_lines = [
                f"{season_key}:",
                f"  Teams: {stats['teams']}",
                f"  Record: {stats['total_wins']}W-{stats['total_losses']}L-{stats['total_ties']}T",
                f"  Points: {stats['total_points']}",
                f"  Win %: {stats['win_percentage']:.1f}%",
                f"  Goals: {stats['total_goals_for']} for, {stats['total_goals_against']} against",
                f"  Divisions: {len(stats['divisions'])}",
                "",  # trailing blank line between seasons
            ]
            print("\n".join(report_lines))


## Load Club Data into a DataFrame

In [None]:
# Load all team seasons for the club into a DataFrame (`df_club`), one row per
# team season. Later cells only check len(df_club) > 0 before using it.

# Column layout of df_club. Passing it to the constructor guarantees the
# columns exist even when the club has no team-season rows, so the summary
# lookups below cannot raise KeyError on an empty frame.
club_columns = [
    'team_name',
    'division',
    'season',
    'season_year',
    'wins',
    'losses',
    'ties',
    'points',
    'goals_for',
    'goals_against',
    'goal_differential',
]

if target_club:
    with db_session:
        club_data = []
        for team in target_club.teams:
            for ts in team.seasons:
                club_data.append({
                    'team_name': ts.team_name,
                    'division': ts.division.division_name,
                    'season': ts.division.season.season_name,
                    'season_year': ts.division.season.year_season,
                    'wins': ts.wins,
                    'losses': ts.losses,
                    'ties': ts.ties,
                    'points': ts.points,
                    'goals_for': ts.goals_for,
                    'goals_against': ts.goals_against,
                    'goal_differential': ts.goal_differential,
                })

        df_club = pd.DataFrame(club_data, columns=club_columns)

        print(f"Loaded {len(df_club)} team season records for {target_club.name}")
        print(f"\nDataFrame shape: {df_club.shape}")
        print(f"\nColumns: {list(df_club.columns)}")
        print(f"\nUnique seasons: {df_club['season'].nunique()}")
        print(f"\nUnique divisions: {df_club['division'].nunique()}")
        print(f"\nUnique teams: {df_club['team_name'].nunique()}")

        # A bare expression is only auto-displayed when it is the last
        # top-level statement of a cell; nested inside if/with it is silently
        # discarded, so the preview is printed explicitly.
        print()
        print(df_club.head(10).to_string(index=False))
else:
    df_club = pd.DataFrame()
    print("No club data available")


## Performance Trends by Season

In [None]:
# Aggregate statistics by season and plot the trends as a 2x2 grid.
if len(df_club) > 0:
    # NOTE(review): rows are grouped by the season name only, whereas the
    # per-season summary cell keys on name plus year_season — confirm season
    # names are unique across years, otherwise distinct seasons merge here.
    season_stats = df_club.groupby('season').agg({
        'points': 'sum',
        'wins': 'sum',
        'losses': 'sum',
        'ties': 'sum',
        'goals_for': 'sum',
        'goals_against': 'sum',
        'team_name': 'count'
    }).rename(columns={'team_name': 'team_count'})

    # Divide by at least one game so a season with no recorded results gets
    # 0% rather than NaN — the same rule as max(1, total_games) in the
    # per-season summary cell.
    games_played = season_stats['wins'] + season_stats['losses'] + season_stats['ties']
    season_stats['win_percentage'] = (
        season_stats['wins'] / games_played.clip(lower=1) * 100
    )
    season_stats['goal_differential'] = season_stats['goals_for'] - season_stats['goals_against']

    print("Club Performance by Season:")
    print("=" * 80)
    print(season_stats.sort_index().to_string())

    # Plot trends
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    # (axis, column, title, y-label, extra line kwargs) for each panel; the
    # first panel passes no colour so it keeps the default from the style.
    panels = [
        (axes[0, 0], 'points', 'Total Points by Season', 'Points', {}),
        (axes[0, 1], 'win_percentage', 'Win Percentage by Season', 'Win %', {'color': 'green'}),
        (axes[1, 0], 'goal_differential', 'Goal Differential by Season', 'Goal Differential', {'color': 'orange'}),
        (axes[1, 1], 'team_count', 'Number of Teams by Season', 'Team Count', {'color': 'purple'}),
    ]
    for ax, column, title, y_label, line_kwargs in panels:
        ax.plot(season_stats.index, season_stats[column], marker='o', **line_kwargs)
        ax.set_title(title)
        ax.set_xlabel('Season')
        ax.set_ylabel(y_label)
        ax.tick_params(axis='x', rotation=45)
        ax.grid(True, alpha=0.3)

    # Zero reference line on the goal-differential panel only
    axes[1, 0].axhline(y=0, color='r', linestyle='--', alpha=0.5)

    plt.tight_layout()
    plt.show()
else:
    print("No club data available for visualization")


## Division Participation


In [None]:
# Count how many team seasons the club has in each division, then print and
# chart the 20 most frequent divisions.
if len(df_club) == 0:
    print("No club data available")
else:
    division_counts = df_club['division'].value_counts()
    # value_counts() is already ordered by frequency, so head(20) is the top 20
    busiest_divisions = division_counts.head(20)

    print("Top 20 Divisions by Participation:")
    print("=" * 80)
    print(busiest_divisions.to_string())

    # Horizontal bar chart of the same top-20 slice
    fig, ax = plt.subplots(figsize=(12, 8))
    busiest_divisions.plot(kind='barh', ax=ax)
    ax.set_title(f'Division Participation - {target_club.name}')
    ax.set_xlabel('Number of Team Seasons')
    ax.set_ylabel('Division')
    plt.tight_layout()
    plt.show()


## Team Performance Summary

In [None]:
# Print descriptive statistics over every team season of the club, followed by
# the ten team seasons with the most points.
if len(df_club) == 0:
    print("No club data available")
else:
    rule = "=" * 80
    numeric_columns = [
        'wins', 'losses', 'ties', 'points',
        'goals_for', 'goals_against', 'goal_differential',
    ]
    leaderboard_columns = [
        'team_name', 'division', 'season', 'points',
        'wins', 'losses', 'ties', 'goals_for', 'goals_against',
    ]

    # Distribution of the numeric columns
    print("Club Team Performance Summary:")
    print(rule)
    print(df_club[numeric_columns].describe())

    # Best performing team seasons, ranked by points
    print("\n" + rule)
    print("Top 10 Teams by Points:")
    print(rule)
    top_teams = df_club.nlargest(10, 'points')[leaderboard_columns]
    print(top_teams.to_string(index=False))
