In this notebook, we find:
1. The number of games in which each player scored at least N points, by season
2. The pairs of teammates who both scored at least N points in the same game, and how often that happened per season
3. The combined number of games with at least N points by each team's top two players (the two players with the most such games), by season

In [1]:
import pandas as pd
from euroleague_api.boxscore_data import BoxScoreData

# Load data

## Historical

In [3]:
# Load the historical player box scores from a local CSV file.
# NOTE(review): this is an absolute, machine-specific path, so the cell only
# runs where that file exists. The file name suggests it covers seasons
# 2007-2023 — confirm.
file = "C:/Users/giase/Documents/euroleague_api/notebooks/data/player_boxscore_stats_2007_2023.csv"
hist_df = pd.read_csv(file)

## Current Season

In [None]:
# Fetch the player box scores of a single season and save them as a CSV.
season = 2024
# NOTE(review): "E" is presumably the competition code — confirm against the
# euroleague_api documentation.
boxscore_data = BoxScoreData("E")
player_bxs_df = boxscore_data.get_player_boxscore_stats_single_season(season)
# Written relative to the current working directory, without the index column.
player_bxs_df.to_csv(f"player_boxscore_stats_{season}.csv", index=False)

## Merge

In [6]:
# Stack the historical and current-season rows. Each frame keeps its own row
# labels (no ignore_index), so the index of `df` may contain duplicates.
df = pd.concat([hist_df, player_bxs_df], axis=0)

In [None]:
# Inspect the column names of the merged frame.
df.columns

# Data Processing

In [None]:
# Keep every row whose Player_ID is neither "Team" nor "Total" (presumably
# aggregate rows rather than individual players — confirm), then renumber
# the remaining rows from zero.
mask = ~df["Player_ID"].isin(["Team", "Total"])
player_df = df.loc[mask].reset_index(drop=True)

# Top players and pairs

In [13]:
def get_pair_of_players_in_same_game(df, n_points):
    """Count how often two teammates both reach `n_points` in the same game.

    Args:
        df: Player box-score rows. Must contain the columns ``Season``,
            ``Gamecode``, ``Team``, ``Player`` and ``Points``.
        n_points: Inclusive points threshold (``Points >= n_points``).

    Returns:
        DataFrame with the columns ``Season``, ``Team``, ``Player1``,
        ``Player2`` and ``Count``: one row per teammate pair and season, where
        ``Count`` is the number of games in which both players reached the
        threshold. ``Player1`` always sorts before ``Player2``, so a pair is
        never split across two orderings.
    """
    from itertools import combinations

    cols = ['Season', "Team", 'Player1', 'Player2']
    qualifying = df[df["Points"] >= n_points]

    # Grouping by team within a game restricts the pairs to teammates. A group
    # with a single qualifying player produces no combinations, so games do
    # not have to be pre-filtered for "at least two qualifying players".
    player_pairs = []
    for (season, _, team), group in qualifying.groupby(['Season', 'Gamecode', 'Team']):
        # Sort the names so the same pair always appears in the same order
        players = sorted(group['Player'].tolist())
        player_pairs.extend(
            (season, team, first, second)
            for first, second in combinations(players, 2)
        )

    player_pairs_df = pd.DataFrame(player_pairs, columns=cols)

    # Each row is one game in which the pair qualified; count rows per pair
    pair_counts = player_pairs_df.groupby(cols).size().reset_index(name='Count')
    return pair_counts

In [18]:
def get_n_games_with_n_points_by_player(df, n_points):
    """Count, per season, team and player, the games with at least `n_points`.

    Args:
        df: Player box-score rows with the columns ``Season``, ``Team``,
            ``Player`` and ``Points``.
        n_points: Inclusive points threshold (``Points >= n_points``).

    Returns:
        DataFrame with the columns ``Season``, ``Team``, ``Player`` and
        ``Count``, ordered by ``Season`` and then ``Count``, both descending.
    """
    reached_threshold = df.loc[df["Points"] >= n_points]

    # One row per qualifying game, so the group size is the number of games
    games_per_player = (
        reached_threshold
        .groupby(['Season', "Team", 'Player'])
        .size()
        .reset_index(name='Count')
    )

    # Latest season first; within a season, the most frequent scorers first
    return games_per_player.sort_values(
        by=['Season', 'Count'], ascending=[False, False]
    )


def get_n_games_n_points_top_two_players_by_team(df, n_points):
    """Sum the `n_points` game counts of each team's top two players.

    For every season and team, the two players with the most games of at
    least `n_points` are selected and their game counts are added together.
    A team with a single qualifying player contributes that player's count
    alone.

    Args:
        df: Player box-score rows with the columns ``Season``, ``Team``,
            ``Player`` and ``Points``.
        n_points: Inclusive points threshold (``Points >= n_points``).

    Returns:
        DataFrame with the columns ``Season``, ``Team`` and ``Count``, one row
        per season and team.
    """
    group_cols = ['Season', 'Team']
    player_counts = get_n_games_with_n_points_by_player(df, n_points)

    # Rank by count and keep the first two rows of every team-season. This
    # avoids `groupby.apply`, whose operation on the grouping columns is
    # deprecated in recent pandas; once those columns are excluded, the
    # follow-up groupby on Season/Team can no longer find them.
    top_2_players = (
        player_counts
        .sort_values('Count', ascending=False)
        .groupby(group_cols)
        .head(2)
    )
    top_2_counts = top_2_players.groupby(group_cols)['Count'].sum().reset_index()
    return top_2_counts


In [None]:
# Points threshold (the "N" of this notebook) for the analyses.
n_points = 20

In [None]:
# Teammate pairs that most often both reached the threshold in the same game.
# Uses the notebook-level `n_points` instead of a repeated literal so the
# threshold is changed in one place.
top_pairs_df = get_pair_of_players_in_same_game(player_df, n_points)
top_pairs_df.sort_values("Count", ascending=False).head(20).reset_index(drop=True)

In [None]:
# Games with at least `n_points` per season, team and player. Uses the
# notebook-level `n_points` and `season` instead of repeated literals.
top_scorers_df = get_n_games_with_n_points_by_player(player_df, n_points)
# All-time top 30.
# NOTE(review): in a notebook cell only the last expression is rendered, so
# this result is computed but not displayed — confirm whether that is intended.
top_scorers_df.sort_values("Count", ascending=False).head(30).reset_index(drop=True)
# First 20 rows of the current season (the frame is already ordered by count
# within each season)
top_scorers_df[top_scorers_df["Season"] == season].head(20).reset_index(drop=True)

In [None]:
# Team-seasons whose top two players combined for the most games with at
# least `n_points`. Uses the notebook-level `n_points` instead of a repeated
# literal so the threshold is changed in one place.
teams_top_pairs_df = get_n_games_n_points_top_two_players_by_team(player_df, n_points)
teams_top_pairs_df.sort_values("Count", ascending=False).head(20).reset_index(drop=True)