In [None]:
# !pip install altair==5.2.0
from google.colab import drive
import os
import altair as alt
import pandas as pd
from vega_datasets import data
import warnings
import numpy as np

In [None]:
drive.mount('/content/drive')

# Load the dataset
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/SportsAnalytics/Data Visualization/wnba-shots-2021.csv')
basketball_pitch_image_url = '/content/drive/MyDrive/Colab Notebooks/SportsAnalytics/Data Visualization/court.png'


Mounted at /content/drive


In [None]:
alt.data_transformers.disable_max_rows()
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
df.describe()

Unnamed: 0,game_id,game_play_number,shot_value,coordinate_x,coordinate_y,home_score,away_score,qtr,quarter_seconds_remaining,game_seconds_remaining
count,41497.0,41497.0,41497.0,41497.0,41497.0,41497.0,41497.0,41497.0,41497.0,41497.0
mean,401398200.0,191.2966,0.956672,-44676040.0,-44676060.0,42.53681,41.75109,2.519459,281.99077,1175.086657
std,19171.63,114.060739,1.050284,87168480.0,87168490.0,25.396172,25.171965,1.138671,173.925427,701.455287
min,401391600.0,2.0,0.0,-214748300.0,-214748400.0,0.0,0.0,1.0,0.0,0.0
25%,401391700.0,92.0,0.0,5.0,0.0,21.0,20.0,2.0,128.0,572.0
50%,401391800.0,191.0,1.0,23.0,3.0,42.0,41.0,3.0,279.0,1180.0
75%,401391800.0,287.0,2.0,27.0,14.0,63.0,61.0,4.0,433.0,1790.0
max,401456000.0,533.0,3.0,50.0,81.0,134.0,116.0,6.0,600.0,2395.0


# Data preparation

In [None]:
#@title Get 2s and 3s shoots based on coords
basket_x = 50 / 2
basket_y = 0

def classify_shot_precisely(row):
    """Classifies a shot based on its distance to the basket."""
    # Shot coordinates
    x = row['coordinate_x']
    y = row['coordinate_y']

    # Calculate Euclidean distance from the basket
    distance = np.sqrt((x - basket_x)**2 + (y - basket_y)**2)

    # Classify based on distance
    if distance > 23.75 or (distance > 22 and abs(x) > 20):  # Arc or corner 3s
        return '3-pointer'
    else:
        return '2-pointer'

df['shot_classification'] = df.apply(classify_shot_precisely, axis=1)

In [None]:
#@title Create column shot category (jump shot/layup/dunk/tip shot/floater/others)
def categorize_shot(shot_type):
    if 'Jump Shot' in shot_type:
        return 'Jump Shots'
    elif 'Layup' in shot_type:
        return 'Layups'
    elif 'Dunk' in shot_type:
        return 'Dunks'
    elif 'Hook' in shot_type:
        return 'Hook Shots'
    elif 'Tip' in shot_type:
        return 'Tip Shots'
    elif 'Floating' in shot_type:
        return 'Floaters'
    else:
        return 'Miscellaneous'

df['shot_category'] = df['shot_type'].apply(categorize_shot)

In [None]:
#@title Create columns winning_status (Winning/Losing) and winning_team
df['winning_status'] = df.apply(
    lambda row: 'Winning' if row['home_score'] > row['away_score']
    else 'Losing',
    axis=1)

df['winning_team'] = df.apply(
    lambda row: row['home_team_name'] if row['home_score'] > row['away_score']
    else (row['away_team_name'] if row['away_score'] > row['home_score'] else 'Tie'),
    axis=1)

df['point_difference'] = df['home_score'] - df['away_score']

In [None]:
#@title Create columns for player, action, and full_action
def get_player_name(df):
    full_names = []
    actions = []
    full_actions = []

    for desc in df['desc'].str.split(" "):
        name = desc[0]
        surname = desc[1]
        surname2 = ""

        if desc[2] not in ["misses", "makes", "blocks"]:
            surname2 = desc[2]
            action = desc[3]
            full_action = " ".join(desc[4:])
        else:
            action = desc[2]
            full_action = " ".join(desc[3:])

        full_name = f"{name} {surname} {surname2}".strip()
        full_names.append(full_name)
        actions.append(action)
        full_actions.append(full_action)

    df['player'] = full_names
    df['action'] = actions
    df['full_action'] = full_actions
    return df

df = get_player_name(df)

In [None]:
#@title Get free throws
def add_free_throw_column(data):
    data['free_throw'] = (data['coordinate_x'] < 0) & (data['coordinate_y'] < 0)
    return data
df = add_free_throw_column(df)

In [None]:
data = df[df['coordinate_x'] >= 0].copy()
data = df[df['coordinate_y'] >= 0].copy()

# Data Viz

In [None]:
data.sample(3)

Unnamed: 0,game_id,game_play_number,desc,shot_type,made_shot,shot_value,coordinate_x,coordinate_y,shooting_team,home_team_name,...,game_seconds_remaining,shot_classification,shot_category,winning_status,winning_team,point_difference,player,action,full_action,free_throw
14001,401391731,93,Tiffany Mitchell misses driving layup,Driving Layup Shot,False,0,27,1,Indiana,Minnesota,...,1784,2-pointer,Layups,Winning,Minnesota,1,Tiffany Mitchell,misses,driving layup,False
5794,401391683,253,Megan Gustafson makes 24-foot three point jump...,Jump Shot,True,3,11,20,Phoenix,Las Vegas,...,768,3-pointer,Jump Shots,Winning,Las Vegas,16,Megan Gustafson,makes,24-foot three point jumper (Diamond DeShields ...,False
5343,401391681,8,Rhyne Howard makes 23-foot three point jumper ...,Jump Shot,True,3,2,3,Atlanta,Atlanta,...,2334,2-pointer,Jump Shots,Losing,Washington,-1,Rhyne Howard,makes,23-foot three point jumper (Kristy Wallace ass...,False


In [None]:
data.describe()

Unnamed: 0,game_id,game_play_number,shot_value,coordinate_x,coordinate_y,home_score,away_score,qtr,quarter_seconds_remaining,game_seconds_remaining,point_difference
count,32107.0,32107.0,32107.0,32107.0,32107.0,32107.0,32107.0,32107.0,32107.0,32107.0,32107.0
mean,401398400.0,184.896191,0.996418,24.87769,9.647429,41.237799,40.434173,2.475535,292.021428,1210.755661,0.803625
std,19427.23,113.007423,1.155949,10.453247,8.718907,25.228246,24.937467,1.135326,172.985075,697.521206,10.703648
min,401391600.0,2.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,-39.0
25%,401391700.0,87.0,0.0,20.0,2.0,20.0,19.0,1.0,142.0,602.0,-5.0
50%,401391800.0,183.0,0.0,25.0,6.0,40.0,39.0,2.0,294.0,1222.0,1.0
75%,401391800.0,281.0,2.0,29.0,17.0,61.0,60.0,3.0,442.0,1820.0,7.0
max,401456000.0,533.0,3.0,50.0,81.0,131.0,116.0,6.0,595.0,2395.0,42.0


In [None]:
#@title Scatter of the team shots
indiana_df = data[data['shooting_team'] == 'Washington']

alt.Chart(data).mark_point().encode(
    alt.X('coordinate_x:O'),
    alt.Y('coordinate_y:O'),
    alt.Color('shot_clasification:N')
).properties(
    width=500,
    height=500
)

Output hidden; open in https://colab.research.google.com to view.

In [None]:
#@title Histogram of shot categories
alt.Chart(data).mark_bar().encode(
    alt.X('shot_category:N', title='Shot Categories'),
    alt.Y('count():Q', title='Number of Shots'),
    alt.Color('shot_category:N'),
    tooltip=['shot_category', 'count()']
).properties(
    title='Distribution of Shot Categories',
    width=600,
    height=400
)

Output hidden; open in https://colab.research.google.com to view.

In [None]:
#@title Heatmap of shot position
alt.Chart(data).mark_rect().encode(
    alt.X('coordinate_x:Q', bin=alt.Bin(maxbins=50)),
    alt.Y('coordinate_y:Q', bin=alt.Bin(maxbins=65)),
    color=alt.Color('count():Q', scale=alt.Scale(scheme='greens'), title='Shot Frequency'),
    tooltip=['count()']
).properties(
    width=500,
    height=650,
    title='Shot Frequency Heatmap'
).configure_view(
    strokeWidth=0
)

Output hidden; open in https://colab.research.google.com to view.

In [None]:
#@title Type of shots
shots_in = data[data['shot_value'] != 0]
shots_out = data[data['shot_value'] == 0]

# Visualization
alt.Chart(shots_in).mark_point(opacity=0.7).encode(
    alt.X('coordinate_x:Q', title='X Coordinate'),
    alt.Y('coordinate_y:Q', title='Y Coordinate'),
    alt.Color('shot_category:N', title='Shot Value'),
    tooltip=['shot_category', 'player']
).properties(
    width=500,
    height=650,
    title='Shot Frequency Heatmap'
).configure_view(
    strokeWidth=1
)

Output hidden; open in https://colab.research.google.com to view.

In [None]:
#@title Losing vs Winning
alt.Chart(data).mark_rect(opacity=1).encode(
    alt.X('coordinate_x:Q', bin=alt.Bin(maxbins=50)),
    alt.Y('coordinate_y:Q', bin=alt.Bin(maxbins=65)),
    alt.Color('winning_status:N', title='Shot Classification'),
    tooltip=['shot_type', 'shot_classification', 'home_score', 'away_score']
).properties(
    width=500,
    height=650,
    title='Shot Location by result'
)

Output hidden; open in https://colab.research.google.com to view.

In [None]:
#@title heatmap by point diff

alt.Chart(data).mark_rect().encode(
    alt.X('coordinate_x:Q', bin=alt.Bin(maxbins=50)),
    alt.Y('coordinate_y:Q', bin=alt.Bin(maxbins=65)),
    color=alt.Color('point_difference:Q', scale=alt.Scale(scheme='redblue'), title='Point Difference'),
    tooltip=['point_difference', 'count()']
).properties(
    width=500,
    height=650,
    title='Point Difference Heatmap'
).configure_view(
    strokeWidth=1
)

Output hidden; open in https://colab.research.google.com to view.

In [None]:
alt.Chart(data).mark_bar().encode(
    x='shot_value:N',
    y='count()',
    color='winning_status:N',
    column='winning_status:N'
).properties(
    title='Shot Value Distribution by Winning/Losing Status'
)

Output hidden; open in https://colab.research.google.com to view.

In [None]:
#@title Heatmap of blocks
blocks = data[data['action'] == 'blocks']
alt.Chart(blocks).mark_rect().encode(
    alt.X('coordinate_x:Q', bin=alt.Bin(maxbins=25)),
    alt.Y('coordinate_y:Q', bin=alt.Bin(maxbins=32)),
    color=alt.Color('count():Q', scale=alt.Scale(scheme='purples'), title='Blocks Frequency'),
    tooltip=['count()']
).properties(
    width=500,
    height=650,
    title='Blocks Frequency Heatmap'
).configure_view(
    strokeWidth=1
)

In [2]:
game_df = data[data['game_id'] == 401391658]

alt.Chart(game_df).mark_area(opacity=0.5).encode(
    alt.X('game_seconds_remaining:Q', title='Game Time Remaining (Seconds)'),
    alt.Y('point_difference:Q', title='Cumulative Point Difference'),
    alt.Color('winning_status:N'),
    tooltip=['shooting_team:N', 'sum(point_difference):Q']
).properties(
    title='Point Difference Over Game Time',
    width=600,
    height=400
)

NameError: name 'data' is not defined

In [None]:

# Filter the data for the selected team using a dropdown selection
team_selection = alt.binding_select(options=list(data['shooting_team'].unique()), name='Select Team: ')
team_selection_variable = alt.selection_single(fields=['shooting_team'], bind=team_selection, init={'shooting_team': 'Washington'})

# Convert game time into minutes
data['game_minutes_remaining'] = data['game_seconds_remaining'] // 60

# Aggregate data: Average point difference by minute for all teams
team_avg_df = data.groupby(['shooting_team', 'game_minutes_remaining'], as_index=False).agg(
    avg_point_difference=('point_difference', 'mean')
)

# Create the Altair line chart with team selection
chart = alt.Chart(team_avg_df).mark_area(color='blue').encode(
    alt.X('game_minutes_remaining:Q', title='Game Time Remaining (Minutes)'),
    alt.Y('avg_point_difference:Q', title='Average Point Difference'),
    alt.Color('point_difference:N', legend=None),
    tooltip=['game_minutes_remaining:N', 'avg_point_difference:Q']
).add_selection(
    team_selection_variable
).transform_filter(
    team_selection_variable
).properties(
    title='Average Point Difference Over Game Time by Team',
    width=600,
    height=400
)

chart


Deprecated since `altair=5.0.0`. Use selection_point instead.
  team_selection_variable = alt.selection_single(fields=['shooting_team'], bind=team_selection, init={'shooting_team': 'Washington'})


TypeError: altair.vegalite.v5.schema.core.SelectionParameter() got multiple values for keyword argument 'value'

In [None]:
alt.Chart(data).mark_bar().encode(
    x=alt.X('mean(point_difference):Q', title='Average Point Difference'),
    y=alt.Y('shot_category:N', sort='-x', title='Shot Category'),
    color=alt.Color('mean(point_difference):Q', scale=alt.Scale(scheme='magma', domainMid=0)),
    tooltip=['shot_type:N', 'mean(point_difference):Q', 'count():Q']
).properties(
    title='Average Point Difference by Shot Category',
    width=600,
    height=400
)

In [None]:
# prompt: How is the distribution of shot attempts of a team?

import altair as alt

alt.Chart(data).mark_bar().encode(
    alt.X("shot_classification:N", title="Shot Classification"),
    alt.Y("count()", title="Number of Shots"),
    alt.Color("shot_classification:N")
).properties(
    title="Distribution of Shot Attempts by Classification"
)

In [None]:
# prompt: How do two different teams compare in terms of successful or failed shots?

import pandas as pd
import altair as alt

# Assuming 'data' DataFrame is already loaded and processed as in your original code

# Group data by team and shot outcome (successful/missed)
team_comparison = data.groupby(['shooting_team', 'shot_value'])['shot_value'].count().reset_index(name='shot_count')

# Create the bar chart
alt.Chart(team_comparison).mark_bar().encode(
    x='shooting_team:N',
    y='shot_count:Q',
    color='shot_value:N',
    column='shot_value:N'
).properties(
    title='Shot Outcome Comparison by Team'
)

In [None]:
# prompt: How the shots statistics (successful vs failed) compare per quarter?

# Assuming 'data' DataFrame is already loaded and processed as in your original code

# Group data by quarter and shot outcome
quarter_comparison = data.groupby(['qtr', 'shot_value'])['shot_value'].count().reset_index(name='shot_count')

# Create the bar chart
alt.Chart(quarter_comparison).mark_bar().encode(
    x='qtr:N',
    y='shot_count:Q',
    color='shot_value:N',
    column='shot_value:N'
).properties(
    title='Shot Outcome Comparison by Quarter'
)

In [None]:
# prompt: • How do the scores of two teams compare for a certain game? game_id=401391650, team1='Indiana', team2='Washington'

# Assuming 'data' DataFrame is already loaded and processed as in your original code

def compare_teams_in_game(game_id, team1, team2):
    """Compares the scores of two teams for a specific game."""

    game_data = data[data['game_id'] == game_id]
    team1_data = game_data[game_data['shooting_team'] == team1]
    team2_data = game_data[game_data['shooting_team'] == team2]

    if team1_data.empty or team2_data.empty:
        print(f"No data found for game {game_id} with teams {team1} and {team2}")
        return

    team1_score = team1_data['shot_value'].sum()
    team2_score = team2_data['shot_value'].sum()

    print(f"Game ID: {game_id}")
    print(f"{team1} score: {team1_score}")
    print(f"{team2} score: {team2_score}")

    if team1_score > team2_score:
        print(f"{team1} won the game.")
    elif team2_score > team1_score:
        print(f"{team2} won the game.")
    else:
        print(f"The game ended in a tie.")

# Example usage:
compare_teams_in_game(401391650, 'Indiana', 'Washington')

In [None]:
#@title Team and Game Selection with Dynamic Filtering
import ipywidgets as widgets
from IPython.display import display, clear_output

# Assuming 'data' DataFrame is already loaded and processed as in your original code

# Get unique team names
teams = data['shooting_team'].unique().tolist()

# Create widgets for team and game selection
team_dropdown = widgets.Dropdown(
    options=teams,
    value=teams[0],  # Default to the first team
    description='Team:'
)

game_dropdown = widgets.Dropdown(
    options=[],  # Initially empty; will be updated dynamically
    description='Game ID:'
)

time_slider = widgets.IntRangeSlider(
    value=[0, 600],  # Default range in seconds
    min=0,
    max=data['game_seconds_remaining'].max(),
    step=10,
    description='Time (sec):',
    continuous_update=False
)

# Create an output widget to display the plots
output_widget = widgets.Output()

# Function to update the game dropdown based on the selected team
def update_game_dropdown(change):
    selected_team = change['new']
    team_games = data[data['shooting_team'] == selected_team]['game_id'].unique().tolist()
    game_dropdown.options = team_games
    if team_games:
        game_dropdown.value = team_games[0]  # Set the default value to the first game

# Function to update the plots based on all filters
def update_plot(change=None):
    with output_widget:
        clear_output(wait=True)  # Clear the previous plot

        # Get filter values
        selected_team = team_dropdown.value
        selected_game = game_dropdown.value
        selected_time = time_slider.value

        # Filter the data
        filtered_data = data[
            (data['shooting_team'] == selected_team) &
            (data['game_id'] == selected_game) &
            (data['game_seconds_remaining'] >= selected_time[0]) &
            (data['game_seconds_remaining'] <= selected_time[1])
        ]

        # Create heatmaps for each quarter
        heatmaps = []
        for qtr in range(1, 5):  # Loop through quarters 1 to 4
            quarter_data = filtered_data[filtered_data['qtr'] == qtr]

            heatmap = alt.Chart(quarter_data).mark_rect().encode(
                alt.X('coordinate_x:Q', bin=alt.Bin(maxbins=10)),
                alt.Y('coordinate_y:Q', bin=alt.Bin(maxbins=13)),
                color=alt.Color('count():Q', scale=alt.Scale(scheme='greens'), title='Shot Frequency'),
                tooltip=['count()']
            ).properties(
                width=250,  # Adjust width for side-by-side layout
                height=300,
                title=f'Quarter {qtr}'
            )

            heatmaps.append(heatmap)

        # Combine heatmaps into a single visualization (4 columns)
        combined_chart = alt.hconcat(*heatmaps).configure_view(
            strokeWidth=1
        ).resolve_scale(
            color='independent'  # Ensure color scales are independent for each quarter
        )

        display(combined_chart)

# Observe changes in team dropdown to update game options
team_dropdown.observe(update_game_dropdown, names='value')

# Observe changes in dropdowns and sliders to update the plot
game_dropdown.observe(update_plot, names='value')
time_slider.observe(update_plot, names='value')

# Display widgets and the output widget
controls = widgets.VBox([team_dropdown, game_dropdown, time_slider])
display(controls)
display(output_widget)

# Initialize the dynamic filtering with the first team's games
update_game_dropdown({'new': teams[0]})
update_plot()


In [None]:
indiana_data = data[data['shooting_team'] == 'Indiana']

# Create a summary of key metrics for each player
player_stats = indiana_data.groupby('player').agg({
    'made_shot': 'sum',
    'shot_value': 'sum',
    'desc': 'count'
}).rename(columns={'desc': 'shot_attempts'})

player_stats['shot_efficiency'] = player_stats['made_shot'] / player_stats['shot_attempts']
player_stats['points_per_shot'] = player_stats['shot_value'] / player_stats['shot_attempts']

player_stats = player_stats.sort_values(by='shot_value', ascending=False).reset_index()

top_players = player_stats.head(5)
alt.Chart(top_players).mark_bar().encode(
    x=alt.X('shot_value:Q', title='Total Points Scored'),
    y=alt.Y('player:N', sort='-x', title='Player'),
    tooltip=['player', 'shot_value', 'shot_efficiency', 'shot_attempts', 'points_per_shot']
).properties(
    title='Top 5 Influential Players (Indiana)',
    width=600,
    height=400
)

In [None]:
#@title Shots of sabrina ionescu

import altair as alt

# Assuming 'data' DataFrame is already loaded and processed as in your original code

# Filter data for Sabrina Ionescu
sabrina_data = data[data['player'] == 'Sabrina Ionescu']

# Create the heatmap
alt.Chart(sabrina_data).mark_rect().encode(
    alt.X('coordinate_x:Q', bin=alt.Bin(maxbins=50)),
    alt.Y('coordinate_y:Q', bin=alt.Bin(maxbins=65)),
    color=alt.Color('count():Q', scale=alt.Scale(scheme='reds'), title='Shot Frequency'),
    tooltip=['count()']
).properties(
    width=500,
    height=650,
    title='Sabrina Ionescu Shot Frequency Heatmap'
).configure_view(
    strokeWidth=1
)

In [None]:
# Extract the updated data structure with new columns
# Recreate the key analyses for the dashboard

# Calculate top players by points scored
player_stats = data.groupby('player').agg({
    'made_shot': 'sum',
    'shot_value': 'sum',
    'desc': 'count'
}).rename(columns={'desc': 'shot_attempts'})

player_stats['shot_efficiency'] = player_stats['made_shot'] / player_stats['shot_attempts']

# Sort and get the top 5 players
top_players = player_stats.sort_values(by='shot_value', ascending=False).head(5).reset_index()

# Determine most frequent action (mode) for each player and add it to player_stats
action_preference = data.groupby('player')['shot_category'].apply(
    lambda x: x.mode()[0] if not x.empty and len(x.mode()) > 0 else "Unknown"
).reset_index(name='shot_type_preference')

# Merge action preferences back into player_stats
player_stats = player_stats.merge(action_preference, on='player')

# Prepare heatmap data (court location and shot frequency)
heatmap_data = data.groupby(['coordinate_x', 'coordinate_y']).size().reset_index(name='shot_frequency')

# Bar Chart: Top Players
bar_chart = alt.Chart(top_players).mark_bar().encode(
    x=alt.X('shot_value:Q', title='Total Points Scored'),
    y=alt.Y('player:N', sort='-x', title='Player'),
    tooltip=['player', 'shot_value', 'shot_efficiency', 'shot_attempts']
).properties(
    title="Top 5 Players by Total Points",
    width=400,
    height=300
)

# Scatter Plot: Shot Attempts vs Efficiency
scatter_plot = alt.Chart(player_stats).mark_circle(size=100).encode(
    x=alt.X('shot_attempts:Q', title='Total Shot Attempts'),
    y=alt.Y('shot_efficiency:Q', title='Shooting Efficiency'),
    color=alt.Color('shot_type_preference:N', title='Action Preference', legend=alt.Legend()),
    tooltip=['player', 'shot_attempts', 'shot_efficiency', 'shot_type_preference']
).properties(
    title="Player Shooting Efficiency vs Attempts",
    width=400,
    height=300
)


# Heatmap: Shot Locations
heatmap = alt.Chart(heatmap_data).mark_rect().encode(
    x=alt.X('coordinate_x:Q', bin=alt.Bin(maxbins=20), title='Court X Coordinate'),
    y=alt.Y('coordinate_y:Q', bin=alt.Bin(maxbins=20), title='Court Y Coordinate'),
    color=alt.Color('shot_frequency:Q', scale=alt.Scale(scheme='blues'), title='Shot Frequency'),
    tooltip=['shot_frequency']
).properties(
    title="Shot Frequency Heatmap",
    width=400,
    height=300
)

# Combine visuals into a dashboard layout
dashboard = (bar_chart | scatter_plot) & heatmap
dashboard


# QUarters

In [None]:
# Group data by quarter and shot outcome (made_shot)
quarterly_stats = data.groupby(['qtr', 'made_shot']).size().reset_index(name='shot_count')

# Create a stacked bar chart
shot_stats_chart = alt.Chart(quarterly_stats).mark_bar().encode(
    x=alt.X('qtr:O', title='Quarter'),
    y=alt.Y('shot_count:Q', title='Number of Shots'),
    color=alt.Color('made_shot:N', title='Shot Outcome', scale=alt.Scale(scheme='set1')),
    tooltip=['qtr', 'made_shot', 'shot_count']
).properties(
    title='Shot Statistics: Successful vs Failed per Quarter',
    width=600,
    height=400
)

shot_stats_chart


In [None]:
# Let's create a visualization for "Shot Efficiency per Quarter" as it provides great insights.

# Calculate shot efficiency per quarter
efficiency_per_quarter = data.groupby('qtr').agg({
    'made_shot': 'sum',
    'desc': 'count'
}).reset_index()
efficiency_per_quarter['shot_efficiency'] = efficiency_per_quarter['made_shot'] / efficiency_per_quarter['desc']

# Bar Chart: Shot Efficiency per Quarter
efficiency_chart = alt.Chart(efficiency_per_quarter).mark_bar().encode(
    x=alt.X('qtr:O', title='Quarter'),
    y=alt.Y('shot_efficiency:Q', title='Shooting Efficiency'),
    tooltip=['qtr', 'shot_efficiency']
).properties(
    title='Shooting Efficiency per Quarter',
    width=600,
    height=400
)

efficiency_chart


In [None]:
# Analyze clutch performance: Shots in the final 30 seconds of each quarter

# Define a clutch threshold (final 30 seconds of each quarter)
clutch_threshold = 30

# Filter data for clutch shots
clutch_data = data[data['quarter_seconds_remaining'] <= clutch_threshold]

# Group clutch data by quarter and shot success
clutch_stats = clutch_data.groupby(['qtr', 'made_shot']).size().reset_index(name='shot_count')

# Create a stacked bar chart for clutch shots
clutch_chart = alt.Chart(clutch_stats).mark_bar().encode(
    x=alt.X('qtr:O', title='Quarter'),
    y=alt.Y('shot_count:Q', title='Clutch Shot Attempts (Final 30 Seconds)'),
    color=alt.Color('made_shot:N', title='Shot Outcome', scale=alt.Scale(scheme='set1')),
    tooltip=['qtr', 'made_shot', 'shot_count']
).properties(
    title='Clutch Shot Attempts (Final 30 Seconds per Quarter)',
    width=600,
    height=400
)

clutch_chart


In [None]:
# Add a column to classify shots as 'Clutch' or 'Normal'
data['time_category'] = data['quarter_seconds_remaining'].apply(
    lambda x: 'Clutch' if x <= 30 else 'Normal'
)

# Group data by quarter, time category, and shot success
time_stats = data.groupby(['qtr', 'time_category', 'made_shot']).size().reset_index(name='shot_count')

# Create a stacked bar chart
time_comparison_chart = alt.Chart(time_stats).mark_bar().encode(
    x=alt.X('qtr:O', title='Quarter'),
    y=alt.Y('shot_count:Q', title='Shot Attempts'),
    color=alt.Color('made_shot:N', title='Shot Outcome', scale=alt.Scale(scheme='set1')),
    column=alt.Column('time_category:N', title='Time Category'),
    tooltip=['qtr', 'time_category', 'made_shot', 'shot_count']
).properties(
    title='Comparison of Clutch and Normal Time Shot Attempts per Quarter',
    width=300,
    height=400
)

time_comparison_chart


In [None]:
#@title Shot % during the quarters
data_df = data[~data['qtr'].isin([5, 6])]
data_df = data_df[data_df['home_team_name'] == 'Washington']

def calculate_shot_percentage(df, bin_width=25):
    # Create binned time column
    df['binned_time'] = (df['quarter_seconds_remaining'] // bin_width) * bin_width
    binned_data = df.groupby(['qtr', 'binned_time', 'made_shot'])['made_shot'].count().reset_index(name='shot_count')
    return binned_data

# Calculate binned shot counts
binned_data = calculate_shot_percentage(data_df)

# Add a percentage column
def calculate_percentage(group):
    total_shots = group['shot_count'].sum()
    group['percentage'] = (group['shot_count'] / total_shots) * 100
    return group

# Group by quarter and binned time, then calculate percentages
binned_data = binned_data.groupby(['qtr', 'binned_time']).apply(calculate_percentage).reset_index(drop=True)

# Filter for made shots only
percentage_data = binned_data[binned_data['made_shot'] == 1]

# Create the line plot
line_plot = alt.Chart(percentage_data).mark_area().encode(
    x=alt.X('binned_time:Q', title='Quarter Seconds Remaining (Binned)'),
    y=alt.Y('percentage:Q', title='Shot Success Percentage (%)'),
    color=alt.Color('qtr:N', title='Quarter'),
    tooltip=['qtr', 'binned_time', 'percentage']
).properties(
    title='Shot Success Percentage Over Time (Binned) by Quarter',
    width=800,
    height=400
)

line_plot


# Widgets

In [None]:
import ipywidgets as widgets
from IPython.display import display, clear_output

# Assuming 'data' DataFrame is already loaded and processed as in your original code

# Get unique team names and game IDs
teams = data['shooting_team'].unique().tolist()
games = data['game_id'].unique().tolist()

# Create widgets for filters
team_dropdown = widgets.Dropdown(
    options=teams,
    value=teams[0],  # Default to the first team
    description='Team:'
)

game_dropdown = widgets.Dropdown(
    options=games,
    value=games[0],  # Default to the first game
    description='Game ID:'
)

time_slider = widgets.IntRangeSlider(
    value=[0, 600],  # Default range in seconds
    min=0,
    max=data['game_seconds_remaining'].max(),
    step=10,
    description='Time (sec):',
    continuous_update=False
)

# Create an output widget to display the plots
output_widget = widgets.Output()

# Define the function to update plots based on all filters
def update_plot(change=None):
    with output_widget:
        clear_output(wait=True)  # Clear the previous plot

# Display widgets and the output widget
controls = widgets.VBox([team_dropdown, game_dropdown, time_slider])
display(controls)
display(output_widget)
update_plot()

# LAB

## Ex 1 Distribution of shots of a team

In [None]:
#@title Distribution of shots of a team
import ipywidgets as widgets
from IPython.display import display, clear_output
import altair as alt

# Get unique team names
teams = data['shooting_team'].unique().tolist()

# Create a dropdown widget for team selection
team_dropdown = widgets.Dropdown(
    options=teams,
    value=teams[0],  # Default to the first team
    description='Team:'
)

# Create an output widget to display the heatmap
output_widget = widgets.Output()

# Function to update the heatmap based on the selected team
def update_heatmap(change=None):
    with output_widget:
        clear_output(wait=True)  # Clear the previous plot

        # Filter data for the selected team
        selected_team = team_dropdown.value
        team_data = data[data['shooting_team'] == selected_team]

        heatmap = alt.Chart(team_data).mark_rect().encode(
            x=alt.X('coordinate_x:Q', bin=alt.Bin(maxbins=50), title='Court X Coordinate'),
            y=alt.Y('coordinate_y:Q', bin=alt.Bin(maxbins=65), title='Court Y Coordinate'),
            color=alt.Color('count()', scale=alt.Scale(scheme='purples')),
            tooltip=['count()']
        ).properties(
            title=f'Distribution of shots for {selected_team}',
            width=500,
            height=650)

        display(heatmap)

team_dropdown.observe(update_heatmap, names='value')

display(team_dropdown)
display(output_widget)

update_heatmap()

In [None]:
#@title Distribution of shots of a team (Scatter Plot)
import ipywidgets as widgets
from IPython.display import display, clear_output
import altair as alt

# Get unique team names
teams = data['shooting_team'].unique().tolist()

# Create a dropdown widget for team selection
team_dropdown = widgets.Dropdown(
    options=teams,
    value=teams[0],  # Default to the first team
    description='Team:'
)

# Create an output widget to display the scatter plot
output_widget = widgets.Output()

# Function to update the scatter plot based on the selected team
def update_scatter(change=None):
    with output_widget:
        clear_output(wait=True)  # Clear the previous plot

        # Filter data for the selected team
        selected_team = team_dropdown.value
        team_data = data[data['shooting_team'] == selected_team]

        # Create scatter plot
        scatter = alt.Chart(team_data).mark_circle(size=25, opacity=0.9).encode(
            x=alt.X('coordinate_x:Q', title='Court X Coordinate'),
            y=alt.Y('coordinate_y:Q', title='Court Y Coordinate'),
            tooltip=['coordinate_x', 'coordinate_y']
        ).properties(
            title=f'Distribution of shots for {selected_team}',
            width=500,
            height=650
        )

        display(scatter)

# Observe changes in the dropdown value to update the scatter plot
team_dropdown.observe(update_scatter, names='value')

# Display the dropdown and the output widget
display(team_dropdown)
display(output_widget)

# Initialize the scatter plot with the default team
update_scatter()


In [None]:
# Definir límites del campo de baloncesto
x_min, x_max = 0, 50
y_min, y_max = 0, 81

# Heatmap con filtro por cuarto y límites fijos
quarter_selection = alt.binding_select(
    options=data['qtr'].dropna().unique().tolist(),
    name='Select Quarter: '
)
quarter_selection_variable = alt.selection_single(
    fields=['qtr'], bind=quarter_selection, init={'qtr': data['qtr'].min()}
)

heatmap = alt.Chart(data).mark_rect().encode(
    alt.X('coordinate_x:Q', bin=alt.Bin(maxbins=50), scale=alt.Scale(domain=[x_min, x_max])),
    alt.Y('coordinate_y:Q', bin=alt.Bin(maxbins=65), scale=alt.Scale(domain=[y_min, y_max])),
    alt.Color('count():Q', scale=alt.Scale(scheme='greens')),
    tooltip=['count()']
).add_selection(
    quarter_selection_variable
).transform_filter(
    quarter_selection_variable
).properties(
    width=500,
    height=650,
    title='Point Difference Heatmap by Quarter'
).configure_view(
    strokeWidth=1
)

heatmap
