<a href="https://colab.research.google.com/github/hossamhalawa/throw_in_case_study_visualization.ipynb/blob/main/throw_in_case_study_visualization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Colab-only cell: `files` comes from the google.colab package, so this cell
# cannot run outside a Colab runtime.
# The recorded cell output shows 'Left Side_table_data.csv' being saved, which is
# the filename the analysis script reads as the left-side dataset.
from google.colab import files
uploaded = files.upload()


Saving Left Side_table_data.csv to Left Side_table_data.csv


In [None]:
# Second Colab upload prompt; the recorded cell output shows
# 'right_side_throw_ins.csv' being saved (the right-side dataset).
# NOTE: this rebinds `uploaded`, replacing whatever the earlier upload cell
# stored in it. Nothing in the visible analysis code reads `uploaded`.
from google.colab import files
uploaded = files.upload()


Saving right_side_throw_ins.csv to right_side_throw_ins.csv


In [None]:
"""
Throw-in Dataset Visualization and Comparison

This script contains all the code used to analyze, visualize, and compare
the left-side and right-side throw-in datasets.

Modified for Google Colab compatibility with relative file paths.
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import os
from matplotlib.gridspec import GridSpec

# Create output directory if it doesn't exist
output_dir = './output'
os.makedirs(output_dir, exist_ok=True)

# Part 1: Load and Process Left-Side Dataset
# -----------------------------------------

# For Google Colab, you'll need to upload these files or use Google Drive
# Uncomment these lines if using Google Drive
# NOTE(review): the Drive example below spells the left file without a space
# ('LeftSide_...') while the uploaded file is 'Left Side_...'; adjust to match
# the real file name on your Drive.
# from google.colab import drive
# drive.mount('/content/drive')
# left_file_path = '/content/drive/MyDrive/LeftSide_table_data.csv'
# right_file_path = '/content/drive/MyDrive/right_side_throw_ins.csv'

# If uploading directly to Colab session
left_file_path = 'Left Side_table_data.csv'  # Update this path as needed
right_file_path = 'right_side_throw_ins.csv'  # Update this path as needed

# Load the left-side dataset
left_df = pd.read_csv(left_file_path)

# Remove the summary row at the bottom.
# .copy() turns the filtered result into an independent DataFrame, so the
# column assignments that follow are unambiguous and do not raise pandas'
# SettingWithCopyWarning.
left_df = left_df[left_df['No.'] != 'Total'].copy()

# Convert No. to numeric (it is read as text while the 'Total' row is present)
left_df['No.'] = pd.to_numeric(left_df['No.'])

# Calculate First Contact Success Rate: any outcome starting with 'Yes' counts.
# na=False makes a missing outcome an explicit failure; without it the column
# would hold NaN for such rows, and the value_counts()-based pies and the
# sum()/len() based rates would silently disagree about them.
left_df['First Contact Success'] = left_df['Success/Fail'].str.startswith('Yes', na=False)
# Function to parse possession duration to seconds
def parse_duration(duration_str):
    """Convert a possession-duration cell into whole seconds.

    Missing values and the literal 'Zero' map to 0. Any other value is
    stringified and the first run of digits found in it is returned as an
    int; text that contains no digits also yields 0.
    """
    is_blank = pd.isna(duration_str) or duration_str == 'Zero'
    if not is_blank:
        # Only the first group of digits is used, e.g. '12 sec' -> 12
        digits = re.search(r'(\d+)', str(duration_str))
        if digits is not None:
            return int(digits.group(1))
    return 0

# Parse the free-text durations once and reuse the result for both new columns
left_duration_sec = left_df['Possession Duration'].apply(parse_duration)
left_df['Possession Duration (sec)'] = left_duration_sec

# Possession counts as retained when it lasted 7 seconds or longer
left_df['Possession Retained'] = left_duration_sec.ge(7)

# Write the enriched left-side table next to the generated figures
left_processed_csv = os.path.join(output_dir, 'categorized_throw_in_data.csv')
left_df.to_csv(left_processed_csv, index=False)

# Part 2: Left-Side Dataset Visualizations
# ---------------------------------------
# Each chart below follows the same pyplot sequence: open a new figure, draw on
# it, save it as a PNG inside output_dir, then close it so the next chart starts
# from a clean figure.

# 1. First Contact Success Rate Visualization - Pie Chart
plt.figure(figsize=(10, 6))
success_counts = left_df['First Contact Success'].value_counts()
labels = ['Success', 'Failure']
# Sizes are listed in the same order as `labels`; .get(..., 0) supplies 0 when
# an outcome never occurs in the data.
sizes = [success_counts.get(True, 0), success_counts.get(False, 0)]
colors = ['#66b3ff', '#ff9999']
explode = (0.1, 0)  # explode the 1st slice (Success)

plt.pie(sizes, explode=explode, labels=labels, colors=colors, autopct='%1.1f%%',
        shadow=True, startangle=90)
plt.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle
plt.title('First Contact Success Rate', fontsize=16)
plt.tight_layout()
plt.savefig(os.path.join(output_dir, 'first_contact_success_pie.png'), dpi=300)
plt.close()

# 2. Possession Retention Success Rate Visualization
# Filter for successful first contacts
# (retention is only evaluated for throw-ins whose first contact succeeded)
successful_first_contacts = left_df[left_df['First Contact Success'] == True]

# Create pie chart for possession retention
plt.figure(figsize=(10, 6))
retention_counts = successful_first_contacts['Possession Retained'].value_counts()
labels = ['Retained (≥7 sec)', 'Lost (<7 sec)']
# Same label/size ordering convention as the first pie: positive outcome first
sizes = [retention_counts.get(True, 0), retention_counts.get(False, 0)]
colors = ['#66c2a5', '#fc8d62']
explode = (0.1, 0)  # explode the 1st slice (Retained)

plt.pie(sizes, explode=explode, labels=labels, colors=colors, autopct='%1.1f%%',
        shadow=True, startangle=90)
plt.axis('equal')
plt.title('Possession Retention Success Rate\n(After Successful First Contact)', fontsize=16)
plt.tight_layout()
plt.savefig(os.path.join(output_dir, 'possession_retention_success_pie.png'), dpi=300)
plt.close()

# Create a bar chart showing possession duration for successful first contacts.
# The chart is only drawn when there is at least one successful first contact;
# otherwise a short placeholder message is rendered instead. This mirrors the
# guard already used by the right-side version of this chart in this file.
plt.figure(figsize=(12, 6))
if len(successful_first_contacts) > 0:
    # Order the bars from shortest to longest possession
    successful_first_contacts = successful_first_contacts.sort_values('Possession Duration (sec)')
    # Green bars reached the 7-second threshold, orange bars did not
    bars = plt.bar(range(len(successful_first_contacts)), successful_first_contacts['Possession Duration (sec)'],
            color=[('#66c2a5' if x >= 7 else '#fc8d62') for x in successful_first_contacts['Possession Duration (sec)']])

    plt.axhline(y=7, color='red', linestyle='--', label='7 Second Threshold')

    plt.xlabel('Throw-in Number', fontsize=12)
    plt.ylabel('Possession Duration (seconds)', fontsize=12)
    plt.title('Possession Duration After Successful First Contact', fontsize=16)
    # Tick labels show the original throw-in number of each (sorted) bar
    plt.xticks(range(len(successful_first_contacts)), successful_first_contacts['No.'])
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    # Print the duration just above each bar
    for bar in bars:
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width()/2., height + 0.5,
                f'{height:.0f}s',
                ha='center', va='bottom', rotation=0)
else:
    plt.text(0.5, 0.5, 'No successful first contacts', ha='center', va='center', fontsize=14)

plt.tight_layout()
plt.savefig(os.path.join(output_dir, 'possession_duration_bar.png'), dpi=300)
plt.close()

# 3. Shot Creation Success Rate (left side) - Pie Chart
plt.figure(figsize=(10, 6))
shot_counts = left_df['Shot Out'].value_counts()
labels = ['No Shot Created', 'Shot Created']
# Only the exact values 'No' and 'Yes' are counted; any other spelling in the
# 'Shot Out' column is left out of this pie.
sizes = [shot_counts.get('No', 0), shot_counts.get('Yes', 0)]
colors = ['#8da0cb', '#fc8d62']

plt.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%',
        shadow=True, startangle=90)
plt.axis('equal')
plt.title('Shot Creation Success Rate', fontsize=16)
plt.tight_layout()
plt.savefig(os.path.join(output_dir, 'shot_creation_success_pie.png'), dpi=300)
plt.close()


# 4. First Contact Success Rate by Length and Direction - Heatmap
# Number of successful first contacts per (Length, Direction) combination
success_counts = pd.crosstab(
    [left_df['Length']],
    [left_df['Direction']],
    values=left_df['First Contact Success'],
    aggfunc='sum'
)

# Number of throw-ins per (Length, Direction) combination
total_counts = pd.crosstab(
    [left_df['Length']],
    [left_df['Direction']]
)

# Success rate in percent for every combination
success_rates = (success_counts / total_counts) * 100

# Cells without a rate are shown as 0.
# NOTE(review): this appears to make a combination with no throw-ins at all
# look the same as one with a 0% success rate - confirm this is intended.
success_rates = success_rates.fillna(0)

plt.figure(figsize=(10, 8))
sns.heatmap(success_rates, annot=True, cmap='YlGnBu', fmt='.1f',
            cbar_kws={'label': 'First Contact Success Rate (%)'})
plt.title('First Contact Success Rate by Throw-in Length and Direction', fontsize=16)
plt.xlabel('Direction', fontsize=14)
plt.ylabel('Length', fontsize=14)

plt.tight_layout()
plt.savefig(os.path.join(output_dir, 'first_contact_success_heatmap.png'), dpi=300)
plt.close()

# 5. Throw-in Length Distribution (left side) - Bar Chart
plt.figure(figsize=(10, 6))
# sort_index() orders the categories by label, so the three colours below are
# handed out in label order.
length_counts = left_df['Length'].value_counts().sort_index()
bars = plt.bar(length_counts.index, length_counts.values, color=['#8dd3c7', '#bebada', '#fb8072'])

plt.xlabel('Throw-in Length', fontsize=14)
plt.ylabel('Number of Throw-ins', fontsize=14)
plt.title('Distribution of Throw-ins by Length', fontsize=16)
plt.grid(axis='y', linestyle='--', alpha=0.7)

# Absolute count just above each bar
for i, bar in enumerate(bars):
    height = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2., height + 0.1,
            f'{height}',
            ha='center', va='bottom', fontsize=12)

# Share of all throw-ins, printed in white in the middle of each bar
total = length_counts.sum()
for i, bar in enumerate(bars):
    height = bar.get_height()
    percentage = (height / total) * 100
    plt.text(bar.get_x() + bar.get_width()/2., height/2,
            f'{percentage:.1f}%',
            ha='center', va='center', fontsize=12, color='white', fontweight='bold')

plt.tight_layout()
plt.savefig(os.path.join(output_dir, 'throw_in_length_distribution.png'), dpi=300)
plt.close()

# Same length counts as a pie chart
plt.figure(figsize=(10, 6))
plt.pie(length_counts, labels=length_counts.index, autopct='%1.1f%%',
        shadow=True, startangle=90, colors=['#8dd3c7', '#bebada', '#fb8072'])
plt.axis('equal')
plt.title('Throw-in Length Distribution', fontsize=16)
plt.tight_layout()
plt.savefig(os.path.join(output_dir, 'throw_in_length_pie.png'), dpi=300)
plt.close()

# 6. Throw-in Direction Distribution (left side) - Bar Chart
plt.figure(figsize=(10, 6))
direction_counts = left_df['Direction'].value_counts().sort_index()
bars = plt.bar(direction_counts.index, direction_counts.values, color=['#80b1d3', '#fdb462', '#b3de69'])

plt.xlabel('Throw-in Direction', fontsize=14)
plt.ylabel('Number of Throw-ins', fontsize=14)
plt.title('Distribution of Throw-ins by Direction', fontsize=16)
plt.grid(axis='y', linestyle='--', alpha=0.7)

# Absolute count just above each bar
for i, bar in enumerate(bars):
    height = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2., height + 0.1,
            f'{height}',
            ha='center', va='bottom', fontsize=12)

# Share of all throw-ins, printed in white in the middle of each bar
total = direction_counts.sum()
for i, bar in enumerate(bars):
    height = bar.get_height()
    percentage = (height / total) * 100
    plt.text(bar.get_x() + bar.get_width()/2., height/2,
            f'{percentage:.1f}%',
            ha='center', va='center', fontsize=12, color='white', fontweight='bold')

plt.tight_layout()
plt.savefig(os.path.join(output_dir, 'throw_in_direction_distribution.png'), dpi=300)
plt.close()

# Same direction counts as a pie chart
plt.figure(figsize=(10, 6))
plt.pie(direction_counts, labels=direction_counts.index, autopct='%1.1f%%',
        shadow=True, startangle=90, colors=['#80b1d3', '#fdb462', '#b3de69'])
plt.axis('equal')
plt.title('Throw-in Direction Distribution', fontsize=16)
plt.tight_layout()
plt.savefig(os.path.join(output_dir, 'throw_in_direction_pie.png'), dpi=300)
plt.close()

# 7. Left-side summary: the three headline rates as one bar chart
plt.figure(figsize=(12, 7))
metrics = [
    'First Contact\nSuccess Rate',
    'Possession Retention\nSuccess Rate',
    'Shot Creation\nSuccess Rate',
]

# Raw tallies behind the percentages
left_throw_total = len(left_df)
left_contact_wins = left_df['First Contact Success'].sum()
left_retained_wins = len(left_df[(left_df['First Contact Success'] == True) &
                                 (left_df['Possession Duration (sec)'] >= 7)])
left_shot_wins = len(left_df[left_df['Shot Out'] == 'Yes'])

# Retention is measured against successful first contacts only (0 if there were none)
if left_contact_wins > 0:
    left_retention_pct = (left_retained_wins / left_contact_wins) * 100
else:
    left_retention_pct = 0

values = [
    (left_contact_wins / left_throw_total) * 100,
    left_retention_pct,
    (left_shot_wins / left_throw_total) * 100,
]

bars = plt.bar(metrics, values, color=['#66b3ff', '#66c2a5', '#fc8d62'])
plt.title('Comparison of All Success Rates', fontsize=16)
plt.ylabel('Success Rate (%)', fontsize=14)
plt.ylim(0, 100)  # percentages always use the full 0-100 scale
plt.grid(axis='y', linestyle='--', alpha=0.7)

# Write each rate just above its bar
for i, bar in enumerate(bars):
    height = bar.get_height()
    label_x = bar.get_x() + bar.get_width()/2.
    plt.text(label_x, height + 2, f'{height:.1f}%',
             ha='center', va='bottom', fontsize=12)

plt.tight_layout()
summary_png = os.path.join(output_dir, 'all_success_rates_comparison.png')
plt.savefig(summary_png, dpi=300)
plt.close()


# Part 3: Load and Process Right-Side Dataset
# ------------------------------------------
right_df = pd.read_csv(right_file_path)

# Drop the 'Total' summary row. .copy() makes the filtered frame independent so
# the column assignments below do not raise pandas' SettingWithCopyWarning.
right_df = right_df[right_df['No.'] != 'Total'].copy()

right_df['No.'] = pd.to_numeric(right_df['No.'])

# Outcomes starting with 'Yes' count as a successful first contact.
# na=False turns a missing outcome into an explicit failure instead of NaN, so
# count-based charts and sum()/len() based rates treat such rows the same way.
right_df['First Contact Success'] = right_df['Success/Fail'].str.startswith('Yes', na=False)

# Numeric possession duration and the 7-second retention flag
right_df['Possession Duration (sec)'] = right_df['Possession Duration'].apply(parse_duration)

right_df['Possession Retained'] = right_df['Possession Duration (sec)'] >= 7

# The right-side column name really ends with a space ('Shot Creation ');
# a missing value counts as "no shot created".
right_df['Shot Created'] = right_df['Shot Creation '].str.startswith('Yes', na=False)

right_df.to_csv(os.path.join(output_dir, 'processed_right_side_data.csv'), index=False)


# Part 4: Right-Side Dataset Visualizations
# ----------------------------------------

# 1. First Contact Success Rate (right side) - Pie Chart
plt.figure(figsize=(10, 6))
success_counts = right_df['First Contact Success'].value_counts()
labels = ['Success', 'Failure']
# Sizes follow the order of `labels`; .get(..., 0) covers an outcome that never occurs
sizes = [success_counts.get(True, 0), success_counts.get(False, 0)]
colors = ['#66b3ff', '#ff9999']
explode = (0.1, 0)  # pull the first slice (Success) out of the pie

plt.pie(sizes, explode=explode, labels=labels, colors=colors, autopct='%1.1f%%',
        shadow=True, startangle=90)
plt.axis('equal')
plt.title('First Contact Success Rate (Right Side)', fontsize=16)
plt.tight_layout()
plt.savefig(os.path.join(output_dir, 'right_side_first_contact_success_pie.png'), dpi=300)
plt.close()


# 2. Possession retention after a successful first contact (right side)
successful_first_contacts = right_df[right_df['First Contact Success'] == True]
# Both charts in this section are meaningless without at least one successful
# first contact, so the same guard is applied to the pie and the bar chart.
has_successful_contacts = len(successful_first_contacts) > 0

plt.figure(figsize=(10, 6))
retention_counts = successful_first_contacts['Possession Retained'].value_counts()
labels = ['Retained (≥7 sec)', 'Lost (<7 sec)']
sizes = [retention_counts.get(True, 0), retention_counts.get(False, 0)]
colors = ['#66c2a5', '#fc8d62']
explode = (0.1, 0)  # explode the 1st slice (Retained)

if has_successful_contacts:
    plt.pie(sizes, explode=explode, labels=labels, colors=colors, autopct='%1.1f%%',
            shadow=True, startangle=90)
    plt.axis('equal')
else:
    # With no successful contacts both sizes are 0 and there are no shares to
    # draw; show the same placeholder text the bar chart below uses.
    plt.text(0.5, 0.5, 'No successful first contacts', ha='center', va='center', fontsize=14)
plt.title('Possession Retention Success Rate (Right Side)\n(After Successful First Contact)', fontsize=16)
plt.tight_layout()
plt.savefig(os.path.join(output_dir, 'right_side_possession_retention_success_pie.png'), dpi=300)
plt.close()

# Possession duration per successful throw-in, shortest to longest
plt.figure(figsize=(12, 6))
if has_successful_contacts:
    successful_first_contacts = successful_first_contacts.sort_values('Possession Duration (sec)')
    # Green bars reached the 7-second threshold, orange bars did not
    bars = plt.bar(range(len(successful_first_contacts)), successful_first_contacts['Possession Duration (sec)'],
            color=[('#66c2a5' if x >= 7 else '#fc8d62') for x in successful_first_contacts['Possession Duration (sec)']])

    plt.axhline(y=7, color='red', linestyle='--', label='7 Second Threshold')

    plt.xlabel('Throw-in Number', fontsize=12)
    plt.ylabel('Possession Duration (seconds)', fontsize=12)
    plt.title('Possession Duration After Successful First Contact (Right Side)', fontsize=16)
    # Tick labels show the original throw-in number of each (sorted) bar
    plt.xticks(range(len(successful_first_contacts)), successful_first_contacts['No.'])
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    # Print the duration just above each bar
    for i, bar in enumerate(bars):
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width()/2., height + 0.5,
                f'{height:.0f}s',
                ha='center', va='bottom', rotation=0)
else:
    plt.text(0.5, 0.5, 'No successful first contacts', ha='center', va='center', fontsize=14)

plt.tight_layout()
plt.savefig(os.path.join(output_dir, 'right_side_possession_duration_bar.png'), dpi=300)
plt.close()

# 3. Shot Creation Success Rate (right side) - Pie Chart
plt.figure(figsize=(10, 6))
# 'Shot Created' is the flag column derived earlier from 'Shot Creation '
shot_counts = right_df['Shot Created'].value_counts()
labels = ['No Shot Created', 'Shot Created']
# Sizes follow the order of `labels`: the False count first, then the True count
sizes = [shot_counts.get(False, 0), shot_counts.get(True, 0)]
colors = ['#8da0cb', '#fc8d62']

plt.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%',
        shadow=True, startangle=90)
plt.axis('equal')
plt.title('Shot Creation Success Rate (Right Side)', fontsize=16)
plt.tight_layout()
plt.savefig(os.path.join(output_dir, 'right_side_shot_creation_success_pie.png'), dpi=300)
plt.close()


# 4. First Contact Success Rate by Length and Direction (right side) - Heatmap
# Number of successful first contacts per (Length, Direction) combination
success_counts = pd.crosstab(
    [right_df['Length']],
    [right_df['Direction']],
    values=right_df['First Contact Success'],
    aggfunc='sum'
)

# Create a cross-tabulation of total counts
total_counts = pd.crosstab(
    [right_df['Length']],
    [right_df['Direction']]
)

# Success rate in percent for every combination
success_rates = (success_counts / total_counts) * 100

# Cells without a rate are shown as 0.
# NOTE(review): this appears to make a combination with no throw-ins at all
# look the same as one with a 0% success rate - confirm this is intended.
success_rates = success_rates.fillna(0)

plt.figure(figsize=(10, 8))
sns.heatmap(success_rates, annot=True, cmap='YlGnBu', fmt='.1f',
            cbar_kws={'label': 'First Contact Success Rate (%)'})
plt.title('First Contact Success Rate by Throw-in Length and Direction (Right Side)', fontsize=16)
plt.xlabel('Direction', fontsize=14)
plt.ylabel('Length', fontsize=14)

plt.tight_layout()
plt.savefig(os.path.join(output_dir, 'right_side_first_contact_success_heatmap.png'), dpi=300)
plt.close()

# 5. Throw-in Length Distribution (right side) - Bar Chart
plt.figure(figsize=(10, 6))
# sort_index() orders the categories by label, so the colours below are handed
# out in label order.
length_counts = right_df['Length'].value_counts().sort_index()
bars = plt.bar(length_counts.index, length_counts.values, color=['#8dd3c7', '#bebada', '#fb8072'])

plt.xlabel('Throw-in Length', fontsize=14)
plt.ylabel('Number of Throw-ins', fontsize=14)
plt.title('Distribution of Throw-ins by Length (Right Side)', fontsize=16)
plt.grid(axis='y', linestyle='--', alpha=0.7)

# Absolute count just above each bar
for i, bar in enumerate(bars):
    height = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2., height + 0.1,
            f'{height}',
            ha='center', va='bottom', fontsize=12)

# Share of all throw-ins, printed in white in the middle of each bar
total = length_counts.sum()
for i, bar in enumerate(bars):
    height = bar.get_height()
    percentage = (height / total) * 100
    plt.text(bar.get_x() + bar.get_width()/2., height/2,
            f'{percentage:.1f}%',
            ha='center', va='center', fontsize=12, color='white', fontweight='bold')

plt.tight_layout()
plt.savefig(os.path.join(output_dir, 'right_side_throw_in_length_distribution.png'), dpi=300)
plt.close()

# 6. Throw-in Direction Distribution (right side) - Bar Chart
plt.figure(figsize=(10, 6))
direction_counts = right_df['Direction'].value_counts().sort_index()
bars = plt.bar(direction_counts.index, direction_counts.values, color=['#80b1d3', '#fdb462', '#b3de69'])

plt.xlabel('Throw-in Direction', fontsize=14)
plt.ylabel('Number of Throw-ins', fontsize=14)
plt.title('Distribution of Throw-ins by Direction (Right Side)', fontsize=16)
plt.grid(axis='y', linestyle='--', alpha=0.7)

# Absolute count just above each bar
for i, bar in enumerate(bars):
    height = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2., height + 0.1,
            f'{height}',
            ha='center', va='bottom', fontsize=12)

# Share of all throw-ins, printed in white in the middle of each bar
total = direction_counts.sum()
for i, bar in enumerate(bars):
    height = bar.get_height()
    percentage = (height / total) * 100
    plt.text(bar.get_x() + bar.get_width()/2., height/2,
            f'{percentage:.1f}%',
            ha='center', va='center', fontsize=12, color='white', fontweight='bold')

plt.tight_layout()
plt.savefig(os.path.join(output_dir, 'right_side_throw_in_direction_distribution.png'), dpi=300)
plt.close()

# 7. Right-side summary: the three headline rates as one bar chart
plt.figure(figsize=(12, 7))
metrics = [
    'First Contact\nSuccess Rate',
    'Possession Retention\nSuccess Rate',
    'Shot Creation\nSuccess Rate',
]

# Raw tallies behind the percentages
right_throw_total = len(right_df)
right_contact_wins = right_df['First Contact Success'].sum()
right_retained_wins = len(right_df[(right_df['First Contact Success'] == True) &
                                   (right_df['Possession Duration (sec)'] >= 7)])
right_shot_wins = right_df['Shot Created'].sum()

# Retention is measured against successful first contacts only (0 if there were none)
if right_contact_wins > 0:
    right_retention_pct = (right_retained_wins / right_contact_wins) * 100
else:
    right_retention_pct = 0

values = [
    (right_contact_wins / right_throw_total) * 100,
    right_retention_pct,
    (right_shot_wins / right_throw_total) * 100,
]

bars = plt.bar(metrics, values, color=['#66b3ff', '#66c2a5', '#fc8d62'])
plt.title('Comparison of All Success Rates (Right Side)', fontsize=16)
plt.ylabel('Success Rate (%)', fontsize=14)
plt.ylim(0, 100)  # percentages always use the full 0-100 scale
plt.grid(axis='y', linestyle='--', alpha=0.7)

# Write each rate just above its bar
for i, bar in enumerate(bars):
    height = bar.get_height()
    label_x = bar.get_x() + bar.get_width()/2.
    plt.text(label_x, height + 2, f'{height:.1f}%',
             ha='center', va='bottom', fontsize=12)

plt.tight_layout()
right_summary_png = os.path.join(output_dir, 'right_side_all_success_rates_comparison.png')
plt.savefig(right_summary_png, dpi=300)
plt.close()

# Part 5: Left vs Right Comparison Figures
# ---------------------------------------
# Each comparison figure is one wide canvas with the left-side chart in the
# first subplot and the right-side chart in the second.

# 1. First Contact Success Rate: left vs right pies
plt.figure(figsize=(18, 8))

plt.subplot(1, 2, 1)
left_success_counts = left_df['First Contact Success'].value_counts()
left_labels = ['Success', 'Failure']
left_sizes = [left_success_counts.get(True, 0), left_success_counts.get(False, 0)]
left_colors = ['#66b3ff', '#ff9999']
left_explode = (0.1, 0)

plt.pie(left_sizes, explode=left_explode, labels=left_labels, colors=left_colors, autopct='%1.1f%%',
        shadow=True, startangle=90)
plt.axis('equal')
plt.title('First Contact Success Rate (Left Side)', fontsize=16)

plt.subplot(1, 2, 2)
right_success_counts = right_df['First Contact Success'].value_counts()
right_labels = ['Success', 'Failure']
right_sizes = [right_success_counts.get(True, 0), right_success_counts.get(False, 0)]
right_colors = ['#66b3ff', '#ff9999']
right_explode = (0.1, 0)

plt.pie(right_sizes, explode=right_explode, labels=right_labels, colors=right_colors, autopct='%1.1f%%',
        shadow=True, startangle=90)
plt.axis('equal')
plt.title('First Contact Success Rate (Right Side)', fontsize=16)

plt.suptitle('Comparison of First Contact Success Rate: Left vs Right Side', fontsize=20)
plt.tight_layout()
plt.savefig(os.path.join(output_dir, 'comparison_first_contact_success.png'), dpi=300)
plt.close()

# 2. Possession Retention Rate: left vs right pies
# Retention is evaluated only for throw-ins with a successful first contact.
plt.figure(figsize=(18, 8))

plt.subplot(1, 2, 1)
left_successful_first_contacts = left_df[left_df['First Contact Success'] == True]
# Recomputes the >= 7 second flag from the numeric duration column
left_retention_counts = left_successful_first_contacts['Possession Duration (sec)'].apply(lambda x: x >= 7).value_counts()
left_labels = ['Retained (≥7 sec)', 'Lost (<7 sec)']
left_sizes = [left_retention_counts.get(True, 0), left_retention_counts.get(False, 0)]
left_colors = ['#66c2a5', '#fc8d62']
left_explode = (0.1, 0)

plt.pie(left_sizes, explode=left_explode, labels=left_labels, colors=left_colors, autopct='%1.1f%%',
        shadow=True, startangle=90)
plt.axis('equal')
plt.title('Possession Retention Rate (Left Side)', fontsize=16)

plt.subplot(1, 2, 2)
right_successful_first_contacts = right_df[right_df['First Contact Success'] == True]
right_retention_counts = right_successful_first_contacts['Possession Duration (sec)'].apply(lambda x: x >= 7).value_counts()
right_labels = ['Retained (≥7 sec)', 'Lost (<7 sec)']
right_sizes = [right_retention_counts.get(True, 0), right_retention_counts.get(False, 0)]
right_colors = ['#66c2a5', '#fc8d62']
right_explode = (0.1, 0)

plt.pie(right_sizes, explode=right_explode, labels=right_labels, colors=right_colors, autopct='%1.1f%%',
        shadow=True, startangle=90)
plt.axis('equal')
plt.title('Possession Retention Rate (Right Side)', fontsize=16)

plt.suptitle('Comparison of Possession Retention Rate: Left vs Right Side', fontsize=20)
plt.tight_layout()
plt.savefig(os.path.join(output_dir, 'comparison_possession_retention.png'), dpi=300)
plt.close()

# 3. Throw-in Length Distribution: left vs right bar charts
plt.figure(figsize=(18, 8))

left_length_counts = left_df['Length'].value_counts().sort_index()
right_length_counts = right_df['Length'].value_counts().sort_index()

# Give every length category ONE colour that is shared by both panels.
# Assigning colours by bar position would colour the same category differently
# on each side whenever one side lacks a category the other one has.
# When both sides contain the same categories the colours are unchanged.
length_palette = ['#8dd3c7', '#bebada', '#fb8072']
length_categories = left_length_counts.index.union(right_length_counts.index)
length_color_by_category = {
    category: length_palette[position % len(length_palette)]
    for position, category in enumerate(length_categories)
}

plt.subplot(1, 2, 1)
left_bars = plt.bar(left_length_counts.index, left_length_counts.values,
                    color=[length_color_by_category[category] for category in left_length_counts.index])
plt.xlabel('Throw-in Length', fontsize=14)
plt.ylabel('Number of Throw-ins', fontsize=14)
plt.title('Length Distribution (Left Side)', fontsize=16)
plt.grid(axis='y', linestyle='--', alpha=0.7)

# Share of all left-side throw-ins, printed in the middle of each bar
left_total = left_length_counts.sum()
for bar in left_bars:
    height = bar.get_height()
    percentage = (height / left_total) * 100
    plt.text(bar.get_x() + bar.get_width()/2., height/2,
            f'{percentage:.1f}%',
            ha='center', va='center', fontsize=12, color='white', fontweight='bold')

plt.subplot(1, 2, 2)
right_bars = plt.bar(right_length_counts.index, right_length_counts.values,
                     color=[length_color_by_category[category] for category in right_length_counts.index])
plt.xlabel('Throw-in Length', fontsize=14)
plt.ylabel('Number of Throw-ins', fontsize=14)
plt.title('Length Distribution (Right Side)', fontsize=16)
plt.grid(axis='y', linestyle='--', alpha=0.7)

# Share of all right-side throw-ins, printed in the middle of each bar
right_total = right_length_counts.sum()
for bar in right_bars:
    height = bar.get_height()
    percentage = (height / right_total) * 100
    plt.text(bar.get_x() + bar.get_width()/2., height/2,
            f'{percentage:.1f}%',
            ha='center', va='center', fontsize=12, color='white', fontweight='bold')

plt.suptitle('Comparison of Throw-in Length Distribution: Left vs Right Side', fontsize=20)
plt.tight_layout()
plt.savefig(os.path.join(output_dir, 'comparison_length_distribution.png'), dpi=300)
plt.close()

# 4. Throw-in Direction Distribution: left vs right bar charts
plt.figure(figsize=(18, 8))

left_direction_counts = left_df['Direction'].value_counts().sort_index()
right_direction_counts = right_df['Direction'].value_counts().sort_index()

# Give every direction ONE colour that is shared by both panels.
# Assigning colours by bar position would shift them when a direction is missing
# on one side (e.g. no backward throw-ins), so the same direction would be drawn
# in different colours left and right.
# When both sides contain the same directions the colours are unchanged.
direction_palette = ['#80b1d3', '#fdb462', '#b3de69']
direction_categories = left_direction_counts.index.union(right_direction_counts.index)
direction_color_by_category = {
    category: direction_palette[position % len(direction_palette)]
    for position, category in enumerate(direction_categories)
}

plt.subplot(1, 2, 1)
left_bars = plt.bar(left_direction_counts.index, left_direction_counts.values,
                    color=[direction_color_by_category[category] for category in left_direction_counts.index])
plt.xlabel('Throw-in Direction', fontsize=14)
plt.ylabel('Number of Throw-ins', fontsize=14)
plt.title('Direction Distribution (Left Side)', fontsize=16)
plt.grid(axis='y', linestyle='--', alpha=0.7)

# Share of all left-side throw-ins, printed in the middle of each bar
left_total = left_direction_counts.sum()
for bar in left_bars:
    height = bar.get_height()
    percentage = (height / left_total) * 100
    plt.text(bar.get_x() + bar.get_width()/2., height/2,
            f'{percentage:.1f}%',
            ha='center', va='center', fontsize=12, color='white', fontweight='bold')

plt.subplot(1, 2, 2)
right_bars = plt.bar(right_direction_counts.index, right_direction_counts.values,
                     color=[direction_color_by_category[category] for category in right_direction_counts.index])
plt.xlabel('Throw-in Direction', fontsize=14)
plt.ylabel('Number of Throw-ins', fontsize=14)
plt.title('Direction Distribution (Right Side)', fontsize=16)
plt.grid(axis='y', linestyle='--', alpha=0.7)

# Share of all right-side throw-ins, printed in the middle of each bar
right_total = right_direction_counts.sum()
for bar in right_bars:
    height = bar.get_height()
    percentage = (height / right_total) * 100
    plt.text(bar.get_x() + bar.get_width()/2., height/2,
            f'{percentage:.1f}%',
            ha='center', va='center', fontsize=12, color='white', fontweight='bold')

plt.suptitle('Comparison of Throw-in Direction Distribution: Left vs Right Side', fontsize=20)
plt.tight_layout()
plt.savefig(os.path.join(output_dir, 'comparison_direction_distribution.png'), dpi=300)
plt.close()

# 5. Success Rates: left vs right, one three-bar chart per side
plt.figure(figsize=(18, 8))

plt.subplot(1, 2, 1)
left_metrics = ['First Contact\nSuccess Rate', 'Possession Retention\nSuccess Rate', 'Shot Creation\nSuccess Rate']
# In order: share of throw-ins with a successful first contact; share of those
# successful contacts with possession kept >= 7 seconds (0 when there were no
# successful contacts); share of throw-ins whose 'Shot Out' value is 'Yes'.
left_values = [
    (left_df['First Contact Success'].sum() / len(left_df)) * 100,
    (len(left_df[(left_df['First Contact Success'] == True) & (left_df['Possession Duration (sec)'] >= 7)]) /
     left_df['First Contact Success'].sum()) * 100 if left_df['First Contact Success'].sum() > 0 else 0,
    (len(left_df[left_df['Shot Out'] == 'Yes']) / len(left_df)) * 100
]

left_bars = plt.bar(left_metrics, left_values, color=['#66b3ff', '#66c2a5', '#fc8d62'])
plt.ylabel('Success Rate (%)', fontsize=14)
plt.title('Success Rates (Left Side)', fontsize=16)
plt.ylim(0, 100)
plt.grid(axis='y', linestyle='--', alpha=0.7)

# Write each rate just above its bar
for i, bar in enumerate(left_bars):
    height = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2., height + 2,
            f'{height:.1f}%',
            ha='center', va='bottom', fontsize=12)

plt.subplot(1, 2, 2)
right_metrics = ['First Contact\nSuccess Rate', 'Possession Retention\nSuccess Rate', 'Shot Creation\nSuccess Rate']
# Same three rates for the right side; shot creation uses the derived
# 'Shot Created' flag column instead of 'Shot Out'.
right_values = [
    (right_df['First Contact Success'].sum() / len(right_df)) * 100,
    (len(right_df[(right_df['First Contact Success'] == True) & (right_df['Possession Duration (sec)'] >= 7)]) /
     right_df['First Contact Success'].sum()) * 100 if right_df['First Contact Success'].sum() > 0 else 0,
    (right_df['Shot Created'].sum() / len(right_df)) * 100
]

right_bars = plt.bar(right_metrics, right_values, color=['#66b3ff', '#66c2a5', '#fc8d62'])
plt.ylabel('Success Rate (%)', fontsize=14)
plt.title('Success Rates (Right Side)', fontsize=16)
plt.ylim(0, 100)
plt.grid(axis='y', linestyle='--', alpha=0.7)

# Write each rate just above its bar
for i, bar in enumerate(right_bars):
    height = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2., height + 2,
            f'{height:.1f}%',
            ha='center', va='bottom', fontsize=12)

plt.suptitle('Comparison of Success Rates: Left vs Right Side', fontsize=20)
plt.tight_layout()
plt.savefig(os.path.join(output_dir, 'comparison_success_rates.png'), dpi=300)
plt.close()

# 6. First Contact Success Rate heatmaps: left vs right
# This figure uses explicit Axes objects (via GridSpec) so that each seaborn
# heatmap can be directed to its own panel with ax=...
fig = plt.figure(figsize=(20, 10))
gs = GridSpec(1, 2, figure=fig)

ax1 = fig.add_subplot(gs[0, 0])
# Successful first contacts per (Length, Direction) combination, left side
left_success_counts = pd.crosstab(
    [left_df['Length']],
    [left_df['Direction']],
    values=left_df['First Contact Success'],
    aggfunc='sum'
)
# Throw-ins per (Length, Direction) combination, left side
left_total_counts = pd.crosstab(
    [left_df['Length']],
    [left_df['Direction']]
)
left_success_rates = (left_success_counts / left_total_counts) * 100
# Cells without a rate are shown as 0.
# NOTE(review): a combination with no throw-ins then looks like a 0% success
# rate - confirm this is intended.
left_success_rates = left_success_rates.fillna(0)

sns.heatmap(left_success_rates, annot=True, cmap='YlGnBu', fmt='.1f',
            cbar_kws={'label': 'First Contact Success Rate (%)'}, ax=ax1)
ax1.set_title('First Contact Success Rate by Length and Direction (Left Side)', fontsize=16)
ax1.set_xlabel('Direction', fontsize=14)
ax1.set_ylabel('Length', fontsize=14)

ax2 = fig.add_subplot(gs[0, 1])
# Same two tables for the right side
right_success_counts = pd.crosstab(
    [right_df['Length']],
    [right_df['Direction']],
    values=right_df['First Contact Success'],
    aggfunc='sum'
)
right_total_counts = pd.crosstab(
    [right_df['Length']],
    [right_df['Direction']]
)
right_success_rates = (right_success_counts / right_total_counts) * 100
right_success_rates = right_success_rates.fillna(0)

# NOTE(review): each heatmap is built from its own side's categories, so the
# two panels may not share the same rows/columns.
sns.heatmap(right_success_rates, annot=True, cmap='YlGnBu', fmt='.1f',
            cbar_kws={'label': 'First Contact Success Rate (%)'}, ax=ax2)
ax2.set_title('First Contact Success Rate by Length and Direction (Right Side)', fontsize=16)
ax2.set_xlabel('Direction', fontsize=14)
ax2.set_ylabel('Length', fontsize=14)

plt.suptitle('Comparison of First Contact Success Rate Heatmaps: Left vs Right Side', fontsize=20)
plt.tight_layout()
plt.savefig(os.path.join(output_dir, 'comparison_heatmaps.png'), dpi=300)
plt.close()

# 7. Grouped bar chart: every headline metric gets a left-side and a right-side bar
plt.figure(figsize=(15, 10))

metrics = ['First Contact\nSuccess Rate', 'Possession Retention\nSuccess Rate', 'Shot Creation\nSuccess Rate']
sides = ['Left Side', 'Right Side']

# Left-side rates (shot creation is read from the 'Shot Out' column)
left_contact_total = left_df['First Contact Success'].sum()
left_retained_total = len(left_df[(left_df['First Contact Success'] == True) &
                                  (left_df['Possession Duration (sec)'] >= 7)])
if left_contact_total > 0:
    left_retention_pct = (left_retained_total / left_contact_total) * 100
else:
    left_retention_pct = 0
left_values = [
    (left_contact_total / len(left_df)) * 100,
    left_retention_pct,
    (len(left_df[left_df['Shot Out'] == 'Yes']) / len(left_df)) * 100,
]

# Right-side rates (shot creation is read from the derived 'Shot Created' flag)
right_contact_total = right_df['First Contact Success'].sum()
right_retained_total = len(right_df[(right_df['First Contact Success'] == True) &
                                    (right_df['Possession Duration (sec)'] >= 7)])
if right_contact_total > 0:
    right_retention_pct = (right_retained_total / right_contact_total) * 100
else:
    right_retention_pct = 0
right_values = [
    (right_contact_total / len(right_df)) * 100,
    right_retention_pct,
    (right_df['Shot Created'].sum() / len(right_df)) * 100,
]

# Bar positions: the right-side bar sits one bar-width to the right of the left one
barWidth = 0.35
r1 = np.arange(len(metrics))
r2 = [x + barWidth for x in r1]

plt.bar(r1, left_values, width=barWidth, color='#66b3ff', label='Left Side')
plt.bar(r2, right_values, width=barWidth, color='#ff9999', label='Right Side')

plt.title('Comparison of Success Rates Between Left and Right Sides', fontsize=18)
plt.xlabel('Metrics', fontsize=14)
plt.ylabel('Success Rate (%)', fontsize=14)
# Centre each metric label between its pair of bars
group_centres = [r + barWidth/2 for r in range(len(metrics))]
plt.xticks(group_centres, metrics)
plt.ylim(0, 100)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.legend()

# Print every rate just above its bar
for i, (left_pct, right_pct) in enumerate(zip(left_values, right_values)):
    plt.text(r1[i], left_pct + 2, f'{left_pct:.1f}%', ha='center', va='bottom')
    plt.text(r2[i], right_pct + 2, f'{right_pct:.1f}%', ha='center', va='bottom')

plt.tight_layout()
combined_png = os.path.join(output_dir, 'combined_success_rates_comparison.png')
plt.savefig(combined_png, dpi=300)
plt.close()


# Part 6: Numbers for the written summary
# --------------------------------------

# Left side: first contact, retention after a successful contact, shot creation
left_first_contact_success_rate = (left_df['First Contact Success'].sum() / len(left_df)) * 100
left_successful_first_contacts = left_df[left_df['First Contact Success'] == True]
left_retained_after_contact = left_successful_first_contacts[
    left_successful_first_contacts['Possession Duration (sec)'] >= 7
]
if len(left_successful_first_contacts) > 0:
    left_possession_retention_rate = (len(left_retained_after_contact) /
                                      len(left_successful_first_contacts)) * 100
else:
    # No successful first contact means there is nothing to retain
    left_possession_retention_rate = 0
left_shot_creation_rate = (len(left_df[left_df['Shot Out'] == 'Yes']) / len(left_df)) * 100

# Right side: same three rates (shots come from the derived 'Shot Created' flag)
right_first_contact_success_rate = (right_df['First Contact Success'].sum() / len(right_df)) * 100
right_successful_first_contacts = right_df[right_df['First Contact Success'] == True]
right_retained_after_contact = right_successful_first_contacts[
    right_successful_first_contacts['Possession Duration (sec)'] >= 7
]
if len(right_successful_first_contacts) > 0:
    right_possession_retention_rate = (len(right_retained_after_contact) /
                                       len(right_successful_first_contacts)) * 100
else:
    right_possession_retention_rate = 0
right_shot_creation_rate = (right_df['Shot Created'].sum() / len(right_df)) * 100

# Differences are expressed as right minus left, in percentage points
first_contact_diff = right_first_contact_success_rate - left_first_contact_success_rate
possession_retention_diff = right_possession_retention_rate - left_possession_retention_rate
shot_creation_diff = right_shot_creation_rate - left_shot_creation_rate

# Category shares in percent (these rebind the *_counts names used by the
# comparison charts, which held absolute counts)
left_length_counts = left_df['Length'].value_counts(normalize=True).mul(100)
right_length_counts = right_df['Length'].value_counts(normalize=True).mul(100)

left_direction_counts = left_df['Direction'].value_counts(normalize=True).mul(100)
right_direction_counts = right_df['Direction'].value_counts(normalize=True).mul(100)

# Write a plain-text summary comparing the left-side and right-side throw-ins.
def _success_rate_by(df, column):
    """Return ``{value: first-contact success rate in %}`` for each value of `column`.

    Values that match no rows (for example NaN, which never compares equal)
    are reported as 0 instead of triggering a division by zero.
    """
    rates = {}
    for value in df[column].unique():
        subset = df[df[column] == value]
        rates[value] = (subset['First Contact Success'].sum() / len(subset)) * 100 if len(subset) > 0 else 0
    return rates


left_success_by_length = _success_rate_by(left_df, 'Length')
left_success_by_direction = _success_rate_by(left_df, 'Direction')
right_success_by_length = _success_rate_by(right_df, 'Length')
right_success_by_direction = _success_rate_by(right_df, 'Direction')

left_sample_size = len(left_df)
right_sample_size = len(right_df)

with open(os.path.join(output_dir, 'throw_in_comparison_summary.txt'), 'w', encoding='utf-8') as f:
    f.write('THROW-IN ASSESSMENT COMPARISON SUMMARY\n')
    f.write('=====================================\n\n')

    f.write('KEY FINDINGS:\n')
    f.write(f'1. First Contact Success Rate: The right side shows a {abs(first_contact_diff):.1f}% {"increase" if first_contact_diff > 0 else "decrease"} compared to the left side ({right_first_contact_success_rate:.1f}% vs {left_first_contact_success_rate:.1f}%).\n\n')

    f.write(f'2. Possession Retention Rate: The right side shows a dramatic {abs(possession_retention_diff):.1f}% {"increase" if possession_retention_diff > 0 else "decrease"} in possession retention after successful first contact ({right_possession_retention_rate:.1f}% vs {left_possession_retention_rate:.1f}%).\n\n')

    # Report shot creation from the computed rates; the "both 0%" sentence is
    # only emitted when it is actually true for the loaded data.
    if left_shot_creation_rate == 0 and right_shot_creation_rate == 0:
        f.write('3. Shot Creation: Neither side generated shots from throw-ins (both 0%).\n\n')
    else:
        f.write(f'3. Shot Creation: The right side shows a {abs(shot_creation_diff):.1f}% {"increase" if shot_creation_diff > 0 else "decrease"} in shot creation compared to the left side ({right_shot_creation_rate:.1f}% vs {left_shot_creation_rate:.1f}%).\n\n')

    f.write('4. Throw-in Characteristics:\n')
    # NOTE(review): the two narrative sentences below are fixed text and are not
    # derived from the data; confirm them against the percentages printed beneath.
    f.write('   - Length Distribution: The right side uses more medium-length throw-ins compared to the left side.\n')
    f.write(f'     Left Side: Short {left_length_counts.get("Short", 0):.1f}%, Medium {left_length_counts.get("Medium", 0):.1f}%\n')
    f.write(f'     Right Side: Short {right_length_counts.get("Short", 0):.1f}%, Medium {right_length_counts.get("Medium", 0):.1f}%\n\n')

    f.write('   - Direction Distribution: The right side has no backward throw-ins, while the left side uses all three directions.\n')
    f.write(f'     Left Side: Forward {left_direction_counts.get("Forward", 0):.1f}%, Lateral {left_direction_counts.get("Lateral", 0):.1f}%, Backward {left_direction_counts.get("Backward", 0):.1f}%\n')
    f.write(f'     Right Side: Forward {right_direction_counts.get("Forward", 0):.1f}%, Lateral {right_direction_counts.get("Lateral", 0):.1f}%, Backward {right_direction_counts.get("Backward", 0):.1f}%\n\n')

    f.write('5. Success by Throw-in Type:\n')

    # Sort the union of categories so the report order is the same on every run
    # (plain set iteration order is not stable). key=str tolerates NaN entries.
    f.write('   - By Length:\n')
    for length in sorted(set(left_success_by_length) | set(right_success_by_length), key=str):
        left_rate = left_success_by_length.get(length, 0)
        right_rate = right_success_by_length.get(length, 0)
        f.write(f'     {length}: Left Side {left_rate:.1f}%, Right Side {right_rate:.1f}%\n')

    f.write('   - By Direction:\n')
    for direction in sorted(set(left_success_by_direction) | set(right_success_by_direction), key=str):
        left_rate = left_success_by_direction.get(direction, 0)
        right_rate = right_success_by_direction.get(direction, 0)
        f.write(f'     {direction}: Left Side {left_rate:.1f}%, Right Side {right_rate:.1f}%\n')

    # Sample sizes come from the loaded frames rather than fixed numbers.
    if right_sample_size < left_sample_size:
        f.write(f'\nNOTE: The right side dataset contains fewer throw-ins ({right_sample_size}) compared to the left side ({left_sample_size}), which may affect the reliability of comparisons.\n')
    elif right_sample_size > left_sample_size:
        f.write(f'\nNOTE: The left side dataset contains fewer throw-ins ({left_sample_size}) compared to the right side ({right_sample_size}), which may affect the reliability of comparisons.\n')
    else:
        f.write(f'\nNOTE: Both datasets contain {left_sample_size} throw-ins; small sample sizes may affect the reliability of comparisons.\n')

# Tell the user where the figures were written and how to preview one in Colab.
closing_message = (
    "Analysis complete! All visualizations have been saved to the 'output' directory.",
    "To display the images in Google Colab, you can use the following code:",
    "from IPython.display import Image, display",
    "display(Image('./output/comparison_heatmaps.png'))",
)
print("\n".join(closing_message))


FileNotFoundError: [Errno 2] No such file or directory: 'Left Side_table_data.csv'

In [None]:
from google.colab import files
uploaded = files.upload()


Saving post intervention data.xlsx to post intervention data.xlsx


In [None]:
"""
Throw-In Intervention Analysis Visualization Code

This script loads baseline and post-intervention throw-in data for two athletes,
calculates key performance metrics, and creates visualizations to compare progress.
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import os

# Create output directory for visualizations
os.makedirs('progress_analysis', exist_ok=True)

# Function to parse possession duration to seconds
def parse_duration(duration_str):
    """Return the first run of digits found in `duration_str` as an int.

    Missing values (anything `pd.isna` reports as NA), the literal string
    'Zero', and text containing no digits all yield 0.
    """
    if pd.isna(duration_str):
        return 0
    if duration_str == 'Zero':
        return 0

    # NOTE(review): only the first integer is read, so '7.5 sec' parses as 7 —
    # confirm that durations are always recorded as whole seconds.
    digits = re.search(r'(\d+)', str(duration_str))
    return int(digits.group(1)) if digits else 0

# Load and preprocess baseline data
# The upload cell earlier in this notebook saved the left-side file as
# 'Left Side_table_data.csv' (with a space). The name without the space is kept
# as a fallback so sessions that already use it continue to work.
left_baseline_candidates = ['Left Side_table_data.csv', 'LeftSide_table_data.csv']
left_baseline_path = next(
    (path for path in left_baseline_candidates if os.path.exists(path)),
    left_baseline_candidates[0],  # none found: let read_csv report the expected name
)
left_baseline = pd.read_csv(left_baseline_path)
right_baseline = pd.read_csv('right_side_throw_ins.csv')

# Clean and standardize baseline data
# .copy() makes each filtered frame independent, so the column assignments
# below do not write into a slice of the frame returned by read_csv.
left_baseline = left_baseline[left_baseline['No.'] != 'Total'].copy()  # Remove any summary rows
left_baseline['No.'] = pd.to_numeric(left_baseline['No.'], errors='coerce')
# na=False treats a missing Success/Fail cell as a failed first contact and
# keeps the column strictly boolean, which the later boolean indexing requires.
left_baseline['First Contact Success'] = left_baseline['Success/Fail'].str.startswith('Yes', na=False)
left_baseline['Possession Duration (sec)'] = left_baseline['Possession Duration'].apply(parse_duration)
left_baseline['Possession Retained'] = left_baseline['Possession Duration (sec)'] >= 7
left_baseline['Shot Created'] = left_baseline['Shot Out'] == 'Yes'
left_baseline['Athlete'] = 2  # Left side is Athlete 2

right_baseline = right_baseline[right_baseline['No.'] != 'Total'].copy()  # Remove any summary rows
right_baseline['No.'] = pd.to_numeric(right_baseline['No.'], errors='coerce')
right_baseline['First Contact Success'] = right_baseline['Success/Fail'].str.startswith('Yes', na=False)
right_baseline['Possession Duration (sec)'] = right_baseline['Possession Duration'].apply(parse_duration)
right_baseline['Possession Retained'] = right_baseline['Possession Duration (sec)'] >= 7
# The right-side header is looked up with its trailing space ('Shot Creation ');
# when the column is absent every throw-in is marked as not creating a shot.
right_baseline['Shot Created'] = right_baseline['Shot Creation '].str.startswith('Yes', na=False) if 'Shot Creation ' in right_baseline.columns else False
right_baseline['Athlete'] = 1  # Right side is Athlete 1

# Combine baseline data
baseline = pd.concat([left_baseline, right_baseline], ignore_index=True)

# Load and preprocess post-intervention data
# The upload cell saved the workbook as 'post intervention data.xlsx' (with
# spaces). The name without spaces is kept as a fallback.
post_candidates = ['post intervention data.xlsx', 'postinterventiondata.xlsx']
post_path = next(
    (path for path in post_candidates if os.path.exists(path)),
    post_candidates[0],  # none found: let read_excel report the expected name
)
post = pd.read_excel(post_path)

# Print column names to debug
print('Post-intervention column names:')
for col in post.columns:
    print(f'Column: \"{col}\"')

# Process post-intervention data
# A value counts as a success when it starts with 's' (case-insensitive);
# na=False makes missing cells count as failures and keeps the column boolean.
post['First Contact Success'] = post['Success\\Fail'].str.lower().str.startswith('s', na=False)
post['Possession Duration (sec)'] = post['Possession Duration'].apply(parse_duration)
post['Possession Retained'] = post['Possession Duration (sec)'] >= 7

# Handle the shot column with proper name: match 'shot' ignoring case and
# surrounding whitespace, and fail with a readable message if it is absent.
shot_column = next((col for col in post.columns if str(col).strip().lower() == 'shot'), None)
if shot_column is None:
    raise KeyError(f"No 'Shot' column found in post-intervention data; columns are: {list(post.columns)}")
post['Shot Created'] = post[shot_column].str.lower() == 'yes'

# Print basic statistics for verification: total rows and rows per athlete,
# first for the baseline frame and then for the post-intervention frame.
for heading, frame in (('Baseline Data Summary:', baseline),
                       ('Post-Intervention Data Summary:', post)):
    right_rows = frame[frame.Athlete == 1]
    left_rows = frame[frame.Athlete == 2]
    print(f'\n{heading}')
    print(f'Total throw-ins: {len(frame)}')
    print(f'Athlete 1 (Right): {len(right_rows)} throw-ins')
    print(f'Athlete 2 (Left): {len(left_rows)} throw-ins')

# Calculate key metrics for each athlete and stage
def _rates_for(throw_ins):
    """Return (first-contact success %, possession retention %, shot creation %).

    Possession retention is averaged only over rows whose first contact
    succeeded, and is 0 when there are no such rows.
    """
    success_rate = throw_ins['First Contact Success'].mean() * 100
    succeeded = throw_ins[throw_ins['First Contact Success']]
    possession_rate = succeeded['Possession Retained'].mean() * 100 if len(succeeded) > 0 else 0
    shot_rate = throw_ins['Shot Created'].mean() * 100
    return success_rate, possession_rate, shot_rate


# Per-athlete slices of each stage (Athlete 1 = right side, Athlete 2 = left side).
athlete1_baseline = baseline[baseline.Athlete == 1]
athlete2_baseline = baseline[baseline.Athlete == 2]
athlete1_post = post[post.Athlete == 1]
athlete2_post = post[post.Athlete == 2]

(athlete1_baseline_success_rate,
 athlete1_baseline_possession_rate,
 athlete1_baseline_shot_rate) = _rates_for(athlete1_baseline)

(athlete2_baseline_success_rate,
 athlete2_baseline_possession_rate,
 athlete2_baseline_shot_rate) = _rates_for(athlete2_baseline)

(athlete1_post_success_rate,
 athlete1_post_possession_rate,
 athlete1_post_shot_rate) = _rates_for(athlete1_post)

(athlete2_post_success_rate,
 athlete2_post_possession_rate,
 athlete2_post_shot_rate) = _rates_for(athlete2_post)

# One row per (stage, athlete, metric), in the order:
# baseline athlete 1, baseline athlete 2, post athlete 1, post athlete 2.
_METRIC_ORDER = ('First Contact Success Rate', 'Possession Retention Rate', 'Shot Creation Rate')
_STAGE_RATES = (
    ('Baseline', 1, (athlete1_baseline_success_rate,
                     athlete1_baseline_possession_rate,
                     athlete1_baseline_shot_rate)),
    ('Baseline', 2, (athlete2_baseline_success_rate,
                     athlete2_baseline_possession_rate,
                     athlete2_baseline_shot_rate)),
    ('Post-Intervention', 1, (athlete1_post_success_rate,
                              athlete1_post_possession_rate,
                              athlete1_post_shot_rate)),
    ('Post-Intervention', 2, (athlete2_post_success_rate,
                              athlete2_post_possession_rate,
                              athlete2_post_shot_rate)),
)

metrics = []
for stage_label, athlete_id, stage_rates in _STAGE_RATES:
    for metric_label, metric_value in zip(_METRIC_ORDER, stage_rates):
        metrics.append([stage_label, athlete_id, metric_label, metric_value])

# Convert metrics to DataFrame for easier plotting
metrics_df = pd.DataFrame(metrics, columns=['Stage', 'Athlete', 'Metric', 'Value'])
print('\nCalculated Metrics:')
print(metrics_df)

# Save metrics to CSV for reference
metrics_df.to_csv('progress_analysis/performance_metrics.csv', index=False)

# Set style for plots
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('colorblind')

# 1. Create comparison bar charts for each metric, comparing both athletes and stages
metrics_list = metrics_df['Metric'].unique()

for metric in metrics_list:
    plt.figure(figsize=(10, 6))

    # Filter data for this metric
    metric_data = metrics_df[metrics_df['Metric'] == metric]

    # Create grouped bar chart (one bar per athlete and stage)
    ax = sns.barplot(x='Athlete', y='Value', hue='Stage', data=metric_data)

    # Add value labels on top of the drawn bars only. ax.containers holds the
    # bars themselves, whereas ax.patches can also contain the zero-size proxy
    # rectangles newer seaborn versions add for the legend, which would
    # otherwise receive a stray "0.0%" label.
    for container in ax.containers:
        for bar in container:
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width()/2., height + 1,
                    f'{height:.1f}%',
                    ha='center', fontsize=10)

    # Set labels and title
    plt.xlabel('Athlete', fontsize=12)
    plt.ylabel('Percentage (%)', fontsize=12)
    plt.title(f'{metric} - Baseline vs Post-Intervention', fontsize=14)
    plt.xticks([0, 1], ['Athlete 1 (Right Back)', 'Athlete 2 (Left Back)'])
    plt.ylim(0, 110)  # Set y-axis from 0 to 110% to accommodate labels

    # Add legend
    plt.legend(title='Stage')

    # Add grid for better readability
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    # Save figure under a file name derived from the metric name
    plt.tight_layout()
    plt.savefig(f'progress_analysis/{metric.replace(" ", "_").lower()}_comparison.png', dpi=300)
    plt.close()

# 2. Create a combined metrics visualization for each athlete
for athlete in [1, 2]:
    plt.figure(figsize=(12, 7))

    # Filter data for this athlete
    athlete_data = metrics_df[metrics_df['Athlete'] == athlete]

    # Reshape to one row per metric with a column per stage
    athlete_pivot = athlete_data.pivot(index='Metric', columns='Stage', values='Value').reset_index()

    # Set up bar positions. The metric labels get their own name so the
    # module-level `metrics` list of metric rows is not overwritten by a Series.
    pivot_metric_names = athlete_pivot['Metric']
    x = np.arange(len(pivot_metric_names))
    width = 0.35

    # Create bars: baseline to the left of each tick, post-intervention to the right
    baseline_bars = plt.bar(x - width/2, athlete_pivot['Baseline'], width, label='Baseline')
    post_bars = plt.bar(x + width/2, athlete_pivot['Post-Intervention'], width, label='Post-Intervention')

    # Add value labels on top of bars
    for bars in [baseline_bars, post_bars]:
        for bar in bars:
            height = bar.get_height()
            plt.text(bar.get_x() + bar.get_width()/2., height + 1,
                    f'{height:.1f}%',
                    ha='center', va='bottom', fontsize=10)

    # Set labels and title
    plt.xlabel('Performance Metrics', fontsize=12)
    plt.ylabel('Percentage (%)', fontsize=12)
    plt.title(f'Athlete {athlete} - Performance Metrics Comparison', fontsize=14)
    plt.xticks(x, pivot_metric_names, rotation=15)
    plt.ylim(0, 110)  # Set y-axis from 0 to 110% to accommodate labels

    # Add legend
    plt.legend(title='Stage')

    # Add grid for better readability
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    # Save figure
    plt.tight_layout()
    plt.savefig(f'progress_analysis/athlete_{athlete}_metrics_comparison.png', dpi=300)
    plt.close()

# 3. Create a radar chart for comprehensive comparison
# One spoke per metric, evenly spaced; the first angle is repeated at the end
# so each plotted outline closes on itself.
metrics_list = metrics_df['Metric'].unique()
angles = np.linspace(0, 2*np.pi, len(metrics_list), endpoint=False).tolist()
angles += angles[:1]  # Close the circle

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 7), subplot_kw=dict(polar=True))

# (axes, athlete id, panel title) for the two side-by-side radar plots
radar_panels = (
    (ax1, 1, 'Athlete 1 (Right Back) - Performance Radar'),
    (ax2, 2, 'Athlete 2 (Left Back) - Performance Radar'),
)
# (stage label, matplotlib colour code) in drawing order
radar_stages = (('Baseline', 'b'), ('Post-Intervention', 'r'))

for ax, athlete, panel_title in radar_panels:
    athlete_rows = metrics_df[metrics_df['Athlete'] == athlete]

    for stage, colour in radar_stages:
        # Look values up by metric name so each one lands on the spoke that
        # carries its label, independent of the row order in metrics_df.
        stage_values = (athlete_rows[athlete_rows['Stage'] == stage]
                        .set_index('Metric')['Value']
                        .reindex(metrics_list)
                        .tolist())
        stage_values += stage_values[:1]  # Close the loop for plotting

        ax.plot(angles, stage_values, f'{colour}-', linewidth=2, label=stage)
        ax.fill(angles, stage_values, colour, alpha=0.1)

    ax.set_title(panel_title, fontsize=14)

    # Shared radar properties: one tick per metric (closing angle excluded),
    # labels without the ' Rate' suffix, and a fixed 0-100 radial range.
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels([m.replace(' Rate', '') for m in metrics_list])
    ax.set_ylim(0, 100)
    ax.grid(True)
    ax.legend(loc='upper right', bbox_to_anchor=(0.1, 0.1))

plt.tight_layout()
plt.savefig('progress_analysis/radar_chart_comparison.png', dpi=300)
plt.close()

# 4. Create a progress chart showing improvement percentages
# Calculate improvement (post-intervention minus baseline, in percentage
# points) for each metric and athlete.
improvements = []

for athlete in [1, 2]:
    for metric in metrics_list:
        # The scalars get their own names: assigning them to `baseline` / `post`
        # would overwrite the DataFrames of the same names loaded earlier.
        baseline_value = metrics_df[(metrics_df['Athlete'] == athlete) &
                                    (metrics_df['Stage'] == 'Baseline') &
                                    (metrics_df['Metric'] == metric)]['Value'].values[0]

        post_value = metrics_df[(metrics_df['Athlete'] == athlete) &
                                (metrics_df['Stage'] == 'Post-Intervention') &
                                (metrics_df['Metric'] == metric)]['Value'].values[0]

        improvement = post_value - baseline_value
        improvements.append([athlete, metric, improvement])

# Convert to DataFrame
improvement_df = pd.DataFrame(improvements, columns=['Athlete', 'Metric', 'Improvement'])

# Create improvement visualization: one group of bars per metric, one bar per athlete
plt.figure(figsize=(12, 8))
ax = sns.barplot(x='Metric', y='Improvement', hue='Athlete', data=improvement_df)

# Add value labels to the drawn bars only. ax.containers excludes the zero-size
# legend proxy rectangles that newer seaborn versions add to ax.patches.
for container in ax.containers:
    for bar in container:
        height = bar.get_height()
        # Positive bars are labelled above the bar, negative bars below it.
        if height >= 0:
            va = 'bottom'
            y_pos = height + 0.5
        else:
            va = 'top'
            y_pos = height - 0.5

        ax.text(bar.get_x() + bar.get_width()/2., y_pos,
                f'{height:+.1f}%',
                ha='center', va=va, fontsize=10)

# Set labels and title
plt.xlabel('Performance Metrics', fontsize=12)
plt.ylabel('Improvement (Percentage Points)', fontsize=12)
plt.title('Performance Improvement: Post-Intervention vs Baseline', fontsize=14)
plt.xticks(rotation=15)

# Pass the hue handles explicitly. Calling legend() with labels alone pairs
# them with artists purely by order, which can attach the athlete names to
# something other than the coloured bars.
legend_handles, _ = ax.get_legend_handles_labels()
plt.legend(handles=legend_handles, title='Athlete',
           labels=['Athlete 1 (Right Back)', 'Athlete 2 (Left Back)'])

# Add a horizontal line at y=0
plt.axhline(y=0, color='black', linestyle='-', alpha=0.3)

# Add grid for better readability
plt.grid(axis='y', linestyle='--', alpha=0.7)

# Save figure
plt.tight_layout()
plt.savefig('progress_analysis/performance_improvement.png', dpi=300)
plt.close()

# 5. Create a summary visualization showing overall progress
# Mean improvement across all metrics, computed separately for each athlete.
athlete1_avg_improvement = improvement_df.loc[improvement_df['Athlete'] == 1, 'Improvement'].mean()
athlete2_avg_improvement = improvement_df.loc[improvement_df['Athlete'] == 2, 'Improvement'].mean()

plt.figure(figsize=(10, 6))
bars = plt.bar(
    [1, 2],
    [athlete1_avg_improvement, athlete2_avg_improvement],
    color=['#1f77b4', '#ff7f0e'],
)

# Annotate each bar with its signed value: above the bar when the value is
# non-negative, below it otherwise.
for bar in bars:
    height = bar.get_height()
    va, y_pos = ('bottom', height + 0.5) if height >= 0 else ('top', height - 0.5)
    plt.text(bar.get_x() + bar.get_width()/2., y_pos,
            f'{height:+.1f}%',
            ha='center', va=va, fontsize=12)

# Axis text and tick names
plt.xlabel('Athlete', fontsize=12)
plt.ylabel('Average Improvement (Percentage Points)', fontsize=12)
plt.title('Overall Performance Improvement by Athlete', fontsize=14)
plt.xticks([1, 2], ['Athlete 1 (Right Back)', 'Athlete 2 (Left Back)'])

# Zero reference line plus a dashed horizontal grid
plt.axhline(y=0, color='black', linestyle='-', alpha=0.3)
plt.grid(axis='y', linestyle='--', alpha=0.7)

# Write the figure to disk and release it
plt.tight_layout()
plt.savefig('progress_analysis/overall_improvement.png', dpi=300)
plt.close()

print('All visualizations created and saved to progress_analysis/ directory')

# Additional code to create visualizations with sample size context
# This would show the actual counts alongside percentages

# Create a function to add sample size context to bar charts
def create_metric_comparison_with_counts(metric_name):
    """Plot one metric for both athletes and stages, annotated with raw counts.

    Each bar is labelled with its percentage and the "numerator/denominator"
    counts behind it, and the figure is saved to
    ``progress_analysis/<metric>_with_counts.png``.

    Args:
        metric_name: A value from the 'Metric' column of ``metrics_df``.
            'First Contact Success Rate' and 'Possession Retention Rate' have
            dedicated count pairs; any other name is annotated with the
            shot-creation counts.

    Reads the module-level ``metrics_df`` and the four per-athlete frames
    (``athlete1_baseline``, ``athlete1_post``, ``athlete2_baseline``,
    ``athlete2_post``).
    """
    plt.figure(figsize=(12, 8))

    # Filter data for this metric
    metric_data = metrics_df[metrics_df['Metric'] == metric_name]

    # Raw counts for annotation, keyed by (athlete, stage)
    stage_frames = {
        (1, 'Baseline'): athlete1_baseline,
        (1, 'Post-Intervention'): athlete1_post,
        (2, 'Baseline'): athlete2_baseline,
        (2, 'Post-Intervention'): athlete2_post,
    }
    counts = {}
    for key, frame in stage_frames.items():
        succeeded = frame[frame['First Contact Success']]
        counts[key] = {
            'total': len(frame),
            'success': frame['First Contact Success'].sum(),
            # Retention is counted only among successful first contacts.
            'retained': succeeded['Possession Retained'].sum(),
            'shots': frame['Shot Created'].sum(),
        }

    # (numerator, denominator) count keys per metric; shot creation is the fallback.
    count_keys = {
        'First Contact Success Rate': ('success', 'total'),
        'Possession Retention Rate': ('retained', 'success'),
    }
    numerator_key, denominator_key = count_keys.get(metric_name, ('shots', 'total'))

    # Pin the category and hue order so bars can be mapped back to their data.
    athlete_order = [1, 2]
    stage_order = ['Baseline', 'Post-Intervention']
    ax = sns.barplot(x='Athlete', y='Value', hue='Stage', data=metric_data,
                     order=athlete_order, hue_order=stage_order)

    # seaborn draws one bar container per hue level (stage), each holding that
    # stage's bars across the athletes. Mapping a flat patch index as
    # "athlete-major" therefore mislabels the middle bars; walk the containers.
    for stage, container in zip(stage_order, ax.containers):
        for bar in container:
            height = bar.get_height()
            centre = bar.get_x() + bar.get_width()/2.

            # Dodged bars sit either side of their integer category position,
            # so rounding the bar centre recovers the athlete slot.
            slot = int(round(centre))
            if not 0 <= slot < len(athlete_order):
                continue
            athlete = athlete_order[slot]

            stage_counts = counts[(athlete, stage)]
            count_text = f"{stage_counts[numerator_key]}/{stage_counts[denominator_key]}"

            ax.text(centre, height + 1,
                    f'{height:.1f}%\n({count_text})',
                    ha='center', fontsize=10)

    # Set labels and title
    plt.xlabel('Athlete', fontsize=12)
    plt.ylabel('Percentage (%)', fontsize=12)
    plt.title(f'{metric_name} - Baseline vs Post-Intervention (with Sample Sizes)', fontsize=14)
    plt.xticks([0, 1], ['Athlete 1 (Right Back)', 'Athlete 2 (Left Back)'])
    plt.ylim(0, 110)  # Set y-axis from 0 to 110% to accommodate labels

    # Add legend
    plt.legend(title='Stage')

    # Add grid for better readability
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    # Save figure
    plt.tight_layout()
    plt.savefig(f'progress_analysis/{metric_name.replace(" ", "_").lower()}_with_counts.png', dpi=300)
    plt.close()

# Render one count-annotated chart per metric present in metrics_df.
for metric in metrics_list:
    create_metric_comparison_with_counts(metric_name=metric)

print('Additional visualizations with sample size context created')


FileNotFoundError: [Errno 2] No such file or directory: 'LeftSide_table_data.csv'

In [None]:
os.listdir('output')


['combined_success_rates_comparison.png',
 'first_contact_success_pie.png',
 'comparison_heatmaps.png',
 'first_contact_success_heatmap.png',
 'possession_retention_success_pie.png',
 'throw_in_length_distribution.png',
 'possession_duration_bar.png',
 'right_side_possession_retention_success_pie.png',
 'right_side_throw_in_length_distribution.png',
 'right_side_first_contact_success_pie.png',
 'categorized_throw_in_data.csv',
 'comparison_success_rates.png',
 'throw_in_direction_pie.png',
 'processed_right_side_data.csv',
 'right_side_throw_in_direction_distribution.png',
 'right_side_all_success_rates_comparison.png',
 'throw_in_direction_distribution.png',
 'right_side_shot_creation_success_pie.png',
 'comparison_direction_distribution.png',
 'comparison_possession_retention.png',
 'comparison_first_contact_success.png',
 'comparison_length_distribution.png',
 'shot_creation_success_pie.png',
 'right_side_possession_duration_bar.png',
 'all_success_rates_comparison.png',
 'throw_in_