## Ultimate Goals:
* Find optimal shooting form/release point
  * Include the feedback system based on optimal shooting form and release point
* Exhaustion levels and optimal energy max and min
* Shot outcome prediction


1. Data Loading and Basic Parsing Module with Test
2. Initial DataFrame Creation and Column Analysis Module with Test

In [10]:
# %%writefile ../../src/load_and_dataframe.py

import os
import json
import pandas as pd
import numpy as np

# Function to parse JSON trial data into a DataFrame
def load_and_parse_json(file_path, debug=False):
    if debug:
        print(f"Debug: Loading and parsing file: {file_path}")
    
    try:
        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
            data = json.load(f)
        
        trial_id = data['trial_id']
        result = 1 if data['result'] == 'made' else 0
        landing_x = data['landing_x']
        landing_y = data['landing_y']
        entry_angle = data['entry_angle']
        release_frame = data.get('release_frame', None)

        if debug:
            print(f"Debug: Trial ID: {trial_id}, Result: {result}, Release Frame: {release_frame}")
        
        return data, trial_id, result, landing_x, landing_y, entry_angle, release_frame
    except Exception as e:
        print(f"Error: Failed to load or parse JSON file: {file_path}. Exception: {e}")
        return None, None, None, None, None, None, None

# Main function to test loading and parsing
def main_load_and_parse(file_path):
    data, trial_id, result, landing_x, landing_y, entry_angle, release_frame = load_and_parse_json(file_path, debug=True)
    if data is not None:
        print("Debug: Successfully loaded and parsed the JSON file.")
    else:
        print("Error: Failed to load and parse JSON file.")
    return data

# Test the function
test_file = "../../SPL-Open-Data/basketball/freethrow/data/P0001/BB_FT_P0001_T0001.json"
main_load_and_parse(test_file)

# Function to create DataFrame from parsed JSON data and analyze its structure
def create_dataframe(data, trial_id, result, landing_x, landing_y, entry_angle, debug=False):
    frame_data = []
    for frame in data['tracking']:
        frame_time = frame['time']
        ball_pos = frame['data'].get('ball', [None, None, None])
        player_pos = frame['data']['player']

        flat_frame = {
            'trial_id': trial_id,
            'result': result,
            'landing_x': landing_x,
            'landing_y': landing_y,
            'entry_angle': entry_angle,
            'frame_time': frame_time,
            'ball_x': ball_pos[0] if ball_pos[0] is not None else np.nan,
            'ball_y': ball_pos[1] if ball_pos[1] is not None else np.nan,
            'ball_z': ball_pos[2] if ball_pos[2] is not None else np.nan,
        }

        for part, coords in player_pos.items():
            flat_frame[f'{part}_x'] = coords[0]
            flat_frame[f'{part}_y'] = coords[1]
            flat_frame[f'{part}_z'] = coords[2]

        frame_data.append(flat_frame)
    
    df = pd.DataFrame(frame_data)

    if debug:
        print(f"Debug: Created DataFrame for Trial ID: {trial_id}")
        
        # Display the columns and check unique values for key columns
        print("Debug: Unique columns in DataFrame:")
        print(df.columns)
        
        # Show data types for each column
        print("\nDebug: Data types of columns:")
        print(df.dtypes)
        
        # Display only one row as a sample
        print("\nDebug: Single sample row:")
        print(df.iloc[0])

        # Show unique values in key categorical columns
        print("\nDebug: Unique values for 'result':")
        print(df['result'].unique())
        
        # Check for null value counts in each column
        print("\nDebug: Null value counts per column:")
        null_counts = df.isna().sum()
        print(null_counts)
        
        # Display a basic description of numeric columns for insights
        print("\nDebug: Description of numeric columns:")
        print(df.describe())
    
    return df

# Main function to test DataFrame creation
def main_create_dataframe(file_path):
    data, trial_id, result, landing_x, landing_y, entry_angle, _ = load_and_parse_json(file_path, debug=True)
    if data:
        df = create_dataframe(data, trial_id, result, landing_x, landing_y, entry_angle, debug=True)
        print("\nDebug: DataFrame created successfully. Sample data:")
        print(df.head(10))
    else:
        print("Error: Data not available for DataFrame creation.")

# Test the function
main_create_dataframe(test_file)


Debug: Loading and parsing file: ../../SPL-Open-Data/basketball/freethrow/data/P0001/BB_FT_P0001_T0001.json
Debug: Trial ID: T0001, Result: 0, Release Frame: None
Debug: Successfully loaded and parsed the JSON file.
Debug: Loading and parsing file: ../../SPL-Open-Data/basketball/freethrow/data/P0001/BB_FT_P0001_T0001.json
Debug: Trial ID: T0001, Result: 0, Release Frame: None
Debug: Created DataFrame for Trial ID: T0001
Debug: Unique columns in DataFrame:
Index(['trial_id', 'result', 'landing_x', 'landing_y', 'entry_angle',
       'frame_time', 'ball_x', 'ball_y', 'ball_z', 'R_EYE_x', 'R_EYE_y',
       'R_EYE_z', 'L_EYE_x', 'L_EYE_y', 'L_EYE_z', 'NOSE_x', 'NOSE_y',
       'NOSE_z', 'R_EAR_x', 'R_EAR_y', 'R_EAR_z', 'L_EAR_x', 'L_EAR_y',
       'L_EAR_z', 'R_SHOULDER_x', 'R_SHOULDER_y', 'R_SHOULDER_z',
       'L_SHOULDER_x', 'L_SHOULDER_y', 'L_SHOULDER_z', 'R_ELBOW_x',
       'R_ELBOW_y', 'R_ELBOW_z', 'L_ELBOW_x', 'L_ELBOW_y', 'L_ELBOW_z',
       'R_WRIST_x', 'R_WRIST_y', 'R_WRIST_z', 'L

3. Ball Data Integrity Checks with Test

In [11]:
# %%writefile ../../src/data_integrity.py

import os
import json
import pandas as pd
import numpy as np
#from load_and_dataframe import load_and_parse_json, create_dataframe

# Function to check for missing or invalid data in all columns
def check_data_integrity(df, debug=False):
    # Calculate null counts for all columns
    null_counts = df.isna().sum()
    
    if debug:
        print("\nDebug: Data integrity check - Missing counts for all columns:")
        print(null_counts)
    
    # Identify columns with missing data
    problematic_columns = null_counts[null_counts > 0]
    
    if not problematic_columns.empty:
        print("\nWarning: The following columns have missing data:")
        for col, count in problematic_columns.items():
            print(f"Column '{col}' has {count} missing values.")
    else:
        print("\nInfo: No columns with missing data detected.")
    
    return problematic_columns

# Main function to test the data integrity check for all columns
def main_check_data_integrity(file_path):
    data, trial_id, result, landing_x, landing_y, entry_angle, _ = load_and_parse_json(file_path, debug=True)
    if data:
        df = create_dataframe(data, trial_id, result, landing_x, landing_y, entry_angle, debug=True)
        check_data_integrity(df, debug=True)
    else:
        print("Error: Data not available for data integrity check.")

# Test the function
main_check_data_integrity(test_file)



Debug: Loading and parsing file: ../../SPL-Open-Data/basketball/freethrow/data/P0001/BB_FT_P0001_T0001.json
Debug: Trial ID: T0001, Result: 0, Release Frame: None
Debug: Created DataFrame for Trial ID: T0001
Debug: Unique columns in DataFrame:
Index(['trial_id', 'result', 'landing_x', 'landing_y', 'entry_angle',
       'frame_time', 'ball_x', 'ball_y', 'ball_z', 'R_EYE_x', 'R_EYE_y',
       'R_EYE_z', 'L_EYE_x', 'L_EYE_y', 'L_EYE_z', 'NOSE_x', 'NOSE_y',
       'NOSE_z', 'R_EAR_x', 'R_EAR_y', 'R_EAR_z', 'L_EAR_x', 'L_EAR_y',
       'L_EAR_z', 'R_SHOULDER_x', 'R_SHOULDER_y', 'R_SHOULDER_z',
       'L_SHOULDER_x', 'L_SHOULDER_y', 'L_SHOULDER_z', 'R_ELBOW_x',
       'R_ELBOW_y', 'R_ELBOW_z', 'L_ELBOW_x', 'L_ELBOW_y', 'L_ELBOW_z',
       'R_WRIST_x', 'R_WRIST_y', 'R_WRIST_z', 'L_WRIST_x', 'L_WRIST_y',
       'L_WRIST_z', 'R_HIP_x', 'R_HIP_y', 'R_HIP_z', 'L_HIP_x', 'L_HIP_y',
       'L_HIP_z', 'R_KNEE_x', 'R_KNEE_y', 'R_KNEE_z', 'L_KNEE_x', 'L_KNEE_y',
       'L_KNEE_z', 'R_ANKLE_x', 'R_ANKL

4. Velocity and Speed Calculation Module with Test

In [12]:
# %%writefile ../../src/feature_engineering/velocity_and_speed_calc.py

# Function to calculate ball speed and velocity
def calculate_ball_speed_and_velocity(df, debug=False):
    df['ball_speed'] = np.sqrt(
        (df['ball_x'].diff()**2) + 
        (df['ball_y'].diff()**2) + 
        (df['ball_z'].diff()**2)
    ) / df['frame_time'].diff()

    df['rvx'] = df['ball_x'].diff() / df['frame_time'].diff()
    df['rvy'] = df['ball_y'].diff() / df['frame_time'].diff()
    df['rvz'] = df['ball_z'].diff() / df['frame_time'].diff()
    df['rv'] = np.sqrt(df['rvx']**2 + df['rvy']**2 + df['rvz']**2)

    if debug:
        print("Debug: Calculated ball speed and velocity.")
        nan_counts = df[['ball_speed', 'rvx', 'rvy', 'rvz', 'rv']].isna().sum()
        print("Debug: NaN counts for speed and velocity columns:")
        print(nan_counts)
        print("Sample data for speed and velocity:\n", df[['frame_time', 'ball_speed', 'rvx', 'rvy', 'rvz', 'rv']].head(10))
    
    return df

# Main function to test ball speed and velocity calculation
def main_calculate_ball_speed_velocity(file_path):
    data, trial_id, result, landing_x, landing_y, entry_angle, _ = load_and_parse_json(file_path, debug=True)
    if data:
        df = create_dataframe(data, trial_id, result, landing_x, landing_y, entry_angle, debug=True)
        df = calculate_ball_speed_and_velocity(df, debug=True)
    else:
        print("Error: Data not available for ball speed and velocity calculation.")

# Test the function
main_calculate_ball_speed_velocity(test_file)


Debug: Loading and parsing file: ../../SPL-Open-Data/basketball/freethrow/data/P0001/BB_FT_P0001_T0001.json
Debug: Trial ID: T0001, Result: 0, Release Frame: None
Debug: Created DataFrame for Trial ID: T0001
Debug: Unique columns in DataFrame:
Index(['trial_id', 'result', 'landing_x', 'landing_y', 'entry_angle',
       'frame_time', 'ball_x', 'ball_y', 'ball_z', 'R_EYE_x', 'R_EYE_y',
       'R_EYE_z', 'L_EYE_x', 'L_EYE_y', 'L_EYE_z', 'NOSE_x', 'NOSE_y',
       'NOSE_z', 'R_EAR_x', 'R_EAR_y', 'R_EAR_z', 'L_EAR_x', 'L_EAR_y',
       'L_EAR_z', 'R_SHOULDER_x', 'R_SHOULDER_y', 'R_SHOULDER_z',
       'L_SHOULDER_x', 'L_SHOULDER_y', 'L_SHOULDER_z', 'R_ELBOW_x',
       'R_ELBOW_y', 'R_ELBOW_z', 'L_ELBOW_x', 'L_ELBOW_y', 'L_ELBOW_z',
       'R_WRIST_x', 'R_WRIST_y', 'R_WRIST_z', 'L_WRIST_x', 'L_WRIST_y',
       'L_WRIST_z', 'R_HIP_x', 'R_HIP_y', 'R_HIP_z', 'L_HIP_x', 'L_HIP_y',
       'L_HIP_z', 'R_KNEE_x', 'R_KNEE_y', 'R_KNEE_z', 'L_KNEE_x', 'L_KNEE_y',
       'L_KNEE_z', 'R_ANKLE_x', 'R_ANKL

5. Joint Power Calculation Module with Test

In [13]:
# %%writefile ../../src/feature_engineering/joint_power_calc.py

# Function to calculate power retained at each joint section
def calculate_joint_power(df, joints, debug=False):
    for joint in joints:
        joint_speed = np.sqrt(
            (df[f'{joint}_x'].diff()**2) + 
            (df[f'{joint}_y'].diff()**2) + 
            (df[f'{joint}_z'].diff()**2)
        ) / df['frame_time'].diff()

        df[f'{joint}_power'] = joint_speed

        if debug:
            print(f"Debug: {joint} power calculated.")
            print(f"Sample data for {joint} power:\n", df[[f'{joint}_power']].head(10))
    
    return df

# Main function to test joint power calculation
def main_calculate_joint_power(file_path):
    data, trial_id, result, landing_x, landing_y, entry_angle, _ = load_and_parse_json(file_path, debug=True)
    if data:
        df = create_dataframe(data, trial_id, result, landing_x, landing_y, entry_angle, debug=True)
        joints_to_check = ['L_ANKLE', 'R_ANKLE', 'L_KNEE', 'R_KNEE', 'L_HIP', 'R_HIP', 'L_ELBOW', 'R_ELBOW', 'L_WRIST', 'R_WRIST']
        df = calculate_joint_power(df, joints_to_check, debug=True)
    else:
        print("Error: Data not available for joint power calculation.")

# Test the function
main_calculate_joint_power(test_file)


Debug: Loading and parsing file: ../../SPL-Open-Data/basketball/freethrow/data/P0001/BB_FT_P0001_T0001.json
Debug: Trial ID: T0001, Result: 0, Release Frame: None
Debug: Created DataFrame for Trial ID: T0001
Debug: Unique columns in DataFrame:
Index(['trial_id', 'result', 'landing_x', 'landing_y', 'entry_angle',
       'frame_time', 'ball_x', 'ball_y', 'ball_z', 'R_EYE_x', 'R_EYE_y',
       'R_EYE_z', 'L_EYE_x', 'L_EYE_y', 'L_EYE_z', 'NOSE_x', 'NOSE_y',
       'NOSE_z', 'R_EAR_x', 'R_EAR_y', 'R_EAR_z', 'L_EAR_x', 'L_EAR_y',
       'L_EAR_z', 'R_SHOULDER_x', 'R_SHOULDER_y', 'R_SHOULDER_z',
       'L_SHOULDER_x', 'L_SHOULDER_y', 'L_SHOULDER_z', 'R_ELBOW_x',
       'R_ELBOW_y', 'R_ELBOW_z', 'L_ELBOW_x', 'L_ELBOW_y', 'L_ELBOW_z',
       'R_WRIST_x', 'R_WRIST_y', 'R_WRIST_z', 'L_WRIST_x', 'L_WRIST_y',
       'L_WRIST_z', 'R_HIP_x', 'R_HIP_y', 'R_HIP_z', 'L_HIP_x', 'L_HIP_y',
       'L_HIP_z', 'R_KNEE_x', 'R_KNEE_y', 'R_KNEE_z', 'L_KNEE_x', 'L_KNEE_y',
       'L_KNEE_z', 'R_ANKLE_x', 'R_ANKL

In [14]:
import matplotlib.pyplot as plt

# Function to label each frame with the phase of the shot
def label_shot_phases(df, release_frame, debug=False):
    df['phase'] = 'pre-release'
    if release_frame is not None:
        df.loc[df['frame_time'] >= release_frame, 'phase'] = 'post-release'
        df.loc[df['frame_time'] == release_frame, 'phase'] = 'release'
    
    if debug:
        print("Debug: Shot phases labeled.")
        print("Phase distribution:\n", df['phase'].value_counts())
    
    return df


# Function to identify and validate release coordinates
def identify_and_validate_release(df, release_frame, debug=False):
    if release_frame in df.index:
        release_data = df.loc[release_frame]
        df['release_x'], df['release_y'], df['release_z'] = release_data['ball_x'], release_data['ball_y'], release_data['ball_z']
        
        if debug:
            print(f"Debug: Release point at frame {release_frame} detected as:")
            print(f"rx={release_data['ball_x']}, ry={release_data['ball_y']}, rz={release_data['ball_z']}")
    else:
        if debug:
            print("Warning: Release frame not found or invalid. Release data may be missing.")
    
    return df


# Function to plot the ball path
def plot_ball_path(df, title="Ball Path Visualization"):
    fig = plt.figure(figsize=(12, 6))
    ax = fig.add_subplot(111, projection='3d')
    ax.plot(df['ball_x'], df['ball_y'], df['ball_z'], label='Original Path', color='b', linewidth=2)
    
    if 'missing_ball' in df.columns:
        ax.scatter(
            df['ball_x'][df['missing_ball']], 
            df['ball_y'][df['missing_ball']], 
            df['ball_z'][df['missing_ball']], 
            color='r', label='Missing Data Points', s=20
        )
    
    ax.set_title(title)
    ax.set_xlabel('X Position')
    ax.set_ylabel('Y Position')
    ax.set_zlabel('Z Position (Height)')
    ax.legend()
    plt.show()


# Main function to run the updated preprocessing pipeline
def main(file_path, debug=False):
    parsed_data, release_frame = load_and_parse_json(file_path, debug=debug)
    if parsed_data is not None:
        df = create_dataframe(parsed_data, trial_id, result, landing_x, landing_y, entry_angle, debug=True)
        
        # Label shot phases
        df = label_shot_phases(df, release_frame, debug=debug)
        
        # Validate and extract release point
        df = identify_and_validate_release(df, release_frame, debug=debug)
        
        # Calculate ball speed and velocity
        df = calculate_ball_speed_and_velocity(df, debug=debug)
        
        # Calculate joint power
        joints_to_check = ['L_ANKLE', 'R_ANKLE', 'L_KNEE', 'R_KNEE', 'L_HIP', 'R_HIP', 'L_ELBOW', 'R_ELBOW', 'L_WRIST', 'R_WRIST']
        df = calculate_joint_power(df, joints_to_check, debug=debug)
        
        if debug:
            print("\nDebug: Final DataFrame after all calculations:\n", df.head(20))
        
        # Plot the ball path to visualize results
        plot_ball_path(df, title="Enhanced Ball Path Visualization")
    else:
        print("Error: Data not available for processing.")

# Run the main function for testing
test_file = "../../SPL-Open-Data/basketball/freethrow/data/P0001/BB_FT_P0001_T0001.json"
main(test_file, debug=True)


Debug: Loading and parsing file: ../../SPL-Open-Data/basketball/freethrow/data/P0001/BB_FT_P0001_T0001.json
Debug: Trial ID: T0001, Result: 0, Release Frame: None


ValueError: too many values to unpack (expected 2)