# Imports & Config

In [None]:
# Standard library imports
import os
import tempfile
import glob
import re

# Third-party imports for data manipulation
import pandas as pd
import numpy as np

# Third-party imports for machine learning
import joblib
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

# Third-party imports for data visualization
import matplotlib.pyplot as plt
import plotly.graph_objects as go

# Third-party imports for video processing and computer vision
import cv2
import mediapipe as mp

# IPython imports for interactive widgets and display utilities
import ipywidgets as widgets
from IPython.display import Video, HTML, display, clear_output

# Local imports for video processing pipeline
from src.pipeline.tracking.video_processing_pipeline import process_video, body_parts

# Configuration: locations shared by the cells below.
tracking_data_dir = './src/pipeline/tracking_data'
output_video_dir = './src/pipeline/output_videos'
predictions_path = "./src/pipeline/predictions"
models_dir = './data/models'
# NOTE(review): 'labled' is presumably the real folder name on disk; rename the
# folder together with this string if the spelling is ever corrected.
labeled_data_path = './data/training/labled_data'
tracking_data_path = '' # will be set by video tracking cell, otherwise define csv file here

# Ensure the output directories of the video tracking cell exist.
# The loop variable is deliberately not called `dir`, which would shadow the
# builtin of the same name for the rest of the notebook session.
dirs_to_check = [tracking_data_dir, output_video_dir]
for directory in dirs_to_check:
    if not os.path.exists(directory):
        # exist_ok guards against the directory appearing between check and creation.
        os.makedirs(directory, exist_ok=True)
        print(f"Directory created: {directory}")

# Video Tracking

In [None]:
# Upload widget offering the supported video file types.
file_selector = widgets.FileUpload(
    description='Select a video file',
    accept='.mp4, .mov',  # file extensions offered in the file picker
    multiple=False,  # only a single file can be selected
    layout=dict(width='400px'),
)

# Button that starts processing of the uploaded file.
button = widgets.Button(
    description='Process Uploaded File',
    tooltip='Click to process the uploaded file',
    icon='play',  # Button icon
    button_style='',  # one of 'success', 'info', 'warning', 'danger' or ''
    layout=dict(width='400px'),
)

def save_uploaded_file(uploaded_file):
    """Write an uploaded file to a persistent temporary file.

    Args:
        uploaded_file: Mapping with a ``'name'`` entry, used as the suffix of
            the temporary file name, and a ``'content'`` entry holding the
            bytes to write.

    Returns:
        Tuple ``(path, directory)``: the path of the temporary file and the
        directory containing it. The file is not deleted automatically.
    """
    name, payload = uploaded_file['name'], uploaded_file['content']
    handle = tempfile.NamedTemporaryFile(delete=False, suffix=name)
    with handle:
        handle.write(payload)
    saved_path = handle.name
    return saved_path, os.path.dirname(saved_path)

def save_landmarks_on_video(video_path, output_video_path):
    """Render the tracked landmarks onto a video and save the result.

    The landmark coordinates are read from the CSV file referenced by the
    module-level ``tracking_data_path``; row *k* of the CSV is drawn onto
    frame *k* of the video. Coordinates are multiplied by the frame width and
    height, i.e. they are treated as fractions of the frame size. Rendering
    stops at the end of the CSV or of the video, whichever comes first.

    Args:
        video_path: Path of the source video.
        output_video_path: Path the annotated video is written to.

    Returns:
        None. If the source video cannot be opened, an error is printed and
        nothing is written.
    """
    print("Creating video with landmarks... (this might take a while)")

    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        # Check if video opened successfully
        if not cap.isOpened():
            print("Error: Could not open video.")
            return

        # Get the width, height, and frame rate of the video frame
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)

        # Read the tracking data from CSV
        tracking_data = pd.read_csv(tracking_data_path)

        # Resolve once, outside the frame loop, which landmarks have both
        # coordinate columns. Enum names such as LEFT_HIP map to column
        # prefixes such as "Left hip".
        pose_landmark = mp.solutions.pose.PoseLandmark
        hips = (pose_landmark.LEFT_HIP, pose_landmark.RIGHT_HIP)
        drawable = []
        for landmark in pose_landmark:
            landmark_name = landmark.name.lower().replace('_', ' ').capitalize()
            x_col = f'{landmark_name}_x'
            y_col = f'{landmark_name}_y'
            if x_col in tracking_data.columns and y_col in tracking_data.columns:
                drawable.append((x_col, y_col, landmark in hips))

        # Define the codec and create VideoWriter object
        fourcc = cv2.VideoWriter_fourcc(*'avc1')  # or 'XVID'
        out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

        # Process video and draw landmarks from tracking data
        for _, row in tracking_data.iterrows():
            ret, frame = cap.read()
            if not ret:
                break

            for x_col, y_col, is_hip in drawable:
                # Landmarks without a value in this frame are skipped.
                if pd.isna(row[x_col]) or pd.isna(row[y_col]):
                    continue
                x = int(row[x_col] * width)
                y = int(row[y_col] * height)
                if is_hip:
                    # Highlight hips in red (OpenCV colors are BGR)
                    cv2.circle(frame, (x, y), 15, (0, 0, 255), -1)
                else:
                    # Draw other landmarks in green
                    cv2.circle(frame, (x, y), 5, (0, 255, 0), -1)

            # Write the frame with landmarks to the output video file
            out.write(frame)
    finally:
        # Release the capture and writer even if reading or drawing fails,
        # so the files are not left locked or truncated by an open handle.
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()

    print(f"Video saved to {output_video_path}")

def process_and_display_tracking_data(tmp_file_path, tmp_file_dir):
    """Track a saved video, preview the tracking data and show the annotated video.

    Runs ``process_video`` on the file, displays the head of the resulting
    tracking CSV, stores the CSV path in the module-level
    ``tracking_data_path``, renders the landmark video and embeds it in the
    cell output.

    Args:
        tmp_file_path: Full path of the video file to process.
        tmp_file_dir: Directory containing that file.
    """
    global tracking_data_path

    print("Processing video... (this might take a while)")
    process_video(os.path.basename(tmp_file_path), tmp_file_dir, tracking_data_dir)

    # File name without its last four characters (the ".mp4" / ".mov" extension).
    video_stem = os.path.basename(tmp_file_path[:-4])
    tracking_data_file = f'Tracking_{video_stem}.csv'
    tracking_csv_path = os.path.join(tracking_data_dir, tracking_data_file)

    preview = pd.read_csv(tracking_csv_path)
    display(preview.head())

    # After tracking data is processed, save video with landmarks
    output_video_path = os.path.join(output_video_dir, f"{video_stem}_landmarks.mp4")
    tracking_data_path = tracking_csv_path
    save_landmarks_on_video(tmp_file_path, output_video_path)

    video_html = f'''
    Video with landmarks saved to 
    <a href='{output_video_path}' target='_blank'>{output_video_path}</a> <br><br>
    <video height="500" controls allowfullscreen>
      <source src="{output_video_path}" type="video/mp4">
    Your browser does not support the video tag.
    </video>
    '''
    display(HTML(video_html))

# Click handler for the "Process Uploaded File" button
def on_button_clicked(b):
    """Save the first uploaded file to disk and run the tracking pipeline on it.

    Args:
        b: The clicked button (unused).
    """
    uploaded_files = file_selector.value
    if not uploaded_files:
        print("No file uploaded.")
        return

    first_upload = next(iter(uploaded_files))
    saved_path, saved_dir = save_uploaded_file(first_upload)
    process_and_display_tracking_data(saved_path, saved_dir)

# Register the handler on the button
button.on_click(on_button_clicked)

# Show the upload widget and the button
display(file_selector, button)


# Model Usage - Squat Phase Analysis

In [None]:
# Report which tracking CSV the analysis below will operate on.
print(f"Analysing Squat Phase of Tracked Data: {tracking_data_path}")

def plot_movement_data(csv_file_path, title="Average Hip Height over Time"):
    """Plot the average hip height over time from a tracking CSV.

    The CSV must contain the columns ``Timestamp``, ``Left hip_y`` and
    ``Right hip_y``. If it also has a ``Label`` column with at least one
    value, one trace per known label is drawn in that label's color; rows
    whose label is missing or not in the color mapping are not drawn.
    Otherwise all points are drawn in blue. The y axis is reversed and the
    first and last timestamps are marked with vertical lines.

    Args:
        csv_file_path: Path of the CSV file to plot.
        title: Title of the figure.

    Returns:
        None. The figure is shown; for a CSV without rows a message is
        printed instead.
    """
    # Read the data from the CSV file
    data = pd.read_csv(csv_file_path, delimiter=",")

    # Without rows there is no first/last timestamp to mark (iloc[0] would raise).
    if data.empty:
        print(f"No data to plot in {csv_file_path}")
        return

    # Calculate the average hip height
    data["Average_Hip_Height"] = (data["Left hip_y"] + data["Right hip_y"]) / 2

    fig = go.Figure()

    # Check if 'Label' column exists and plot accordingly
    if 'Label' in data.columns and data['Label'].notna().any():
        # Define custom color mapping for movement types
        custom_color_mapping = {
            "Pause": "orange",
            "Ascending": "green",
            "Descending": "red",
            "Transition": "yellow",
            "Unknown": "grey",
        }

        # A trace is added for every known label, even when it has no points,
        # so the legend always lists the same entries.
        for label, color in custom_color_mapping.items():
            mask = data["Label"] == label
            fig.add_trace(
                go.Scatter(
                    x=data.loc[mask, "Timestamp"],
                    y=data.loc[mask, "Average_Hip_Height"],
                    mode="markers",
                    name=label,
                    marker_color=color,
                    hovertemplate="x: %{x}",
                )
            )
    else:
        # Plot the average hip height over time in blue if no 'Label' column
        fig.add_trace(
            go.Scatter(
                x=data["Timestamp"],
                y=data["Average_Hip_Height"],
                mode="markers",
                name="Average Hip Height",
                marker_color="blue",
                hovertemplate="x: %{x}",
            )
        )

    # Mark the first and last data points with a vertical line and a caption.
    first_x = data["Timestamp"].iloc[0]
    last_x = data["Timestamp"].iloc[-1]
    for x_pos, line_color, caption in (
        (first_x, "Green", "Start"),
        (last_x, "Red", "End"),
    ):
        fig.add_shape(
            type="line",
            x0=x_pos,
            y0=0,
            x1=x_pos,
            y1=1,
            yref="paper",  # y0/y1 span the full plot height
            line=dict(color=line_color, width=2),
        )
        fig.add_annotation(
            x=x_pos, y=0.05, text=f"{caption}: {x_pos}", showarrow=False, yref="paper"
        )

    # Show grid lines; the y axis is reversed
    fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor="LightGrey")
    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor="LightGrey", autorange="reversed")

    # Transparent backgrounds instead of the default plot background
    fig.update_layout(
        autosize=True,
        height=600,
        hovermode="x",
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        title=f"{title}"
    )

    fig.show()

# Pickle files that must not be offered as models
blacklist = ['imputer.pkl']

# Collect all sub-folders of the models directory except the "old" folder.
# The scandir iterator is closed by the `with` block; the result is sorted so
# the dropdown order does not depend on the file system's listing order.
if os.path.isdir(models_dir):
    with os.scandir(models_dir) as entries:
        model_folders = sorted(
            entry.path for entry in entries
            if entry.is_dir() and entry.name != "old"
        )
else:
    print(f"Models directory not found: {models_dir}")
    model_folders = []

# Dropdown for Folder Selection (no folder is pre-selected)
folder_dropdown = widgets.Dropdown(
    options=[(os.path.basename(folder), folder) for folder in model_folders],
    description='Folder:',
    disabled=False,
    value=None,
)

# Dropdown for Model Selection, initially empty
model_dropdown = widgets.Dropdown(
    options=[],
    description='Model:',
    disabled=False,
)

# Function to update model dropdown based on folder selection
def update_model_dropdown(change):
    """Fill the model dropdown with the ``.pkl`` files of the selected folder.

    Args:
        change: Change notification of the folder dropdown; ``change['new']``
            is the newly selected folder path (or empty when nothing is
            selected).
    """
    folder_path = change['new']
    if not folder_path:
        # Nothing selected: without this guard a None value would be passed
        # on to os.listdir / os.path.join instead of a folder path.
        model_dropdown.options = []
        return

    # Sorted so the dropdown order is stable across runs and machines.
    model_files = sorted(
        f for f in os.listdir(folder_path)
        if f.endswith('.pkl') and f not in blacklist
    )
    model_dropdown.options = [(file, os.path.join(folder_path, file)) for file in model_files]

# Button that starts the analysis with the selected model
start_analysis_button = widgets.Button(
    icon='play',
    button_style='info',
    description='Start Analysis',
    tooltip='Click to start the analysis with the selected model',
)

# Refresh the model dropdown whenever another folder is selected
folder_dropdown.observe(update_model_dropdown, names='value')

# Show the selection widgets and the button
display(folder_dropdown, model_dropdown, start_analysis_button)

# Sliding-window helper for unlabeled feature data
def create_sequences(features, window_size):
    """Build all consecutive windows of ``window_size`` rows from ``features``.

    Args:
        features: DataFrame whose rows are the time steps.
        window_size: Number of consecutive rows per window.

    Returns:
        Array of the stacked windows, one window per start row; an empty
        array when ``features`` has fewer than ``window_size`` rows.
    """
    n_windows = len(features) - window_size + 1
    windows = [
        features.iloc[start:start + window_size].values
        for start in range(n_windows)
    ]
    return np.array(windows)

# Windowing helper private to the analysis handler. The handler does not call
# the global `create_sequences`, because the training cell of this notebook
# defines `create_sequences(features, labels, window_size)` under the same
# name; once that cell has run, a two-argument call would raise a TypeError.
def _flat_feature_windows(features, window_size):
    """Return every consecutive ``window_size``-row window of ``features``, flattened.

    Args:
        features: DataFrame whose rows are the time steps.
        window_size: Number of consecutive rows per window.

    Returns:
        2-D array with one row per window (rows of the window concatenated
        in order); it has zero rows when ``features`` is shorter than
        ``window_size``.
    """
    n_windows = len(features) - window_size + 1
    if n_windows <= 0:
        return np.empty((0, window_size * features.shape[1]))
    values = features.to_numpy()
    return np.stack([values[i:i + window_size].ravel() for i in range(n_windows)])


# Event Handler for Analysis Button
def on_analysis_button_clicked(b):
    """Label the tracked data with the selected model, save and plot the result.

    Reads the CSV at the module-level ``tracking_data_path``, builds sliding
    windows over the hip heights (window size taken from ``sequence-<n>f`` in
    the model path, default 4), predicts one label per window and assigns it
    to the last row of that window. The labeled data is written below
    ``predictions_path`` and plotted.

    Optional fitted preprocessing objects are applied before prediction, in
    the order scaler then imputer (the order used by the training cell):
    ``<model folder>/preprocessing/scaler.pkl``,
    ``<model folder>/preprocessing/imputer.pkl`` and, as a fallback for the
    imputer, ``<models_dir>/imputer.pkl``.

    Args:
        b: The clicked button (unused).
    """
    # Clear the previous outputs
    clear_output(wait=True)
    display(folder_dropdown, model_dropdown, start_analysis_button)

    selected_model_path = model_dropdown.value
    if not selected_model_path:
        print("No model selected. Please choose a folder and a model first.")
        return
    if not tracking_data_path:
        print("No tracking data available. Run the video tracking cell or set tracking_data_path.")
        return
    print("Starting analysis with model:", selected_model_path)

    # Load the tracking data
    new_data = pd.read_csv(tracking_data_path)

    # Frame-to-frame differences; they are written to the output CSV but are
    # not part of the model input below.
    new_data['Left hip_y_diff'] = new_data['Left hip_y'].diff().fillna(0)
    new_data['Right hip_y_diff'] = new_data['Right hip_y'].diff().fillna(0)

    # If the new data includes the 'Label' column, drop it
    if "Label" in new_data.columns:
        new_data = new_data.drop("Label", axis=1)

    required_features = ['Left hip_y', 'Right hip_y']

    # Extract the window size from the directory name
    match = re.search(r'sequence-(\d+)f', selected_model_path)
    window_size = int(match.group(1)) if match else 4  # default if no match is found

    # Windows are flattened for compatibility with traditional models
    model_input = _flat_feature_windows(new_data[required_features], window_size)
    if len(model_input) == 0:
        print(f"Tracking data has only {len(new_data)} frames; at least {window_size} are required.")
        return

    # NOTE: joblib.load unpickles arbitrary Python objects; only load files
    # from model folders you trust.
    model_folder = os.path.dirname(selected_model_path)
    scaler_path = os.path.join(model_folder, 'preprocessing', 'scaler.pkl')
    if os.path.exists(scaler_path):
        model_input = joblib.load(scaler_path).transform(model_input)
    else:
        # The training cell fits its models on MinMax-scaled features, so
        # predicting on unscaled features can give unreliable labels.
        print(f"Warning: no fitted scaler found at {scaler_path}; features are passed to the model unscaled.")

    imputer_candidates = [
        os.path.join(model_folder, 'preprocessing', 'imputer.pkl'),
        os.path.join(models_dir, 'imputer.pkl'),
    ]
    imputer_path = next((p for p in imputer_candidates if os.path.exists(p)), None)
    if imputer_path is not None:
        model_input = joblib.load(imputer_path).transform(model_input)

    # Load the selected model and make predictions
    loaded_model = joblib.load(selected_model_path)
    predictions = loaded_model.predict(model_input)

    # Each prediction belongs to the last row of its window, so the first
    # window_size - 1 rows stay unlabeled.
    labels = np.full(len(new_data), np.nan, dtype=object)
    labels[window_size - 1:] = predictions
    new_data['Label'] = labels

    # Save the DataFrame with the new labels in a sub-folder named after the tracking file
    tracking_stem = os.path.splitext(os.path.basename(tracking_data_path))[0]
    model_stem = os.path.splitext(os.path.basename(selected_model_path))[0]
    predicted_file_dir = os.path.join(predictions_path, tracking_stem)
    predicted_file_path = os.path.join(predicted_file_dir, f"{tracking_stem}_{model_stem}_predicted.csv")

    os.makedirs(predicted_file_dir, exist_ok=True)
    new_data.to_csv(predicted_file_path, index=False)

    print(f"Predictions using {os.path.basename(selected_model_path)} saved to {predicted_file_path}")

    # Plot the new data with predictions
    plot_movement_data(predicted_file_path, title=f"Prediction - {os.path.basename(selected_model_path)}")

# Attach the event handler to the button
start_analysis_button.on_click(on_analysis_button_clicked)



# Model Training (sequential, time distributed)

In [None]:
# Find all labeled CSV files in the folder.
# NOTE: the files are intentionally used in the order glob returns them; the
# evaluation cell of this notebook loads them the same way, and a different
# row order would change the random train/test split.
all_files = glob.glob(labeled_data_path + "/Tracking_video*_labled.csv")
if not all_files:
    # Fail with the actual cause instead of pandas' "No objects to concatenate".
    raise FileNotFoundError(
        f"No labeled data files matching 'Tracking_video*_labled.csv' found in {labeled_data_path}"
    )

# Read each CSV file into its own dataframe
li = [pd.read_csv(filename, index_col=None, header=0) for filename in all_files]

# Combine all dataframes into a single dataframe with a fresh 0..n-1 index
data = pd.concat(li, axis=0, ignore_index=True)

# Sliding-window helper for labeled data
def create_sequences(features, labels, window_size):
    """Build sliding windows over ``features`` with one label per window.

    Args:
        features: DataFrame whose rows are the time steps.
        labels: Series aligned row by row with ``features``.
        window_size: Number of consecutive rows per window.

    Returns:
        Tuple ``(feature_windows, window_labels)`` of arrays. Each window is
        labeled with the label of its last row. Both arrays are empty when
        ``features`` has fewer than ``window_size`` rows.
    """
    starts = range(len(features) - window_size + 1)
    feature_windows = [features.iloc[s:s + window_size].values for s in starts]
    window_labels = [labels.iloc[s + window_size - 1] for s in starts]
    return np.array(feature_windows), np.array(window_labels)

# Numeric input for the number of frames per sequence (window size)
window_size_input = widgets.IntText(
    description='Frame Sequence Amount:',
    value=4,
    disabled=False,
    style=dict(description_width='initial'),  # let the label use its natural width
)

# Button that starts model training
train_models_button = widgets.Button(
    description='Train Models',
    icon='play',
    button_style='info',  # one of 'success', 'info', 'warning', 'danger' or ''
)

# Show the input first, then the button
display(window_size_input)
display(train_models_button)

def on_train_models_button_clicked(b):
    """Train all classifiers on sliding windows of the labeled hip heights.

    Uses the window size from ``window_size_input``, builds windows over the
    module-level ``data``, splits them 80/20 (``random_state=42``), applies a
    MinMaxScaler followed by a mean SimpleImputer (both fitted on the
    training split only) and trains, evaluates and saves each model to
    ``<models_dir>/sequence-<window_size>f/<ClassName>.pkl``.

    The fitted scaler and imputer are saved as ``scaler.pkl`` and
    ``imputer.pkl`` in a ``preprocessing`` sub-folder of that directory, so
    the exact transformation the models were trained with can be re-applied
    to new data.

    NOTE(review): windows are built over the concatenation of all labeled
    files, so some windows span the boundary between two recordings, and
    overlapping windows are split randomly between train and test; the
    reported accuracy is therefore likely optimistic. Confirm whether this is
    intended.

    Args:
        b: The clicked button (unused).
    """
    # Use the window size from the input widget
    window_size = window_size_input.value
    if window_size < 1:
        print(f"Frame Sequence Amount must be at least 1 (got {window_size}).")
        return

    # Models are stored in a directory named after the window size
    models_sequence_dir = f"{models_dir}/sequence-{window_size}f"
    os.makedirs(models_sequence_dir, exist_ok=True)

    # Frame-to-frame differences are added to the shared dataframe; they are
    # not part of the feature set used below.
    data['Left hip_y_diff'] = data['Left hip_y'].diff().fillna(0)
    data['Right hip_y_diff'] = data['Right hip_y'].diff().fillna(0)

    X = data[['Left hip_y', 'Right hip_y']]
    y = data['Label']

    X_seq, y_seq = create_sequences(X, y, window_size)
    if len(X_seq) == 0:
        print(f"Not enough labeled frames ({len(data)}) for a window size of {window_size}.")
        return

    X_train_seq, X_test_seq, y_train_seq, y_test_seq = train_test_split(
        X_seq, y_seq, test_size=0.2, random_state=42
    )

    # Flatten each window into one feature vector
    X_train_flat = X_train_seq.reshape(X_train_seq.shape[0], -1)
    X_test_flat = X_test_seq.reshape(X_test_seq.shape[0], -1)

    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train_flat)
    X_test_scaled = scaler.transform(X_test_flat)

    imputer = SimpleImputer(strategy='mean')
    X_train_imputed = imputer.fit_transform(X_train_scaled)
    X_test_imputed = imputer.transform(X_test_scaled)

    # Persist the fitted preprocessing next to the models. A sub-folder is
    # used because the cells that list the model folder treat every *.pkl
    # file directly inside it as a model.
    preprocessing_dir = os.path.join(models_sequence_dir, 'preprocessing')
    os.makedirs(preprocessing_dir, exist_ok=True)
    joblib.dump(scaler, os.path.join(preprocessing_dir, 'scaler.pkl'))
    joblib.dump(imputer, os.path.join(preprocessing_dir, 'imputer.pkl'))

    models = [
        RandomForestClassifier(),
        LogisticRegression(),
        SVC(),
        DecisionTreeClassifier(),
        GradientBoostingClassifier(),
        KNeighborsClassifier(),
        MLPClassifier(max_iter=1000)
    ]

    for model in models:
        model_name = model.__class__.__name__
        print(f"Training {model_name}...")
        model.fit(X_train_imputed, y_train_seq)
        y_pred = model.predict(X_test_imputed)
        accuracy = accuracy_score(y_test_seq, y_pred)
        print(f"{model_name} Accuracy: {accuracy}")
        print("Classification Report:\n", classification_report(y_test_seq, y_pred))
        model_save_path = os.path.join(models_sequence_dir, f"{model_name}.pkl")
        joblib.dump(model, model_save_path)
        print(f"Model saved to {model_save_path}")
        print("--------------------------------")

    print("Training complete!")

train_models_button.on_click(on_train_models_button_clicked)



# Model Evaluation

In [None]:
# Paths used by this cell (same values as in the configuration cell)
labeled_data_path = './data/training/labled_data'  # Update this path as necessary
models_dir = './data/models'  # Update this path as necessary

# UI for selecting the model folder to evaluate: every sub-folder of
# models_dir except "old", sorted so the order is stable across runs.
if os.path.isdir(models_dir):
    models_dirs = sorted(
        f for f in os.listdir(models_dir)
        if os.path.isdir(os.path.join(models_dir, f)) and f != "old"
    )
else:
    print(f"Models directory not found: {models_dir}")
    models_dirs = []
models_dropdown = widgets.Dropdown(options=models_dirs, description='Folder:')
display(models_dropdown)

def load_and_evaluate_models(b):
    """Evaluate every saved model of the selected folder on the held-out split.

    Loads all labeled CSV files, rebuilds the sliding windows (window size
    taken from ``sequence-<n>f`` in the folder name, default 4) and repeats
    the training cell's preprocessing: 80/20 split with ``random_state=42``,
    MinMaxScaler and mean SimpleImputer fitted on the training split. Each
    ``.pkl`` model in the folder is then scored on the test split.

    NOTE(review): the test split equals the one used during training only if
    the labeled data is unchanged since the models were trained.

    Args:
        b: The clicked button (unused).
    """
    selected_folder = models_dropdown.value
    if not selected_folder:
        print("No model folder selected.")
        return

    models_sequence_dir = os.path.join(models_dir, selected_folder)
    if not os.path.exists(models_sequence_dir):
        print(f"Directory {models_sequence_dir} does not exist. Please ensure models are trained and saved correctly.")
        return

    # Sorted so models are always reported in the same order.
    model_files = sorted(f for f in os.listdir(models_sequence_dir) if f.endswith('.pkl'))
    if not model_files:
        print("No trained models found in the directory.")
        return

    all_files = glob.glob(labeled_data_path + "/Tracking_video*_labled.csv")
    if not all_files:
        print(f"No labeled data files found in {labeled_data_path}.")
        return
    data = pd.concat(
        [pd.read_csv(filename, index_col=None, header=0) for filename in all_files],
        axis=0,
        ignore_index=True,
    )

    # Same windowing as in the training cell; defined locally so this cell
    # does not depend on which create_sequences is currently defined globally.
    def create_sequences(features, labels, window_size):
        feature_sequences, label_sequences = [], []
        for i in range(len(features) - window_size + 1):
            feature_sequences.append(features.iloc[i:(i + window_size)].values)
            label_sequences.append(labels.iloc[i + window_size - 1])
        return np.array(feature_sequences), np.array(label_sequences)

    X = data[['Left hip_y', 'Right hip_y']]
    y = data['Label']

    match = re.search(r'sequence-(\d+)f', selected_folder)
    window_size = int(match.group(1)) if match else 4  # default if no match is found

    X_seq, y_seq = create_sequences(X, y, window_size)
    if len(X_seq) == 0:
        print(f"Not enough labeled frames ({len(data)}) for a window size of {window_size}.")
        return

    X_train_seq, X_test_seq, y_train_seq, y_test_seq = train_test_split(
        X_seq, y_seq, test_size=0.2, random_state=42
    )

    # Flatten each window into one feature vector
    X_train_flat = X_train_seq.reshape(X_train_seq.shape[0], -1)
    X_test_flat = X_test_seq.reshape(X_test_seq.shape[0], -1)

    # Only the transformed test split is needed; the training split is used
    # to fit the scaler and the imputer.
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train_flat)
    X_test_scaled = scaler.transform(X_test_flat)

    imputer = SimpleImputer(strategy='mean')
    imputer.fit(X_train_scaled)
    X_test_imputed = imputer.transform(X_test_scaled)

    for model_file in model_files:
        model_path = os.path.join(models_sequence_dir, model_file)
        # NOTE: joblib.load unpickles arbitrary Python objects; only evaluate
        # model folders you trust.
        model = joblib.load(model_path)
        if not hasattr(model, 'predict'):
            # e.g. a pickled preprocessing object stored alongside the models
            print(f"Skipping {model_file}: object has no predict method.")
            print("--------------------------------")
            continue
        y_pred = model.predict(X_test_imputed)
        accuracy = accuracy_score(y_test_seq, y_pred)
        report = classification_report(y_test_seq, y_pred)
        print(f"Model: {model_file}")
        print(f"Accuracy: {accuracy}")
        print("Classification Report:\n", report)
        print("--------------------------------")

evaluate_button = widgets.Button(description="Start Analysis", button_style='info', icon='play')
evaluate_button.on_click(load_and_evaluate_models)
display(evaluate_button)
