In [7]:
import os
import pandas as pd
import ipywidgets as widgets
import cv2
import mediapipe as mp
from IPython.display import Video, HTML, display
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go
import joblib
from sklearn.impute import SimpleImputer
from tracking.video_processing_pipeline import process_video, body_parts

# configuration
video_dir = './input_videos'
tracking_data_dir = './tracking_data'
output_video_dir = './output_videos'

# Ensure all directories exist
dirs_to_check = [video_dir, tracking_data_dir, output_video_dir]
for dir in dirs_to_check:
    if not os.path.exists(dir):
        os.makedirs(dir)
        print(f"Directory created: {dir}")

# Function to select a video using an interactive widget
def interactive_select_video(video_dir):
    video_files = [f for f in os.listdir(video_dir) if f.endswith(('.mov', '.MOV', '.mp4'))]
    if not video_files:
        print("No video files found in the directory.")
        return None
    video_selector = widgets.Dropdown(
        options=video_files,
        description='Select Video:',
        disabled=False,
    )
    display(video_selector)
    return video_selector

# Use the interactive selection component to select a video
video_selector = interactive_select_video(video_dir)


Dropdown(description='Select Video:', options=('IMG_9340.MOV',), value='IMG_9340.MOV')

In [8]:
# Track the selected video
try:
    selected_video = video_selector.value
except AttributeError:
    print("No video was selected.")
    selected_video = None

if selected_video:
    # Process the selected video
    process_video(selected_video, video_dir, tracking_data_dir)

    # set the path to the tracking data file
    tracking_data_file = f'Tracking_{selected_video[:-4]}.csv'

    # Read and display a preview of the tracking data CSV
    tracking_data_df = pd.read_csv(os.path.join(tracking_data_dir, tracking_data_file))
    display(tracking_data_df.head())

INFO:root:Processing video: IMG_9340.MOV
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
INFO:root:Tracking data for IMG_9340.MOV stored in ./tracking_data/Tracking_IMG_9340.csv


Unnamed: 0,Label,Timestamp,Nose_x,Nose_y,Nose_z,Left eye inner_x,Left eye inner_y,Left eye inner_z,Left eye_x,Left eye_y,...,Left heel_z,Right heel_x,Right heel_y,Right heel_z,Left foot index_x,Left foot index_y,Left foot index_z,Right foot index_x,Right foot index_y,Right foot index_z
0,,0,0.447181,0.186625,-0.454502,0.466466,0.167357,-0.481824,0.478228,0.166733,...,0.208295,0.527764,0.884256,0.747109,0.546632,0.973369,-0.081372,0.389055,0.922492,0.568898
1,,1,0.44744,0.189193,-0.463861,0.467123,0.170478,-0.498253,0.478337,0.169845,...,0.25073,0.527312,0.884385,0.692368,0.548656,0.978809,-0.046427,0.390017,0.923248,0.52004
2,,2,0.447379,0.191168,-0.478758,0.467114,0.172948,-0.515375,0.478157,0.17224,...,0.243222,0.527092,0.884469,0.687063,0.551741,0.980843,-0.052328,0.390481,0.923249,0.517358
3,,3,0.447204,0.191382,-0.487665,0.467088,0.173184,-0.523442,0.478086,0.172532,...,0.245198,0.527091,0.885141,0.690187,0.553133,0.981776,-0.04631,0.390478,0.923281,0.52312
4,,4,0.446104,0.191954,-0.475087,0.465854,0.173861,-0.520072,0.476621,0.173205,...,0.256576,0.527125,0.885262,0.690394,0.55415,0.982438,-0.032706,0.390477,0.923279,0.525715


In [9]:
# Generate Video Tracking Preview

# Function to save video with landmarks from tracking data
def save_landmarks_on_video(video_path, output_video_path, tracking_data_path):
    # Initialize MediaPipe Pose
    mp_pose = mp.solutions.pose
    mp_drawing = mp.solutions.drawing_utils

    # Open the video file
    cap = cv2.VideoCapture(video_path)
    
    # Check if video opened successfully
    if not cap.isOpened():
        print("Error: Could not open video.")
        return

    # Get the width, height, and frame rate of the video frame
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    # Read the tracking data from CSV
    tracking_data = pd.read_csv(tracking_data_path)

    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'avc1')  # or 'XVID'
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

    # Process video and draw landmarks from tracking data
    for index, row in tracking_data.iterrows():
        # Read frame from video
        ret, frame = cap.read()
        if not ret:
            break

        # Draw landmarks on the frame
        for landmark in mp.solutions.pose.PoseLandmark:
            landmark_name = landmark.name.lower().replace('_', ' ')  # Convert to lowercase and replace underscores with spaces
            landmark_name = landmark_name.capitalize()  # Capitalize only the first letter of the first word
            landmark_x = f'{landmark_name}_x'
            landmark_y = f'{landmark_name}_y'
            if landmark_x in tracking_data.columns and landmark_y in tracking_data.columns and not pd.isna(row[landmark_x]) and not pd.isna(row[landmark_y]):
                x = int(row[landmark_x] * width)
                y = int(row[landmark_y] * height)
                # Check if the current landmark is one of the hips
                if landmark in [mp.solutions.pose.PoseLandmark.LEFT_HIP, mp.solutions.pose.PoseLandmark.RIGHT_HIP]:
                    # Highlight hips in red
                    cv2.circle(frame, (x, y), 15, (0, 0, 255), -1)
                else:
                    # Draw other landmarks in green
                    cv2.circle(frame, (x, y), 5, (0, 255, 0), -1)


        # Write the frame with landmarks to the output video file
        out.write(frame)

    # Release everything when job is finished
    cap.release()
    out.release()
    cv2.destroyAllWindows()
    print(f"Video saved to {output_video_path}")

# Call the function to save the video with landmarks
video_path = os.path.join(video_dir, selected_video)

# Prepare the output video path
video_name = os.path.basename(video_path)
output_video_name = os.path.splitext(video_name)[0] + "_landmarks.mp4"
output_video_path = os.path.join(output_video_dir, output_video_name)
tracking_data_path = os.path.join(tracking_data_dir, tracking_data_file)
save_landmarks_on_video(video_path, output_video_path, tracking_data_path)

# Output
# Create an HTML string with the video embedded
video_html = f'''
Video saved to 
<a href='{output_video_path}' target='_blank'>{output_video_path}</a> <br><br>
<video height="500" controls allowfullscreen>
  <source src="{output_video_path}" type="video/mp4">
Your browser does not support the video tag.
</video>
'''
display(HTML(video_html))

Video saved to ./output_videos/IMG_9340_landmarks.mp4


In [10]:
# Interactive Plot with lables

# uncomment to use a different video than the one used above
video_path = tracking_data_path

# Usage
csv_file_path = tracking_data_path
ignore_start = 0  # Number of frames to ignore at the start
ignore_end = 0  # Number of frames to ignore at the end

def plot_movement_data(csv_file_path, ignore_start=0, ignore_end=0, title="Average Hip Height over Time"):
    # Read the data from the CSV file
    data = pd.read_csv(csv_file_path, delimiter=",")

    # Ignore specified amount of frames at the beginning and/or end
    data = data[ignore_start : -ignore_end or None]

    # Calculate the average hip height
    data["Average_Hip_Height"] = (data["Left hip_y"] + data["Right hip_y"]) / 2

    # Create a scatter plot
    fig = go.Figure()

    # Check if 'Label' column exists and plot accordingly
    if 'Label' in data.columns and data['Label'].notna().any():
        # Define custom color mapping for movement types
        custom_color_mapping = {
            "Pause": "orange",
            "Ascending": "green",
            "Descending": "red",
            "Transition": "yellow",
            "Unknown": "grey",
        }

        for label, color in custom_color_mapping.items():
            mask = data["Label"] == label
            fig.add_trace(
                go.Scatter(
                    x=data.loc[mask, "Timestamp"],
                    y=data.loc[mask, "Average_Hip_Height"],
                    mode="markers",
                    name=label,
                    marker_color=color,
                    hovertemplate="x: %{x}",
                )
            )
    else:
        # Plot the average hip height over time in blue if no 'Label' column
        fig.add_trace(
            go.Scatter(
                x=data["Timestamp"],
                y=data["Average_Hip_Height"],
                mode="markers",
                name="Average Hip Height",
                marker_color="blue",
                hovertemplate="x: %{x}",
            )
        )

    # Get the x values of the first and last data points
    first_x = data["Timestamp"].iloc[0]
    last_x = data["Timestamp"].iloc[-1]

    # Add vertical lines at the first and last data points
    fig.add_shape(
        type="line",
        x0=first_x,
        y0=0,
        x1=first_x,
        y1=1,
        yref="paper",
        line=dict(color="Green", width=2),
    )
    fig.add_shape(
        type="line",
        x0=last_x,
        y0=0,
        x1=last_x,
        y1=1,
        yref="paper",
        line=dict(color="Red", width=2),
    )

    # Add annotations for the first and last data points
    fig.add_annotation(
        x=first_x, y=0.05, text=f"Start: {first_x}", showarrow=False, yref="paper"
    )
    fig.add_annotation(
        x=last_x, y=0.05, text=f"End: {last_x}", showarrow=False, yref="paper"
    )

    # Increase the number of grid lines
    fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor="LightGrey")
    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor="LightGrey", autorange="reversed")

    # Remove the light blue background
    fig.update_layout(
        autosize=True,  
        # width=1000,  # Width of the figure in pixels
        height=600,  # Height of the figure in pixels
        hovermode="x", # Hovermode for the cursor
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        title=f"{title}"  # Add a title to the plot
    )

    fig.show()

# Call the function to plot the data
plot_movement_data(csv_file_path, ignore_start, ignore_end, title=f"Average Hip Height over Time - {os.path.basename(csv_file_path)}")


# Call the function to plot the data
plot_movement_data( "../../data/training/labled_data/Tracking_video01_labled.csv", ignore_start, ignore_end, title=f"Labled Example Data - {os.path.basename(csv_file_path)}")


In [11]:
# Apply models (overfitted)
models_dir = "../../data/models"

# Define a specific model file name if you want to run it for one model, else set to None
specific_model_file = None  # e.g., "KNeighborsClassifier.pkl"

# Define a blacklist of models to exclude
blacklist = ['imputer.pkl']  # Add model filenames to exclude

# Get all the .pkl files from the models directory, exclude blacklisted ones, or just the specific one
model_files = [specific_model_file] if specific_model_file else [f for f in os.listdir(models_dir) if f.endswith('.pkl') and f not in blacklist]

# Path to the tracking data
csv_file_name = os.path.basename(csv_file_path)
print(f"Making predictions for '{csv_file_name}'...")

# Path to save the new data with predictions
predictions_base_path = "./predictions"
csv_file_subfolder = os.path.splitext(csv_file_name)[0]
new_data_path = os.path.join(predictions_base_path, csv_file_subfolder)

# Create the new data directory if it doesn't exist
if not os.path.exists(new_data_path):
    os.makedirs(new_data_path)

# Load the tracking data
new_data = pd.read_csv(csv_file_path)

# If the new data includes the 'Label' column, drop it
if "Label" in new_data.columns:
    new_data = new_data.drop("Label", axis=1)

# Iterate over each model file and make predictions
for model_file in model_files:
    model_path = os.path.join(models_dir, model_file)
    
    # Load the model
    loaded_model = joblib.load(model_path)

    # Load the fitted imputer if needed
    # imputer_path = os.path.join(models_dir, 'imputer.pkl')
    # imputer = joblib.load(imputer_path)

    # Transform the new data using the fitted imputer if needed
    # new_data_transformed = imputer.transform(new_data)

    # Use the loaded model to make predictions
    predictions = loaded_model.predict(new_data)

    # Add the predictions back into the 'Label' column
    new_data_with_predictions = new_data.copy()
    new_data_with_predictions["Label"] = predictions

    # Save the DataFrame with the new labels back to a CSV file in the new data subfolder
    # Add "_predicted" to the file name
    predicted_file_name = f"{csv_file_subfolder}_{os.path.splitext(model_file)[0]}_predicted.csv"
    predicted_file_path = os.path.join(new_data_path, predicted_file_name)
    new_data_with_predictions.to_csv(predicted_file_path, index=False)

    # Plot the new data with predictions
    plot_movement_data(predicted_file_path, ignore_start, ignore_end, title=f"Prediction - {model_file}")

    print(f"Predictions using {model_file} saved to {predicted_file_path}")

Making predictions for 'Tracking_IMG_9340.csv'...



X has feature names, but GradientBoostingClassifier was fitted without feature names



Predictions using GradientBoostingClassifier.pkl saved to ./predictions/Tracking_IMG_9340/Tracking_IMG_9340_GradientBoostingClassifier_predicted.csv



X has feature names, but KNeighborsClassifier was fitted without feature names



Predictions using KNeighborsClassifier.pkl saved to ./predictions/Tracking_IMG_9340/Tracking_IMG_9340_KNeighborsClassifier_predicted.csv



X has feature names, but MLPClassifier was fitted without feature names



Predictions using MLPClassifier.pkl saved to ./predictions/Tracking_IMG_9340/Tracking_IMG_9340_MLPClassifier_predicted.csv



X has feature names, but LogisticRegression was fitted without feature names



Predictions using LogisticRegression.pkl saved to ./predictions/Tracking_IMG_9340/Tracking_IMG_9340_LogisticRegression_predicted.csv


Predictions using model.pkl saved to ./predictions/Tracking_IMG_9340/Tracking_IMG_9340_model_predicted.csv



X has feature names, but DecisionTreeClassifier was fitted without feature names



Predictions using DecisionTreeClassifier.pkl saved to ./predictions/Tracking_IMG_9340/Tracking_IMG_9340_DecisionTreeClassifier_predicted.csv



X has feature names, but DecisionTreeClassifier was fitted without feature names



Predictions using DecisionTreeClassifier2.pkl saved to ./predictions/Tracking_IMG_9340/Tracking_IMG_9340_DecisionTreeClassifier2_predicted.csv



X has feature names, but RandomForestClassifier was fitted without feature names



Predictions using RandomForest2.pkl saved to ./predictions/Tracking_IMG_9340/Tracking_IMG_9340_RandomForest2_predicted.csv



X has feature names, but SVC was fitted without feature names



Predictions using SVC.pkl saved to ./predictions/Tracking_IMG_9340/Tracking_IMG_9340_SVC_predicted.csv


In [24]:
import os
import pandas as pd
import numpy as np
import joblib
from sklearn.impute import SimpleImputer

# Assuming the necessary imports are done

# Function to create sequences from data
def create_sequences(features, window_size):
    feature_sequences = []
    for i in range(len(features) - window_size + 1):
        feature_sequences.append(features.iloc[i:(i + window_size)].values)
    return np.array(feature_sequences)

# Define the directory where models are stored
models_dir = "../../data/models/sequence/"

# Define a specific model file name if you want to run it for one model, else set to None
specific_model_file = None  # e.g., "KNeighborsClassifier.pkl"

# Define a blacklist of models to exclude
blacklist = ['imputer.pkl']  # Add model filenames to exclude

# Get all the .pkl files from the models directory
all_model_files = [f for f in os.listdir(models_dir) if f.endswith('.pkl') and f not in blacklist]

# If a specific model file is defined, only use that one
model_files = [specific_model_file] if specific_model_file else all_model_files

# Load the tracking data
new_data = pd.read_csv(csv_file_path)

# Prepare the data as per the model's training conditions
new_data['Left hip_y_diff'] = new_data['Left hip_y'].diff().fillna(0)
new_data['Right hip_y_diff'] = new_data['Right hip_y'].diff().fillna(0)

# If the new data includes the 'Label' column, drop it
if "Label" in new_data.columns:
    new_data = new_data.drop("Label", axis=1)

# Select only the required features from the new data
# required_features = ['Left hip_y', 'Right hip_y', 'Left hip_y_diff', 'Right hip_y_diff']
required_features = ['Left hip_y', 'Right hip_y']
new_data_for_sequences = new_data[required_features]

# Create sequences from the new data
window_size = 4  # Ensure this matches the window size used during training
new_data_sequences = create_sequences(new_data_for_sequences, window_size)

# Flatten the sequences for compatibility with traditional models
new_data_flat = new_data_sequences.reshape(new_data_sequences.shape[0], -1)

# Load the fitted imputer if needed and apply it to the flattened new data
imputer_path = os.path.join(models_dir, 'imputer.pkl')
if os.path.exists(imputer_path):
    imputer = joblib.load(imputer_path)
    new_data_imputed = imputer.transform(new_data_flat)
else:
    new_data_imputed = new_data_flat

# Iterate over each model file and make predictions
for model_file in model_files:
    model_path = os.path.join(models_dir, model_file)
    loaded_model = joblib.load(model_path)

    # Make predictions
    predictions = loaded_model.predict(new_data_imputed)

    # Handle predictions (e.g., assigning predictions to the last element of each sequence)
    # This part needs to be adjusted based on your specific requirements
    # Example: Assigning predictions to the last element of each sequence
    for i, prediction in enumerate(predictions):
        if i + window_size - 1 < len(new_data):
            new_data.at[i + window_size - 1, 'Label'] = prediction

    # Save the DataFrame with the new labels back to a CSV file in the new data subfolder
    predicted_file_name = f"{csv_file_subfolder}_{os.path.splitext(model_file)[0]}_predicted.csv"
    predicted_file_path = os.path.join(new_data_path, predicted_file_name)
    new_data.to_csv(predicted_file_path, index=False)

    print(f"Predictions using {model_file} saved to {predicted_file_path}")
    # Plot the new data with predictions
    plot_movement_data(predicted_file_path, ignore_start, ignore_end, title=f"Prediction - {model_file}")

Predictions using GradientBoostingClassifier.pkl saved to ./predictions/Tracking_IMG_9340/Tracking_IMG_9340_GradientBoostingClassifier_predicted.csv


Predictions using KNeighborsClassifier.pkl saved to ./predictions/Tracking_IMG_9340/Tracking_IMG_9340_KNeighborsClassifier_predicted.csv


Predictions using MLPClassifier.pkl saved to ./predictions/Tracking_IMG_9340/Tracking_IMG_9340_MLPClassifier_predicted.csv


Predictions using LogisticRegression.pkl saved to ./predictions/Tracking_IMG_9340/Tracking_IMG_9340_LogisticRegression_predicted.csv


Predictions using RandomForestClassifier.pkl saved to ./predictions/Tracking_IMG_9340/Tracking_IMG_9340_RandomForestClassifier_predicted.csv


Predictions using DecisionTreeClassifier.pkl saved to ./predictions/Tracking_IMG_9340/Tracking_IMG_9340_DecisionTreeClassifier_predicted.csv


Predictions using SVC.pkl saved to ./predictions/Tracking_IMG_9340/Tracking_IMG_9340_SVC_predicted.csv
