
# Data Analysis for Walking Sessions

This notebook is a data-analysis exercise based on data exported from Optitrack and RGBD systems, recorded while filming people walking up and down a room.


In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def load_and_process_data(filename):
    """Read a motion-capture CSV and return heel positions plus their separation.

    The file is read with the default ``pd.read_csv`` settings, so its first
    line becomes the header. The next three rows (from the third column
    onward) are combined into one "<body part> <type> <detail>" label per
    column; every row below them is parsed as numeric samples.

    Parameters
    ----------
    filename : str or file-like
        Passed straight to ``pd.read_csv``. The parsed frame must expose the
        columns 'Unnamed: 0' (frame numbers) and 'Name' (time stamps).

    Returns
    -------
    pandas.DataFrame
        Columns 'Frame', 'Time(Seconds)', 'Left Heel X/Y/Z',
        'Right Heel X/Y/Z' and 'Step Length', where 'Step Length' is the
        Euclidean distance between the two heel positions in each row.

    Raises
    ------
    KeyError
        If any of the expected 'An_LHeel'/'An_RHeel' position columns (or the
        'Unnamed: 0' / 'Name' columns) is missing.
    """
    raw = pd.read_csv(filename)

    # Rows 0, 1 and 2 carry the three parts of every measurement column's label.
    part_row, type_row, detail_row = (raw.iloc[row, 2:].values for row in range(3))
    labels = [
        "{} {} {}".format(part, kind, detail)
        for part, kind, detail in zip(part_row, type_row, detail_row)
    ]

    # Everything from row 3 down is sample data; the first two columns are frame and time.
    samples = pd.DataFrame(raw.values[3:, 2:], columns=labels, dtype=float)
    samples.insert(0, 'Frame', raw['Unnamed: 0'].values[3:].astype(int))
    samples.insert(1, 'Time(Seconds)', raw['Name'].values[3:].astype(float))

    # Source label -> short output name, in output column order.
    renamed = {
        'Frame': 'Frame',
        'Time(Seconds)': 'Time(Seconds)',
        'An_LHeel Position X': 'Left Heel X',
        'An_LHeel Position Y': 'Left Heel Y',
        'An_LHeel Position Z': 'Left Heel Z',
        'An_RHeel Position X': 'Right Heel X',
        'An_RHeel Position Y': 'Right Heel Y',
        'An_RHeel Position Z': 'Right Heel Z',
    }
    heels = samples[list(renamed.keys())]
    heels.columns = list(renamed.values())

    # Let pandas pick its nullable dtypes for each column.
    heels = heels.convert_dtypes()

    # Per-axis gap between the heels, then the straight-line distance.
    gap_x = heels['Right Heel X'] - heels['Left Heel X']
    gap_y = heels['Right Heel Y'] - heels['Left Heel Y']
    gap_z = heels['Right Heel Z'] - heels['Left Heel Z']
    heels['Step Length'] = np.sqrt(gap_x**2 + gap_y**2 + gap_z**2)

    return heels

def visualize_step_length(data):
    """Show a line chart of the 'Step Length' column against the 'Frame' column.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'Frame' and 'Step Length'.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``; nothing is returned or saved.
    """
    fig, ax = plt.subplots(figsize=(14, 6))
    ax.plot(data['Frame'], data['Step Length'], label='Step Length', color='blue')
    ax.set_title('Step Length Over the Session')
    ax.set_xlabel('Frame')
    ax.set_ylabel('Step Length (in meters)')
    ax.legend()
    ax.grid(True)
    fig.tight_layout()
    plt.show()

# Usage:
# data = load_and_process_data("path_to_your_file.csv")
# visualize_step_length(data)


## Optitrack Calculations

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Set Variable Names
file_path_optitrack = 'Your_Optitrack_File.csv'  # Update this path as needed
patient_name_optitrack = 'PatientName' # Update this name as needed

# 1. Load the Optitrack data
# header=None keeps the label rows as ordinary rows, so columns are addressed by integer position.
optitrack_data = pd.read_csv(file_path_optitrack, header=None)

# 2. Extract position data and reshape the dataframe
name_row = optitrack_data.iloc[0]
# Every non-empty cell of the first row that does not contain "Name" is treated as a body point label.
body_points = [val for val in name_row if pd.notna(val) and "Name" not in val]
body_point_cols = {}
for point in body_points:
    # First column carrying this label.
    start_index = optitrack_data.columns[name_row == point][0]
    # The three columns taken as X/Y/Z start two columns after that one.
    # NOTE(review): the +2 offset is inherited from the original code; confirm it against the export layout.
    body_point_cols[point] = list(range(start_index + 2, start_index + 5))
position_data_list = [optitrack_data.iloc[:, body_point_cols[point]] for point in body_points]
position_data_optitrack = pd.concat(position_data_list, axis=1)
position_data_optitrack.columns = [
    f"{point}_{coord}" for point in body_points for coord in ['X', 'Y', 'Z']
]
# Rows 0-2 are label rows, not samples.
position_data_optitrack = position_data_optitrack.drop([0, 1, 2]).reset_index(drop=True)

# 3. Convert heel position columns to float type and compute step length
# The leading space is part of the column label, exactly as in the original lookup.
heel_prefix = f" {patient_name_optitrack}"
heel_columns_float = [
    f"{heel_prefix}_{side}Heel_{coord}" for side in ('L', 'R') for coord in ('X', 'Y', 'Z')
]
position_data_optitrack[heel_columns_float] = position_data_optitrack[heel_columns_float].astype(float)
# Euclidean distance between the right and left heel positions in each row.
position_data_optitrack['Step_Length'] = np.sqrt(
    (position_data_optitrack[f"{heel_prefix}_RHeel_X"] - position_data_optitrack[f"{heel_prefix}_LHeel_X"])**2 +
    (position_data_optitrack[f"{heel_prefix}_RHeel_Y"] - position_data_optitrack[f"{heel_prefix}_LHeel_Y"])**2 +
    (position_data_optitrack[f"{heel_prefix}_RHeel_Z"] - position_data_optitrack[f"{heel_prefix}_LHeel_Z"])**2
)

# 4. Save the processed Optitrack position data to a CSV file
position_data_optitrack.to_csv(f'{patient_name_optitrack}_optitrack_position_data_processed.csv', index=False)

# Visualize the step length over frames
plt.figure(figsize=(12, 6))
# Plot the computed column from the DataFrame (the patient name is only a string used in file names).
plt.plot(position_data_optitrack['Step_Length'])
plt.title("Step Length over Frames (Optitrack Data)")
plt.xlabel("Frame")
plt.ylabel("Step Length (units)")
plt.grid(True)
plt.savefig(f'{patient_name_optitrack}_optitrack_step_length.png')  # Save the plot as an image
plt.show()

# Save the data to a CSV file
position_data_optitrack.to_csv(f'{patient_name_optitrack}_optitrack_position_data.csv', index=False)

## RGBD Calculations

In [None]:
# Authors: Anesu Chakaingesu
# Date created: 2023-10-26
# Description: This script is used to calculate step lengths from RGBD data.
# Usage: python3 rgbd_calculations.py
# Parameters:
#   - patient_name: The name of the patient (e.g. Eric)
#   - filename: The name of the CSV file containing the RGBD data
# Output: A CSV file containing the RGBD data with step lengths calculated.
# Notes:
#   - The RGBD data should be in the same directory as this script.
#   - The RGBD data should be in the same format as the data exported from the
#     rgbd_data_processing.py script.
#   - The CSV file containing the RGBD data should be named
#     <patient_name>_BLAZEPOSE_points.csv
#   - The CSV file containing the RGBD data should have the following columns:
#       - Frame
#       - Time(Seconds)
#       - DavidPagnon.etc.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# 1. Load the RGBD dataset
file_path_rgbd = 'Your_RGBD_File.csv'  # Update this path as needed
rgbd_raw = pd.read_csv(file_path_rgbd, header=None)

# 2. Extract positional data for relevant keypoints
# The third row supplies the column labels; the rows after it are the samples.
header_labels = rgbd_raw.iloc[2]
rgbd_data = rgbd_raw.iloc[3:].reset_index(drop=True)
rgbd_data.columns = header_labels
# Anything that cannot be parsed as a number becomes NaN.
rgbd_data = rgbd_data.apply(pd.to_numeric, errors='coerce')

# 3. Calculate step lengths (assuming 'LHeel' and 'RHeel' are the relevant columns)
# Positional indexing below requires each of these labels to appear on at least two columns.
left_heel_cols = rgbd_data['LHeel']
right_heel_cols = rgbd_data['RHeel']

left_minus_right_0 = left_heel_cols.iloc[:, 0] - right_heel_cols.iloc[:, 0]
left_minus_right_1 = left_heel_cols.iloc[:, 1] - right_heel_cols.iloc[:, 1]
rgbd_data['Left_Step_Length'] = np.sqrt(left_minus_right_0**2 + left_minus_right_1**2)

# NOTE(review): this yields the same values as 'Left_Step_Length' - swapping the operands
# only flips the sign of each difference before it is squared. Confirm whether distinct
# left/right keypoints were intended.
right_minus_left_0 = right_heel_cols.iloc[:, 0] - left_heel_cols.iloc[:, 0]
right_minus_left_1 = right_heel_cols.iloc[:, 1] - left_heel_cols.iloc[:, 1]
rgbd_data['Right_Step_Length'] = np.sqrt(right_minus_left_0**2 + right_minus_left_1**2)

# 4. Save the processed RGBD position data to a CSV file
rgbd_data.to_csv('Your_Output_Path_RGBD_Processed.csv', index=False)

# Plotting step lengths
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(rgbd_data['Frame'], rgbd_data['Left_Step_Length'], label='Left Step Length', color='blue')
ax.plot(rgbd_data['Frame'], rgbd_data['Right_Step_Length'], label='Right Step Length', color='red')
ax.legend()
ax.set_title('RGBD Step Lengths over Time')
ax.set_xlabel('Frame')
ax.set_ylabel('Step Length')
ax.grid(True)
fig.savefig('rgbd_step_lengths.png')
plt.show()

# Earlier version of the RGBD step-length script, kept as a plain string.
# Nothing in the visible cells executes, prints, or otherwise reads this variable.
# NOTE(review): the text addresses keypoints through 'DavidPagnon.N' column labels and
# uses different column pairs for the left and right values - presumably the layout of
# the original export; confirm before reusing any of it.
old_rgbd_script = """
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def extract_rgbd_position_data(dataset):
    # Extracting positional data
    dataset.columns = dataset.iloc[2]
    dataset = dataset[3:]
    dataset = dataset.reset_index(drop=True)
    dataset = dataset.apply(pd.to_numeric, errors='coerce')
    return dataset

def calculate_rgbd_step_length(data):
    # Getting step lengths
    data['Left_Step_Length'] = np.sqrt(
        (data['DavidPagnon.13'] - data['DavidPagnon.22'])**2 +
        (data['DavidPagnon.14'] - data['DavidPagnon.23'])**2
    )
    
    data['Right_Step_Length'] = np.sqrt(
        (data['DavidPagnon.16'] - data['DavidPagnon.25'])**2 +
        (data['DavidPagnon.17'] - data['DavidPagnon.26'])**2
    )
    
    return data

# Load the dataset
rgbd_data = pd.read_csv('eric4_BLAZEPOSE_points.csv', header=None)

# Extract positional data for relevant keypoints
rgbd_position_data = extract_rgbd_position_data(rgbd_data)

# Calculate step lengths
rgbd_position_data = calculate_rgbd_step_length(rgbd_position_data)

# Plotting step lengths
plt.figure(figsize=(12, 6))
plt.plot(rgbd_position_data['DavidPagnon.1'], rgbd_position_data['Left_Step_Length'], label='Left Step Length', color='blue')
plt.plot(rgbd_position_data['DavidPagnon.1'], rgbd_position_data['Right_Step_Length'], label='Right Step Length', color='red')
plt.legend()
plt.title('RGBD Step Lengths')
plt.xlabel('Time (s)')
plt.ylabel('Step Length (units)')
plt.savefig('rgbd_step_lengths.png')
plt.show()

# Save the data to a CSV file for further analysis
rgbd_position_data.to_csv('eric4_BLAZEPOSE_points_processed.csv', index=False)
"""

## Comparisons

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def load_and_normalize_data(optitrack_path, rgbd_path):
    """Read both processed CSV files and rescale the RGBD step-length columns.

    The scale factor is the largest Optitrack 'Step_Length' divided by the
    spread (max minus min) of the RGBD 'Left_Step_Length' column. The same
    factor is applied to both 'Left_Step_Length' and 'Right_Step_Length'.

    Parameters
    ----------
    optitrack_path : str or file-like
        CSV with a 'Step_Length' column; passed to ``pd.read_csv``.
    rgbd_path : str or file-like
        CSV with 'Left_Step_Length' and 'Right_Step_Length' columns; passed
        to ``pd.read_csv``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The Optitrack frame as read, and the RGBD frame with its two
        step-length columns multiplied by the scale factor.
    """
    optitrack_frame = pd.read_csv(optitrack_path)
    rgbd_frame = pd.read_csv(rgbd_path)

    # Spread of the left-side RGBD values is the denominator for both columns.
    left_lengths = rgbd_frame['Left_Step_Length']
    left_spread = left_lengths.max() - left_lengths.min()
    scale_factor = optitrack_frame['Step_Length'].max() / left_spread

    for column in ('Left_Step_Length', 'Right_Step_Length'):
        rgbd_frame[column] *= scale_factor

    return optitrack_frame, rgbd_frame

def plot_comparison(optitrack_data, rgbd_data, save_path=None):
    """Overlay the Optitrack and RGBD step-length series on one chart and show it.

    Parameters
    ----------
    optitrack_data : pandas.DataFrame
        Must contain a 'Step_Length' column.
    rgbd_data : pandas.DataFrame
        Must contain 'Left_Step_Length' and 'Right_Step_Length' columns.
    save_path : str or path-like, optional
        When given, the figure is written to this path before it is shown.
        Defaults to None (display only), which matches the previous behaviour.

    Returns
    -------
    None
    """
    fig, ax = plt.subplots(figsize=(14, 7))
    ax.plot(optitrack_data['Step_Length'], label='Optitrack Step Length', color='green')
    ax.plot(rgbd_data['Left_Step_Length'], label='RGBD Left Step Length', color='blue')
    ax.plot(rgbd_data['Right_Step_Length'], label='RGBD Right Step Length', color='red')
    ax.legend()
    ax.set_title('Step Length Comparison: Optitrack vs. RGBD')
    ax.set_xlabel('Time/Frames')
    ax.set_ylabel('Normalized Step Length')
    ax.grid(True)
    # Saving must happen before show(): once the figure has been shown, a later
    # plt.savefig() call no longer targets it and writes an empty image.
    if save_path is not None:
        fig.savefig(save_path)
    plt.show()

# Provide paths to your CSV files
optitrack_path = 'your_optitrack_data_path.csv'
rgbd_path = 'your_rgbd_data_path.csv'

optitrack_data, rgbd_data = load_and_normalize_data(optitrack_path, rgbd_path)

# Draw the comparison and save it to an image file in the same call
plot_comparison(optitrack_data, rgbd_data, save_path='step_length_comparison.png')

# Save the normalized RGBD data to a CSV file
rgbd_data.to_csv('your_output_path_rgbd_normalized.csv', index=False)



## Comprehensive Comparison

In [None]:
# Generate the consolidated comparison script
#
# The script is stored as plain text in `consolidated_script`; this cell does not
# execute it, it only prints the text at the end.
# The text contains its own `load_and_normalize_data` definition with the same body as
# the one in the Comparisons cell, plus `extract_cross_section_modified` and plotting code.
# NOTE(review): the embedded code reads a 'Times' column from the RGBD CSV and needs
# scipy (`find_peaks`); neither is set up by the visible cells - confirm before running it.

consolidated_script = """
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import find_peaks

def load_and_normalize_data(optitrack_path, rgbd_path):
    # Load the datasets
    optitrack_data = pd.read_csv(optitrack_path)
    rgbd_data = pd.read_csv(rgbd_path)
    
    # Normalize the RGBD data to match the Optitrack data range
    scale_factor = optitrack_data['Step_Length'].max() / (rgbd_data['Left_Step_Length'].max() - rgbd_data['Left_Step_Length'].min())
    rgbd_data['Left_Step_Length'] *= scale_factor
    rgbd_data['Right_Step_Length'] *= scale_factor
    
    return optitrack_data, rgbd_data

def extract_cross_section_modified(data, column_name):
    # Find peaks
    peaks, _ = find_peaks(data[column_name])
    peak_values = data[column_name].iloc[peaks]
    
    # Identify the first major spike in movement by finding the max change in step length
    first_spike_idx = data[column_name].diff().idxmax()
    
    # Get indices of 1st and 3rd highest peaks
    highest_peak_idx = peak_values.idxmax()
    third_highest_peak_idx = peak_values.nlargest(3).idxmin()
    
    # Extract cross-section
    start_idx = min(first_spike_idx, highest_peak_idx, third_highest_peak_idx)
    end_idx = max(highest_peak_idx, third_highest_peak_idx)
    
    return data.iloc[start_idx:end_idx+1]

# Provide paths to your CSV files
optitrack_path = 'your_optitrack_data_path.csv'
rgbd_path = 'your_rgbd_data_path.csv'

optitrack_data, rgbd_data = load_and_normalize_data(optitrack_path, rgbd_path)

# Side-by-Side Comparison
fig, axs = plt.subplots(1, 2, figsize=(18, 6), sharey=True)
axs[0].plot(optitrack_data['Step_Length'], color='green')
axs[0].set_title('Optitrack Step Length')
axs[0].set_xlabel('Frames')
axs[0].set_ylabel('Normalized Step Length')
axs[0].grid(True)
axs[1].plot(rgbd_data['Times'], rgbd_data['Left_Step_Length'], label='RGBD Left Step Length', color='blue')
axs[1].plot(rgbd_data['Times'], rgbd_data['Right_Step_Length'], label='RGBD Right Step Length', color='red')
axs[1].legend()
axs[1].set_title('RGBD Step Lengths')
axs[1].set_xlabel('Time (s)')
axs[1].grid(True)
plt.tight_layout()
plt.savefig('side_by_side_comparison.png')
plt.show()

# Extract cross-sections for both datasets
optitrack_cross_section = extract_cross_section_modified(optitrack_data, 'Step_Length')
rgbd_cross_section_left = extract_cross_section_modified(rgbd_data, 'Left_Step_Length')
rgbd_cross_section_right = extract_cross_section_modified(rgbd_data, 'Right_Step_Length')

# Interpolate the RGBD data
num_frames = len(optitrack_cross_section)
rgbd_left_interpolated = np.interp(np.linspace(0, len(rgbd_cross_section_left)-1, num_frames), 
                                   np.arange(len(rgbd_cross_section_left)), 
                                   rgbd_cross_section_left['Left_Step_Length'])
rgbd_right_interpolated = np.interp(np.linspace(0, len(rgbd_cross_section_right)-1, num_frames), 
                                    np.arange(len(rgbd_cross_section_right)), 
                                    rgbd_cross_section_right['Right_Step_Length'])

# Layered Comparison with Interpolated Data
plt.figure(figsize=(14, 7))
plt.plot(optitrack_cross_section.index, optitrack_cross_section['Step_Length'], label='Optitrack Step Length', color='green')
plt.plot(optitrack_cross_section.index, rgbd_left_interpolated, label='RGBD Left Step Length (Interpolated)', color='blue')
plt.plot(optitrack_cross_section.index, rgbd_right_interpolated, label='RGBD Right Step Length (Interpolated)', color='red')
plt.legend()
plt.title('Layered Comparison (Interpolated): Optitrack vs. RGBD')
plt.xlabel('Frames')
plt.ylabel('Normalized Step Length')
plt.grid(True)
plt.savefig('layered_comparison.png')
plt.show()
"""

# Display the consolidated script
# Prints the text only; the code inside the string is not run here.
print(consolidated_script)
