In [28]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import numpy as np
import plotly.express as px

In [29]:
# Read the semicolon-separated biometrics table. `df` and `biometrics_df`
# are two names bound to the same DataFrame object.
df = pd.read_csv('biometrics_data.csv', sep=';')
biometrics_df = df
biometrics_df

Unnamed: 0,walking_type,video_id,person_id,camera_type,steps_length,avg_step_length,speed,time,distance,asymmetry
0,abnormal,limping1,0,front_multiple,[0.14770962 0.14714764 0.14685819 0.14727776 0...,0.158174,0.016143,324,5.230202,0.360017
1,abnormal,limping1,1,front_multiple,[0.2368916 0.15101376 0.15024317 0.15141071 0...,0.161374,0.0259,346,8.961452,0.802118
2,normal,DTU1,0,front,[0.11692207 0.13675987 0.14435628 0.15955304 0...,0.14413,0.043713,27,1.180249,0.039018
3,normal,jake,0,side,[0.55222212 0.61456363 0.56650746 0.60724509],0.585135,0.030833,58,1.788316,0.856048
4,normal,guy,0,side,[0.43892157 0.64519218 0.62339924 0.5295839 ],0.559274,0.035964,50,1.798175,0.714935
5,normal,karolina,0,frontback,[0.12800904 0.07777523 0.10063381 0.09813364 0...,0.112751,0.014958,700,10.470564,-0.023648
6,abnormal,hemiplegic_gait4,0,front,[0.278922 0.31759438 0.21406927 0.2110939 0...,0.26065,0.023243,459,10.668369,-0.628305
7,normal,athletic_male,0,front,[0.18562702 0.14532067 0.16222869 0.16312455 0...,0.141665,0.02179,76,1.656012,0.02058
8,normal,london_man,0,back,[0.12283621 0.15875164 0.11985308 0.04480141 0...,0.1039,0.004953,164,0.812266,0.415574
9,abnormal,limping_11,0,front_multiple,[0.14770962 0.14714764 0.14685819 0.14727776 0...,0.158174,0.016143,324,5.230202,0.360017


In [30]:
# steps_length is stored in the CSV as the text form of a float array
# (e.g. "[0.55 0.61 0.57]"), so strip the surrounding brackets and parse each
# cell back into a NumPy float array. Values that are not strings (already
# parsed by an earlier run of this cell) pass through unchanged, which keeps
# the cell safe to re-run.
biometrics_df["steps_length"] = biometrics_df["steps_length"].apply(
    lambda x: np.fromstring(x[1:-1], sep=" ") if isinstance(x, str) else x
)

# Rename side by side video: the two people in 'drunk_woman' get their own
# camera_type (person 0 -> 'side', person 1 -> 'front').
biometrics_df.loc[(biometrics_df['video_id'] == 'drunk_woman') & (biometrics_df['person_id'] == 0), 'camera_type'] = 'side'
biometrics_df.loc[(biometrics_df['video_id'] == 'drunk_woman') & (biometrics_df['person_id'] == 1), 'camera_type'] = 'front'

# Remove outlier: drop the 'DTU1' video and keep only rows whose asymmetry is
# above -10.0. The explicit .copy() makes the result an independent frame, so
# the column assignment below does not write into a slice of the original
# (the cause of pandas' SettingWithCopyWarning).
biometrics_df = biometrics_df[
    (biometrics_df['video_id'] != 'DTU1') & (biometrics_df['asymmetry'] > -10.0)
].copy()

# Rename labels: collapse the camera_type variants so only 'side' and 'front'
# remain for the labels listed here.
replace = {"side2": "side", "front_multiple": "front", "back": "front", 'frontback': 'front'}
# Assign the result back instead of using inplace=True on a column selection:
# the in-place form is chained assignment and, under pandas Copy-on-Write,
# would leave the frame unchanged.
biometrics_df["camera_type"] = biometrics_df["camera_type"].replace(replace)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [31]:
# Display the cleaned frame to check the row filtering and camera_type relabelling.
biometrics_df

Unnamed: 0,walking_type,video_id,person_id,camera_type,steps_length,avg_step_length,speed,time,distance,asymmetry
0,abnormal,limping1,0,front,"[0.14770962, 0.14714764, 0.14685819, 0.1472777...",0.158174,0.016143,324,5.230202,0.360017
1,abnormal,limping1,1,front,"[0.2368916, 0.15101376, 0.15024317, 0.15141071...",0.161374,0.0259,346,8.961452,0.802118
3,normal,jake,0,side,"[0.55222212, 0.61456363, 0.56650746, 0.60724509]",0.585135,0.030833,58,1.788316,0.856048
4,normal,guy,0,side,"[0.43892157, 0.64519218, 0.62339924, 0.5295839]",0.559274,0.035964,50,1.798175,0.714935
5,normal,karolina,0,front,"[0.12800904, 0.07777523, 0.10063381, 0.0981336...",0.112751,0.014958,700,10.470564,-0.023648
6,abnormal,hemiplegic_gait4,0,front,"[0.278922, 0.31759438, 0.21406927, 0.2110939, ...",0.26065,0.023243,459,10.668369,-0.628305
7,normal,athletic_male,0,front,"[0.18562702, 0.14532067, 0.16222869, 0.1631245...",0.141665,0.02179,76,1.656012,0.02058
8,normal,london_man,0,front,"[0.12283621, 0.15875164, 0.11985308, 0.0448014...",0.1039,0.004953,164,0.812266,0.415574
9,abnormal,limping_11,0,front,"[0.14770962, 0.14714764, 0.14685819, 0.1472777...",0.158174,0.016143,324,5.230202,0.360017
10,abnormal,limping_11,1,front,"[0.2368916, 0.15101376, 0.15024317, 0.15141071...",0.161374,0.0259,346,8.961452,0.802118


In [33]:
# 3-D scatter of avg_step_length / speed / asymmetry for each row of the
# cleaned frame; marker colour encodes camera_type, marker symbol encodes
# walking_type.
fig = px.scatter_3d(
    biometrics_df,
    x='avg_step_length',
    y='speed',
    z='asymmetry',
    color='camera_type',
    symbol='walking_type',
    width=1000,
    height=1000,
)
fig.show()