Use pandas' `.describe()` to compute summary statistics for each video's results file, then combine the per-video statistics into a single table covering all videos.

In [1]:
import pandas as pd
import os
import re

In [2]:
# Repository root that every relative path in this notebook is resolved against.
# The PARKINSON_REPO_ROOT environment variable overrides the original
# machine-specific location, so the notebook can run from another checkout
# without editing code; when the variable is unset the behaviour is unchanged.
REPO_ROOT = os.environ.get(
    'PARKINSON_REPO_ROOT',
    '/Users/hanliyang/Documents/GitHub/Parkinson-Prediagnosis',
)
os.chdir(REPO_ROOT)

print(os.getcwd())

# Sanity check only: report whether the per-video results folder is reachable
# from the new working directory. Nothing is raised here if it is missing.
if not os.path.exists('Computer Vision/Results/All/'):
    print("Directory does not exist!")
else:
    print("Directory exists!")

/Users/hanliyang/Documents/GitHub/Parkinson-Prediagnosis
Directory exists!


### Patient Videos

In [5]:
# Per-file describe() tables; the processing loop further down this cell
# appends one entry per results file.
df_stats_list = []

def extract_numbers(filename):
    """Sort key that orders patient result files numerically.

    Parameters
    ----------
    filename : str
        A directory entry name, e.g. ``'p3v2_results.csv'``.

    Returns
    -------
    tuple
        ``(patient, video)`` as ints when the name starts with
        ``p<digits>v<digits>``; otherwise ``(inf, inf, filename)``.

    Names that do not match the pattern previously produced a bare string,
    which cannot be ordered against the tuples produced for matching names
    (``sorted`` raised ``TypeError``). The fallback is now a tuple whose first
    element is infinity, so unmatched names sort after every matched file and
    alphabetically among themselves.
    """
    match = re.match(r'p(\d+)v(\d+)', filename)
    if match:
        return int(match.group(1)), int(match.group(2))
    return float('inf'), float('inf'), filename


# Folder holding the per-video patient result files.
PATIENT_RESULTS_DIR = 'Computer Vision/Results/All/'
# Destination of the combined table; its folder is created on demand below.
PATIENT_STATS_CSV = 'Computer Vision/Results/stats/all_stats_noOutliers.csv'
# TODO: Threshold to remove outliers
MAX_ABS_LINEAR_ACCELERATION = 1000

# Only CSV files are processed, so stray directory entries (hidden files,
# sub-folders) are never handed to pd.read_csv.
files = sorted(
    (name for name in os.listdir(PATIENT_RESULTS_DIR) if name.lower().endswith('.csv')),
    key=extract_numbers,
)

if not files:
    # pd.concat on an empty list fails with an opaque message; fail early instead.
    raise ValueError(f"No CSV result files found in {PATIENT_RESULTS_DIR!r}")

for file in files:
    file_path = os.path.join(PATIENT_RESULTS_DIR, file)
    df = pd.read_csv(file_path)
    # Keep rows with |linear_acceleration| <= threshold. Rows where the value
    # is NaN are dropped as well, because the comparison is False for NaN.
    df = df.loc[df['linear_acceleration'].abs() <= MAX_ABS_LINEAR_ACCELERATION]
    # After the transpose each row is one described column of the file and
    # each column is one summary statistic (count, mean, std, ...).
    df_desc = df.describe().T

    df_desc['file'] = file
    # Despite its name, 'statistic' stores the described column's name
    # (the index of the transposed describe() table).
    df_desc['statistic'] = df_desc.index

    print(f"Processing file: {file_path}")
    print(f"DataFrame shape: {df.shape}")
    print(f"Statistics shape: {df_desc.shape}")

    df_stats_list.append(df_desc)

combined_desc = pd.concat(df_stats_list)
# Move the identifying columns to the front, keeping the rest in order.
leading_cols = ['file', 'statistic']
combined_desc = combined_desc[leading_cols + [col for col in combined_desc.columns if col not in leading_cols]]

# Print the number of files processed and the shape of the combined DataFrame
print(f"Files processed: {len(files)}")
print(f"Combined description shape: {combined_desc.shape}")

# Save to CSV (index dropped; the column name is already in 'statistic')
os.makedirs(os.path.dirname(PATIENT_STATS_CSV), exist_ok=True)
combined_desc.to_csv(PATIENT_STATS_CSV, index=False)

Processing file: Computer Vision/Results/All/p1v1_results.csv
DataFrame shape: (3251, 14)
Statistics shape: (14, 10)
Processing file: Computer Vision/Results/All/p1v2_results.csv
DataFrame shape: (3669, 14)
Statistics shape: (14, 10)
Processing file: Computer Vision/Results/All/p2v1_results.csv
DataFrame shape: (3653, 14)
Statistics shape: (14, 10)
Processing file: Computer Vision/Results/All/p3v1_results.csv
DataFrame shape: (2321, 14)
Statistics shape: (14, 10)
Processing file: Computer Vision/Results/All/p3v2_results.csv
DataFrame shape: (3633, 14)
Statistics shape: (14, 10)
Processing file: Computer Vision/Results/All/p3v3_results.csv
DataFrame shape: (172, 14)
Statistics shape: (14, 10)
Processing file: Computer Vision/Results/All/p4v1_results.csv
DataFrame shape: (47, 14)
Statistics shape: (14, 10)
Processing file: Computer Vision/Results/All/p5v1_results.csv
DataFrame shape: (799, 14)
Statistics shape: (14, 10)
Processing file: Computer Vision/Results/All/p5v2_results.csv
DataFr

In [6]:
# Values of the 'statistic' column whose rows are kept.
stats = ['linear_acceleration']

# Reload the combined per-video table written by the previous cell.
df_all_stats = pd.read_csv('Computer Vision/Results/stats/all_stats_noOutliers.csv')

# NOTE(review): the variable and the output file are named "angularAcceleration",
# but the rows selected are 'linear_acceleration' - confirm which is intended.
keep_rows = df_all_stats['statistic'].isin(stats)
df_all_stats_angularAcceleration = df_all_stats.loc[keep_rows].copy()

df_all_stats_angularAcceleration.to_csv(
    'Computer Vision/Results/stats-chosen/all_stats_angularAcceleration_noOutliers.csv',
    index=False,
)
# Bare last expression so the notebook renders the selected rows.
df_all_stats_angularAcceleration

Unnamed: 0,file,statistic,count,mean,std,min,25%,50%,75%,max
13,p1v1_results.csv,linear_acceleration,3251.0,0.256460,83.579216,-929.530200,-33.496734,0.627522,31.674588,909.212911
27,p1v2_results.csv,linear_acceleration,3669.0,0.026060,89.099275,-884.244640,-29.775455,-0.008015,31.135011,908.437040
41,p2v1_results.csv,linear_acceleration,3653.0,2.416231,156.019607,-975.705217,-38.115892,-0.368397,39.128005,925.827734
55,p3v1_results.csv,linear_acceleration,2321.0,-0.635259,137.205912,-900.560888,-68.732581,0.142421,71.989065,733.223359
69,p3v2_results.csv,linear_acceleration,3633.0,-0.046881,143.309620,-757.556312,-69.666487,1.067964,70.285821,809.499824
...,...,...,...,...,...,...,...,...,...,...
965,p33v1_results.csv,linear_acceleration,3619.0,-0.016729,52.629074,-417.625237,-18.476550,-0.005006,18.038535,775.788741
979,p34v1_results.csv,linear_acceleration,2981.0,-0.535278,145.047391,-996.155915,-41.230113,0.361926,42.287494,995.863979
993,p35v1_results.csv,linear_acceleration,3200.0,-0.473612,118.860712,-910.622542,-41.352167,0.788785,38.246494,999.975688
1007,p35v2_results.csv,linear_acceleration,3587.0,0.782955,129.533632,-788.092152,-48.014339,1.944467,51.732034,855.318534


### Normal People

In [4]:
# Per-file describe() tables for the "normal people" videos; the processing
# loop further down this cell appends one entry per results file.
df_np_stats_list = []

# NOTE: this rebinds `extract_numbers`, which the patient cell defines with a
# different pattern ('p...' instead of 'np...'). Whichever cell ran last wins.
def extract_numbers(filename):
    """Sort key that orders normal-people result files numerically.

    Parameters
    ----------
    filename : str
        A directory entry name, e.g. ``'np1v2_results.csv'``.

    Returns
    -------
    tuple
        ``(person, video)`` as ints when the name starts with
        ``np<digits>v<digits>``; otherwise ``(inf, inf, filename)``.

    Names that do not match the pattern previously produced a bare string,
    which cannot be ordered against the tuples produced for matching names
    (``sorted`` raised ``TypeError``). The fallback is now a tuple whose first
    element is infinity, so unmatched names sort after every matched file and
    alphabetically among themselves.
    """
    match = re.match(r'np(\d+)v(\d+)', filename)
    if match:
        return int(match.group(1)), int(match.group(2))
    return float('inf'), float('inf'), filename


# Folder holding the per-video "normal people" result files.
NP_RESULTS_DIR = 'Computer Vision/Results/np/'
# Destination of the combined table.
NP_STATS_CSV = 'Computer Vision/Results/np_all_stats.csv'

# Only CSV files are processed, so stray directory entries (hidden files,
# sub-folders) are never handed to pd.read_csv.
files = sorted(
    (name for name in os.listdir(NP_RESULTS_DIR) if name.lower().endswith('.csv')),
    key=extract_numbers,
)

if not files:
    # pd.concat on an empty list fails with an opaque message; fail early instead.
    raise ValueError(f"No CSV result files found in {NP_RESULTS_DIR!r}")

for file in files:
    file_path = os.path.join(NP_RESULTS_DIR, file)
    df = pd.read_csv(file_path)
    # NOTE(review): unlike the patient cell, no |linear_acceleration| <= 1000
    # outlier filter is applied here - confirm this difference is intended.
    # After the transpose each row is one described column of the file and
    # each column is one summary statistic (count, mean, std, ...).
    df_desc = df.describe().T

    df_desc['file'] = file
    # Despite its name, 'statistic' stores the described column's name
    # (the index of the transposed describe() table).
    df_desc['statistic'] = df_desc.index

    print(f"Processing file: {file_path}")
    print(f"DataFrame shape: {df.shape}")
    print(f"Statistics shape: {df_desc.shape}")

    df_np_stats_list.append(df_desc)

combined_desc = pd.concat(df_np_stats_list)
# Move the identifying columns to the front, keeping the rest in order.
leading_cols = ['file', 'statistic']
combined_desc = combined_desc[leading_cols + [col for col in combined_desc.columns if col not in leading_cols]]

# Print the number of files processed and the shape of the combined DataFrame
print(f"Files processed: {len(files)}")
print(f"Combined description shape: {combined_desc.shape}")

# Save to CSV (index dropped; the column name is already in 'statistic')
combined_desc.to_csv(NP_STATS_CSV, index=False)

Processing file: Computer Vision/Results/np/np1v1_results.csv
DataFrame shape: (7313, 14)
Statistics shape: (14, 10)
Processing file: Computer Vision/Results/np/np1v2_results.csv
DataFrame shape: (7344, 14)
Statistics shape: (14, 10)
Combined description shape: (28, 10)
Combined description shape: (28, 10)


In [6]:
# Values of the 'statistic' column whose rows are kept.
stats = ['linear_acceleration']

# Reload the combined normal-people table written by the previous cell.
df_np_all_stats = pd.read_csv('Computer Vision/Results/np_all_stats.csv')

# NOTE(review): the variable and the output file are named "angularAcceleration",
# but the rows selected are 'linear_acceleration' - confirm which is intended.
keep_rows = df_np_all_stats['statistic'].isin(stats)
df_np_all_stats_angularAcceleration = df_np_all_stats.loc[keep_rows].copy()

df_np_all_stats_angularAcceleration.to_csv(
    'Computer Vision/Results/np_all_stats_angularAcceleration.csv',
    index=False,
)
# Bare last expression so the notebook renders the selected rows.
df_np_all_stats_angularAcceleration

Unnamed: 0,file,statistic,count,mean,std,min,25%,50%,75%,max
13,np1v1_results.csv,linear_acceleration,7311.0,0.0575,71.020173,-729.699033,-27.280563,0.496289,28.309368,677.486215
27,np1v2_results.csv,linear_acceleration,7342.0,0.013047,67.376712,-627.053821,-28.924871,-0.541511,27.514374,711.472604
