In [8]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import os
# Apply seaborn's default theme to every figure drawn after this cell.
sns.set_theme()
# Default figure size, as [width, height], for all subsequent matplotlib figures.
plt.rcParams['figure.figsize'] = [14,8]

In [9]:
# Column labels assigned by `load` to the header-less "<stem>_cyclic.csv" files.
CYCLIC_COLUMNS = [
    "PSVTime",
    "PSV",
    "PDVTime",
    "PDV",
    "EDVTime",
    "EDV",
    "ISVTime",
    "ISV",
    "MBF1",
    "MBF2",
    "SysTime",
    "DiasTime",
    "OSI",
    "WindowTime",
]

# Column labels assigned by `load` to the header-less "<stem>_time_series.csv" files.
TIME_SERIES_COLUMNS = [
    "Time",
    "BloodFlow",
    "Filtered Blood Flow",
    "Shear",
    "Filtered Shear",
    "Velocity",
    "Diameter",
]

# NOTE(review): not referenced by any cell visible here; presumably the labels
# of an analysis-settings export -- confirm against the code that consumes it.
ANALYSIS_SETTINGS_COLUMNS = [
    "Peak Height Threshold",
    "Cycle Duration",
    "Peak Count",
    "EpochEndTime",
    "Velocity Calibration",
    "Time Calibration",
    "Distance Calibration",
    "Zero Velocity Row Position",
]

In [10]:
def subj_file(subj_name: str, trial_type: str, data_type: str):
    """Locate one recording of a subject and return its path stem and data type.

    The folder ``bloodflow_data/<subj_name>`` is searched for a file whose name
    contains a marker for the requested trial and a marker for the requested
    kind of data. Candidates are considered in sorted order so the same file
    is chosen on every run (``os.listdir`` itself returns an arbitrary order).

    Parameters
    ----------
    subj_name : str
        Name of the subject's folder under ``bloodflow_data``.
    trial_type : str
        ``'baseline'``, ``'base'`` or ``'basline'`` select files whose name
        contains ``'base'`` or the misspelling ``'basline'``;
        ``'post-oc'``, ``'post-occ'``, ``'post-occlusion'`` or ``'post'``
        select files whose name contains ``'post'``.
    data_type : str
        ``'diameter'`` or ``'diam'`` select files containing ``'diam'``;
        ``'time'``, ``'time_series'``, ``'time series'`` or ``'cyclic'``
        select files containing ``'flow'`` or ``'doppler'``.

    Returns
    -------
    tuple
        ``(path_stem, data_type)``. For diameter data the stem is the file
        name minus its last four characters; for flow data it is the file
        name up to (not including) the first underscore. ``data_type`` is
        passed through unchanged.

    Raises
    ------
    ValueError
        If ``trial_type`` or ``data_type`` is not one of the accepted
        spellings, or a matching flow file has no underscore in its name.
    IndexError
        If no file in the folder matches the request.
    FileNotFoundError
        If the subject folder does not exist (raised by ``os.listdir``).
    """
    folder = f"bloodflow_data/{subj_name}"

    if trial_type in ['baseline', 'base', 'basline']:
        # 'base' also covers 'baseline'; 'basline' (a misspelling presumably
        # present in some file names) does not contain 'base', so it needs
        # its own marker.
        trial_markers = ('base', 'basline')
    elif trial_type in ['post-oc', 'post-occ', 'post-occlusion', 'post']:
        trial_markers = ('post',)
    else:
        raise ValueError(f"{trial_type} is not an acceptable parameter for trial_type")

    # Sorted so that the `[0]` pick below does not depend on directory order.
    files = [
        file_name
        for file_name in sorted(os.listdir(folder))
        if any(marker in file_name for marker in trial_markers)
    ]

    wants_diameter = data_type in ['diameter', 'diam']
    if wants_diameter:
        data_markers = ('diam',)
    elif data_type in ['time', 'time_series', 'time series', 'cyclic']:
        data_markers = ('flow', 'doppler')
    else:
        raise ValueError(f"{data_type} is not an acceptable parameter for data_type")

    files = [
        file_name
        for file_name in files
        if any(marker in file_name for marker in data_markers)
    ]
    if not files:
        # Same exception type the bare `files[0]` lookup used to raise, but
        # with a message that says what was being looked for.
        raise IndexError(f"no {trial_type!r} {data_type!r} file found in {folder}")

    chosen = files[0]
    if wants_diameter:
        # Drop the last four characters (assumes a three-letter extension
        # such as ".npy"; `load` appends ".npy" again).
        filename = chosen[:-4]
    else:
        if '_' not in chosen:
            raise ValueError(
                f"cannot derive a file stem from {chosen!r} in {folder}: no '_' in the name"
            )
        # Keep everything before the first underscore; `load` appends
        # "_cyclic.csv" or "_time_series.csv" to this stem.
        filename = chosen[:chosen.index('_')]

    return f"{folder}/{filename}", data_type

In [11]:
def load(subj_file):
    """Read the recording described by a ``(path_stem, data_type)`` pair.

    Parameters
    ----------
    subj_file : tuple
        Pair as returned by the module-level ``subj_file(...)`` function:
        element 0 is the path without its suffix, element 1 the data type.
        The parameter name shadows that function inside this body; it is kept
        unchanged so existing callers are unaffected.

    Returns
    -------
    pandas.DataFrame
        * ``'diameter'`` / ``'diam'``: a single ``'diameter'`` column filled
          from the ``'diameters'`` entry of ``<path_stem>.npy``.
        * ``'cyclic'``: ``<path_stem>_cyclic.csv`` read without a header row
          and labelled with ``CYCLIC_COLUMNS``.
        * ``'time'`` / ``'time_series'`` / ``'time series'``:
          ``<path_stem>_time_series.csv`` read without a header row and
          labelled with ``TIME_SERIES_COLUMNS``.

    Raises
    ------
    ValueError
        If the data type is none of the above. (Such input used to yield an
        empty DataFrame silently, which hid typos from the caller.)
    """
    path_stem = subj_file[0]
    data_type = subj_file[1]

    if data_type in ['diameter', 'diam']:
        data = pd.DataFrame()
        data['diameter'] = np.load(f"{path_stem}.npy")['diameters']
        # TODO(review): scaling by the file's 'px_to_cm' entry was disabled in
        # the original code, so values are presumably still in pixels -- confirm.
        return data

    if data_type == 'cyclic':
        data = pd.read_csv(f"{path_stem}_cyclic.csv", header = None)
        data.columns = CYCLIC_COLUMNS
        return data

    if data_type in ['time', 'time_series', 'time series']:
        data = pd.read_csv(f"{path_stem}_time_series.csv", header = None)
        data.columns = TIME_SERIES_COLUMNS
        return data

    raise ValueError(f"{data_type} is not an acceptable parameter for data_type")

In [12]:
# Numeric subject IDs per cohort; later cells format them into folder names.
sci_subj_id = [*range(4, 24)]        # 4..23  -> "ASCI004" .. "ASCI023"
control_subj_id = [*range(1, 15)]    # 1..14  -> "ASCI101" .. "ASCI114"

In [13]:
# Availability check: try to load each of the four expected recordings for
# every subject in `sci_subj_id` and print one line per recording that fails.
for subj_id in sci_subj_id:
    subj_name = f"ASCI{subj_id:03}"

    for check_trial, check_kind, check_label in (
        ('baseline', 'diam', "Missing Baseline Diameter"),
        ('post', 'diam', "Missing Post Diameter"),
        ('baseline', 'time', "Missing Baseline Time Series"),
        ('post', 'time', "Missing Post Time Series"),
    ):
        try:
            load(subj_file(subj_name, check_trial, check_kind))
        except Exception:
            # Any failure to locate or read the file is reported as "missing"
            # (this also covers unreadable or malformed files). Unlike a bare
            # `except:`, KeyboardInterrupt and SystemExit still propagate, so
            # the cell can be interrupted.
            print(f"{subj_name}: {check_label}")
    

ASCI019: Missing Post Time Series


In [14]:
# Availability check: try to load each of the four expected recordings for
# every subject in `control_subj_id` and print one line per recording that fails.
for subj_id in control_subj_id:
    subj_name = f"ASCI1{subj_id:02}"

    for check_trial, check_kind, check_label in (
        ('baseline', 'diam', "Missing Baseline Diameter"),
        ('post', 'diam', "Missing Post Diameter"),
        ('baseline', 'time', "Missing Baseline Time Series"),
        ('post', 'time', "Missing Post Time Series"),
    ):
        try:
            load(subj_file(subj_name, check_trial, check_kind))
        except Exception:
            # Any failure to locate or read the file is reported as "missing"
            # (this also covers unreadable or malformed files). Unlike a bare
            # `except:`, KeyboardInterrupt and SystemExit still propagate, so
            # the cell can be interrupted.
            print(f"{subj_name}: {check_label}")
    