In [1]:
import glob
import os
import pandas as pd
import ast

In [2]:
# Define the path to the folder containing the text files
folder_path = '../data/data'

# Use glob to get a list of all text files in the folder
text_files = glob.glob(os.path.join(folder_path, '*.txt'))

# Initialize lists to store data
vectors = []
clients = []
angles = []
parkinson_status = []

# Loop through the list and read each file
for file_path in text_files:
    # Extract client, angle, and Parkinson status from the file name
    file_name = os.path.basename(file_path).split('.')[0]
    parts = file_name.split('-')
    print(parts)
    client = int(parts[0])
    angle = int(parts[1])
    parkinson = 1 if parts[2].lower() == 'p' else 0
    
    with open(file_path, 'r') as file:
        for line in file:
            # Extract the vector from the line
            _, vector_str = line.split(': ')
            vector = ast.literal_eval(vector_str.strip())
            
            # Flatten the nested list if needed
            vector = [item for sublist1 in vector for sublist2 in sublist1 for item in sublist2]
            
            # Append to lists
            vectors.append(vector)
            clients.append(client)
            angles.append(angle)
            parkinson_status.append(parkinson)

# Create the DataFrame
data = {
    'client': clients,
    'angle': angles,
    'parkinson': parkinson_status,
    'vector': vectors
}
df = pd.DataFrame(data)


['1', '0', 'P']
['1', '0', 'T']
['1', '180', 'P']
['1', '180', 'T']
['10', '0', 'P']
['10', '0', 'T']
['10', '180', 'P']
['10', '180', 'T']
['11', '0', 'P']
['11', '0', 'T']
['11', '180', 'P']
['11', '180', 'T']
['12', '0', 'P']
['12', '0', 'T']
['12', '180', 'P']
['12', '180', 'T']
['13', '0', 'P']
['13', '0', 'T']
['13', '180', 'P']
['13', '180', 'T']
['14', '0', 'P']
['14', '0', 'T']
['14', '180', 'P']
['14', '180', 'T']
['15', '0', 'P']
['15', '0', 'T']
['15', '180', 'P']
['15', '180', 'T']
['16', '0', 'P']
['16', '0', 'T']
['16', '180', 'P(1)']
['16', '180', 'P']
['16', '180', 'T']
['17', '0', 'P(1)']
['17', '0', 'P']
['17', '0', 'T']
['17', '180', 'P(1)']
['17', '180', 'P']
['17', '180', 'T']
['18', '0', 'P']
['18', '0', 'T']
['18', '180', 'P']
['18', '180', 'T']
['19', '0', 'P']
['19', '0', 'T']
['19', '180', 'P']
['19', '180', 'T']
['2', '0', 'P']
['2', '0', 'T']
['2', '180', 'P']
['2', '180', 'T']
['20', '0', 'P']
['20', '0', 'T']
['20', '180', 'P']
['20', '180', 'T']
['21', '

In [3]:
#remove rows where vector == [] empty list
df = df[df.astype(str)['vector'] != '[]']


In [4]:
df.to_csv('../output_data/output_2D.csv', index=False)