In [8]:
import glob
import os
import pandas as pd
import ast

In [9]:
# Define the path to the folder containing the text files
folder_path = '../data/data'

# Number of flattened entries stored in each DataFrame row
CHUNK_SIZE = 17


def load_vector_rows(file_paths, chunk_size=CHUNK_SIZE):
    """Parse the given text files into one record per vector chunk.

    The base name of each file (text before the first '.') is split on '-':
    the first part is read as the integer client, the second as the integer
    angle, and the third marks Parkinson status (1 when it equals 'p',
    case-insensitively, otherwise 0).

    Every non-blank line must look like ``<label>: <list literal>``. The
    literal is evaluated with ``ast.literal_eval``, flattened by two levels,
    and cut into consecutive slices of ``chunk_size`` entries. A trailing
    slice shorter than ``chunk_size`` is kept as-is.

    Parameters
    ----------
    file_paths : iterable of str
        Paths of the files to read, processed in the order given.
    chunk_size : int, optional
        Number of flattened entries per record (default ``CHUNK_SIZE``).

    Returns
    -------
    list of tuple
        ``(client, angle, parkinson, chunk)`` records in reading order.

    Raises
    ------
    ValueError
        If a non-blank line has no ``': '`` separator, or if the client or
        angle part of a file name is not an integer.
    IndexError
        If a file name has fewer than three '-'-separated parts.
    """
    rows = []
    for file_path in file_paths:
        # Extract client, angle, and Parkinson status from the file name
        file_name = os.path.basename(file_path).split('.')[0]
        parts = file_name.split('-')
        client = int(parts[0])
        angle = int(parts[1])
        parkinson = 1 if parts[2].lower() == 'p' else 0

        with open(file_path, 'r') as file:
            for line_number, line in enumerate(file, start=1):
                # Blank lines (e.g. a trailing newline) carry no vector
                if not line.strip():
                    continue

                # Split on the first ': ' only, so the failure mode for a
                # malformed line is an explicit, locatable error
                _, separator, vector_str = line.partition(': ')
                if not separator:
                    raise ValueError(
                        "{}:{}: expected '<label>: <vector>'".format(
                            file_path, line_number
                        )
                    )
                vector = ast.literal_eval(vector_str.strip())

                # Flatten the two outermost nesting levels
                vector = [
                    item
                    for sublist1 in vector
                    for sublist2 in sublist1
                    for item in sublist2
                ]

                # Split vector into chunks of chunk_size elements
                for i in range(0, len(vector), chunk_size):
                    rows.append(
                        (client, angle, parkinson, vector[i:i + chunk_size])
                    )
    return rows


# Use glob to get a list of all text files in the folder. glob returns them
# in arbitrary order, so sort to make the row order reproducible.
text_files = sorted(glob.glob(os.path.join(folder_path, '*.txt')))

# Read every file and collect one record per chunk
rows = load_vector_rows(text_files)

# Column lists for the DataFrame
clients = [row[0] for row in rows]
angles = [row[1] for row in rows]
parkinson_status = [row[2] for row in rows]
vectors = [row[3] for row in rows]

# Create the DataFrame
data = {
    'client': clients,
    'angle': angles,
    'parkinson': parkinson_status,
    'vector': vectors
}
df = pd.DataFrame(data)


In [10]:
# Remove rows whose vector is an empty list. The length of the 'vector'
# column is checked directly, which avoids converting every column of the
# frame to strings just to compare one of them against '[]'.
df = df[df['vector'].map(len) > 0]

# Check the unique lengths that remain in the vector column
print(df['vector'].apply(len).unique())


[17]


In [11]:
# Note: vectors with 34, 51 or 68 elements were already split into rows of 17
# (2, 3 and 4 rows respectively) when the files were read, so nothing is left to do here.


In [12]:
# Display the assembled DataFrame as this cell's output
df

Unnamed: 0,client,angle,parkinson,vector
0,1,0,1,"[[54.766632080078125, 14.624755859375], [59.95..."
1,1,0,1,"[[55.523643493652344, 31.943103790283203], [62..."
2,1,0,1,"[[53.158626556396484, 23.652265548706055], [0...."
3,1,0,1,"[[54.80318832397461, 9.729766845703125], [0.0,..."
4,1,0,1,"[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0..."
...,...,...,...,...
33986,9,180,0,"[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [48.35810..."
33987,9,180,0,"[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [29.57067..."
33988,9,180,0,"[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [138.8289..."
33989,9,180,0,"[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [53.21421..."


In [13]:
# Write the frame to CSV without the index column. The target folder is
# created first because to_csv fails when the parent directory is missing.
output_path = '../output_data/output_2D.csv'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df.to_csv(output_path, index=False)