In [1]:
import numpy as np
import pandas as pd
from PIL import Image
import os
from sklearn.manifold import TSNE
import csv
import json

# Define the paths
image_folder_path = 'output'
json_folder_path = 'data'  # Define the path to the folder containing the JSON files
tsne_output_csv_path = 'spiral_tsne_data_velocity.csv'
resized_dimensions = (150, 112)  # Resize to 25% of the original size (600x450)

# Function to check if a file is hidden
def is_hidden(filepath):
    return any(part.startswith('.') for part in filepath.split(os.path.sep))

# Function to calculate the average instantaneous velocity
def calculate_average_velocity(coordinates):
    velocities = []
    for i in range(1, len(coordinates)):
        x1, y1, t1 = coordinates[i-1]['x'], coordinates[i-1]['y'], coordinates[i-1]['timestamp']
        x2, y2, t2 = coordinates[i]['x'], coordinates[i]['y'], coordinates[i]['timestamp']
        if t2 != t1:  # Prevent division by zero
            distance = np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
            time_diff = (t2 - t1) / 1000.0  # Convert ms to seconds
            velocity = distance / time_diff
            velocities.append(velocity)
    return np.mean(velocities) if velocities else 0

# List to hold all rows of data
data_rows = []
filenames = []

# Step 1: Process images and collect pixel data
for filename in os.listdir(image_folder_path):
    if filename.endswith('.png') and not is_hidden(filename):
        image_path = os.path.join(image_folder_path, filename)
        
        # Load the image using Pillow
        image = Image.open(image_path).convert('L')  # Convert image to grayscale
        
        # Resize the image
        resized_image = image.resize(resized_dimensions)
        
        # Convert the resized image to a numpy array
        pixel_data = np.array(resized_image)
        
        # Flatten the array to get a 1D array of pixel values
        flattened_pixel_data = pixel_data.flatten()
        
        # Append the data row and filename
        data_rows.append(flattened_pixel_data)
        filenames.append(filename)

# Convert the data to a numpy array
pixel_data_array = np.array(data_rows)

# Step 2: Apply t-SNE to the pixel data
tsne = TSNE(n_components=3, random_state=42)
tsne_results = tsne.fit_transform(pixel_data_array)

# Step 3: Process the JSON files and match data
json_data = {}
for json_filename in os.listdir(json_folder_path):
    if not is_hidden(json_filename):
        json_path = os.path.join(json_folder_path, json_filename)
        with open(json_path, 'r') as json_file:
            data = json.load(json_file)
            json_data[data['time-uploaded']] = {
                'userId': data['userId'],
                'dominantHand': data['dominantHand'],
                'drawnHand': data['drawnHand'],
                'isParkinsonPatient': data['isParkinsonPatient'],
                'isDominant': data['dominantHand'] == data['drawnHand'],
                'average_velocity': calculate_average_velocity(data['coordinates'])
            }

# Combine t-SNE results with JSON data
combined_data = []
for i, filename in enumerate(filenames):
    timestamp = int(os.path.splitext(filename)[0])
    if timestamp in json_data:
        combined_data.append([
            timestamp,
            tsne_results[i][0],
            tsne_results[i][1],
            tsne_results[i][2],
            json_data[timestamp]['average_velocity'],
            json_data[timestamp]['userId'],
            json_data[timestamp]['dominantHand'],
            json_data[timestamp]['drawnHand'],
            json_data[timestamp]['isParkinsonPatient'],
            json_data[timestamp]['isDominant']
        ])

# Sort the combined data by timestamp
combined_data.sort(key=lambda x: x[0])

# Step 4: Save the sorted t-SNE results to a CSV file with additional data
with open(tsne_output_csv_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    # Write the header
    writer.writerow(['timestamp', 'tsne1', 'tsne2', 'tsne3', 'average_velocity', 'userId', 'dominantHand', 'drawnHand', 'isParkinsonPatient', 'isDominant'])
    # Write the data rows
    writer.writerows(combined_data)

print(f"t-SNE data saved to {tsne_output_csv_path}")

t-SNE data saved to spiral_tsne_data_velocity.csv
