<a href="https://colab.research.google.com/github/arham5siddiqui/Mitigating-Linkability-Attacks-through-Differential-Privacy-enabled-Neural-Network-Training/blob/main/Step1_RESAMPLing_Split_EulerConvert.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import os
import math
import shutil
from sklearn.model_selection import train_test_split

In [None]:



def slerp(p0, p1, t):
    """Spherically interpolate between two vectors.

    The angle between the inputs is taken from their dot product, so both
    are assumed to be unit length (e.g. unit quaternions); they are not
    normalised here.

    Args:
        p0: Start vector (array-like of floats).
        p1: End vector, same length as ``p0``.
        t: Interpolation fraction; ``0`` yields ``p0`` and ``1`` yields ``p1``.

    Returns:
        numpy.ndarray: The interpolated vector.
    """
    p0 = np.asarray(p0, dtype=float)
    p1 = np.asarray(p1, dtype=float)

    # Rounding can push the dot product slightly outside arccos's domain.
    dot_product = np.clip(np.dot(p0, p1), -1.0, 1.0)

    omega = np.arccos(dot_product)
    so = np.sin(omega)

    # For (anti)parallel inputs sin(omega) vanishes and the SLERP weights
    # degenerate to 0/0 (NaN). Linear interpolation is the limit of SLERP as
    # the angle goes to zero, so use it instead of dividing by ~0.
    if np.abs(so) < 1e-8:
        return (1.0 - t) * p0 + t * p1

    return (np.sin((1.0 - t) * omega) / so) * p0 + (np.sin(t * omega) / so) * p1



# Function to resample a single DataFrame
def resample_data(df):
    """Resample a recording onto a 33 ms grid and SLERP-fill quaternion gaps.

    Every column is first reduced to the mean of the samples falling in each
    33 ms bin. Bins that contain no samples come out as NaN; for those bins
    the four quaternion columns are filled by spherical interpolation between
    the nearest raw samples before and after the bin timestamp. Other columns
    are left as produced by the bin mean.

    Args:
        df: DataFrame indexed by timestamp (a ``DatetimeIndex``) containing
            the columns ``UnitQuaternion.w/x/y/z``.

    Returns:
        pandas.DataFrame: The resampled frame, indexed by bin timestamp.
    """
    quat_cols = ['UnitQuaternion.w', 'UnitQuaternion.x', 'UnitQuaternion.y', 'UnitQuaternion.z']

    # Mean of the samples inside each 33 ms bin (roughly 30 Hz).
    df_resampled = df.resample('33ms').mean()

    # Raw quaternion samples usable as interpolation anchors. A positional
    # mask avoids index alignment problems when timestamps repeat.
    has_quat = df['UnitQuaternion.w'].notna().to_numpy()
    known = df.loc[has_quat, quat_cols]
    # Repeated timestamps would give a zero-length interval (division by
    # zero in the interpolation fraction), so keep one row per timestamp.
    known = known[~known.index.duplicated(keep='last')].sort_index()
    if len(known) < 2:
        # Nothing to interpolate between.
        return df_resampled

    known_times = known.index
    known_values = known.to_numpy(dtype=float)

    # Only bins that ended up empty need filling; populated bins keep their mean.
    missing = df_resampled.index[df_resampled['UnitQuaternion.w'].isna()]
    upper = known_times.searchsorted(missing, side='left')

    for t, hi in zip(missing, upper):
        # Skip grid points that are not bracketed by two raw samples.
        if hi == 0 or hi == len(known_times):
            continue
        lo = hi - 1
        t0 = known_times[lo]
        t1 = known_times[hi]
        fraction = (t - t0) / (t1 - t0)
        df_resampled.loc[t, quat_cols] = slerp(known_values[lo], known_values[hi], fraction)

    return df_resampled

# Resample every CSV recording of every user and store the result in a
# mirrored folder structure under RESAMPLED/.
base_input_folder = '/content/drive/MyDrive/MSc Project/Formated_Data/Experiment_1/'  # Adjust to your input location
base_output_folder = '/content/drive/MyDrive/MSc Project/Formated_Data/Experiment_1/RESAMPLED/'  # Adjust to your output location

for user_number in range(1, 49):  # one numbered subfolder per user: 1..48
    subfolder = str(user_number)
    input_path = os.path.join(base_input_folder, subfolder)
    output_path = os.path.join(base_output_folder, subfolder)

    # Make sure the destination subfolder is there before writing into it
    os.makedirs(output_path, exist_ok=True)

    for file_name in os.listdir(input_path):
        # Anything that is not a CSV recording is ignored
        if not file_name.endswith('.csv'):
            continue

        source_csv = os.path.join(input_path, file_name)
        target_csv = os.path.join(output_path, f'resampled_{file_name}')

        # Load the recording and index it by its parsed timestamps
        df = pd.read_csv(source_csv)
        df['Timestamp'] = pd.to_datetime(df['Timestamp'])
        df = df.set_index('Timestamp')

        # Resample and persist the result
        df_resampled = resample_data(df)
        df_resampled.to_csv(target_csv)


Train-Test dataset split

In [None]:


# Split every resampled recording into a train part and a test part, tagging
# each row with the user and video it came from.

# Define the base directories
base_input_dir = '/content/drive/MyDrive/MSc Project/Formated_Data/Experiment_1/RESAMPLED'  # Replace with the path to your input folder
base_output_train_dir = '/content/drive/MyDrive/MSc Project/Formated_Data/Experiment_1/TrainTestDataset'  # Replace with the path where you want to save training sets
base_output_test_dir = '/content/drive/MyDrive/MSc Project/Formated_Data/Experiment_1/TrainTestDataset'  # Replace with the path where you want to save test sets

# Loop through each user's subfolder
for user_folder in range(1, 49):  # Assuming subfolders are named as numbers from 1 to 48
    user_folder_str = str(user_folder)
    input_path = os.path.join(base_input_dir, user_folder_str)
    output_train_path = os.path.join(base_output_train_dir, user_folder_str)
    output_test_path = os.path.join(base_output_test_dir, user_folder_str)

    # Create output subfolders if they don't exist
    os.makedirs(output_train_path, exist_ok=True)
    os.makedirs(output_test_path, exist_ok=True)

    # os.listdir returns entries in arbitrary order, and non-CSV entries used
    # to consume IDs too. Filtering first and sorting makes the videoID given
    # to a file reproducible across runs and machines.
    csv_files = sorted(name for name in os.listdir(input_path) if name.endswith('.csv'))

    # Loop through each CSV file in the subfolder
    for video_id, file_name in enumerate(csv_files):
        df = pd.read_csv(os.path.join(input_path, file_name))

        # Create DataFrames for userID and videoID
        user_df = pd.DataFrame({'userID': [user_folder] * len(df)})
        video_df = pd.DataFrame({'videoID': [video_id] * len(df)})

        # Concatenate userID and videoID DataFrames with the original DataFrame
        df = pd.concat([user_df, video_df, df], axis=1)

        # Perform train-test split (75% train / 25% test, fixed seed)
        train_df, test_df = train_test_split(df, test_size=0.25, random_state=42, stratify=df['userID'])

        # Save the split datasets; the file-name prefix tells them apart
        train_df.to_csv(os.path.join(output_train_path, f'train_{file_name}'), index=False)
        test_df.to_csv(os.path.join(output_test_path, f'test_{file_name}'), index=False)


Separate Test and Train data

In [None]:
# Gather the per-user train/test CSVs into two flat folders, 'Train' and
# 'Test', prefixing each copied file with the user number.
base_input_dir = '/content/drive/MyDrive/MSc Project/Formated_Data/Experiment_1/TrainTestDataset'  # Folder holding the per-user train and test sets

# Destination directories live inside the input directory
base_output_train_dir = os.path.join(base_input_dir, 'Train')
base_output_test_dir = os.path.join(base_input_dir, 'Test')

for destination_dir in (base_output_train_dir, base_output_test_dir):
    os.makedirs(destination_dir, exist_ok=True)

# File-name prefix -> folder the file is copied to (first match wins)
prefix_destinations = (
    ('train_', base_output_train_dir),
    ('test_', base_output_test_dir),
)

for user_folder in range(1, 49):  # user subfolders are numbered 1..48
    user_folder_str = str(user_folder)
    user_path = os.path.join(base_input_dir, user_folder_str)

    for file_name in os.listdir(user_path):
        for prefix, destination_dir in prefix_destinations:
            if file_name.startswith(prefix):
                shutil.copy(
                    os.path.join(user_path, file_name),
                    os.path.join(destination_dir, f"{user_folder_str}_{file_name}"),
                )
                break


Euler Conversion

In [None]:


def quaternion_to_euler(w, x, y, z):
    """Convert quaternion components to Euler angles.

    Works element-wise, so the arguments may be scalars or equally shaped
    NumPy arrays.

    Args:
        w, x, y, z: Quaternion components.

    Returns:
        tuple: ``(yaw, pitch, roll)`` in radians.
    """
    roll = np.arctan2(
        2.0 * (w * x + y * z),
        1.0 - 2.0 * (x * x + y * y),
    )

    # Clamp before arcsin so rounding error cannot leave its [-1, 1] domain.
    sin_pitch = np.clip(2.0 * (w * y - z * x), -1.0, 1.0)
    pitch = np.arcsin(sin_pitch)

    yaw = np.arctan2(
        2.0 * (w * z + x * y),
        1.0 - 2.0 * (y * y + z * z),
    )

    return yaw, pitch, roll

# Append yaw/pitch/roll columns, derived from the quaternion columns, to every
# CSV found one level below the base folders. Files are rewritten in place.

# Base folders for Train and Test data
base_train_folder = '/content/drive/MyDrive/MSc Project/Formated_Data/Experiment_1/TrainTestDataset'  # Replace with your actual path for Train data
base_test_folder = '/content/drive/MyDrive/MSc Project/Formated_Data/Experiment_1/TrainTestDataset'  # Replace with your actual path for Test data

# dict.fromkeys drops duplicates while keeping order: when both base folders
# point at the same directory, each file would otherwise be read, converted
# and rewritten twice.
for base_folder in dict.fromkeys([base_train_folder, base_test_folder]):
    for user_folder in os.listdir(base_folder):
        user_path = os.path.join(base_folder, user_folder)

        # Stray files sitting directly in the base folder cannot be listed
        # as directories; skip them instead of crashing.
        if not os.path.isdir(user_path):
            continue

        for file_name in os.listdir(user_path):
            if file_name.endswith('.csv'):
                file_path = os.path.join(user_path, file_name)

                # Read the CSV file into a DataFrame
                df = pd.read_csv(file_path)

                # Compute Euler angles from quaternion data
                df['yaw'], df['pitch'], df['roll'] = quaternion_to_euler(
                    df['UnitQuaternion.w'].to_numpy(),
                    df['UnitQuaternion.x'].to_numpy(),
                    df['UnitQuaternion.y'].to_numpy(),
                    df['UnitQuaternion.z'].to_numpy()
                )

                # Updating the CSV file with Euler angles
                df.to_csv(file_path, index=False)


The basic dataset-splitting and SLERP-resampling code was adapted from reference implementations; after applying custom changes to meet this project's requirements, the complete code for the Step 1 notebook was assembled.