### **preprocess teh sensors data**

In [8]:
import os
import pandas as pd
import numpy as np
from scipy import signal
from tqdm import tqdm
import shutil

In [9]:
def preprocess_limb_data(file_path):
    df = pd.read_csv(file_path)
    required_columns = ['server_timestamp', 'session_id', 'goalkeeper_id', 'shot_result', 'type', 'accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y', 'gyro_z', 'mag_x', 'mag_y', 'mag_z']
    
    available_columns = [col for col in required_columns if col in df.columns]
    df = df[available_columns]
    
    # sensor data columns
    sensor_columns = [
        'accel_x', 'accel_y', 'accel_z',
        'gyro_x', 'gyro_y', 'gyro_z',
        'mag_x', 'mag_y', 'mag_z'
    ]
    
    # here we apply preprocessing to each sensor column
    for col in sensor_columns:
        if col in df.columns:
            detrended = signal.detrend(df[col].values, type='linear')
            
            window_length = min(15, len(detrended))
            if window_length > 2: 
                smoothed = signal.savgol_filter(detrended, window_length=window_length, polyorder=2)
            else:
                smoothed = detrended 
            
            df[col] = smoothed
    
    return df

In [10]:
def process_dataset(original_root, cleaned_root):
    limb_files = [
        ('RightArm.csv', 'RA'), 
        ('LeftArm.csv', 'LA'),
        ('RightLeg.csv', 'RL'),
        ('LeftLeg.csv', 'LL')
    ]
    os.makedirs(cleaned_root, exist_ok=True)
    
    for root, dirs, files in os.walk(original_root):
        rel_path = os.path.relpath(root, original_root)
        new_dir = os.path.join(cleaned_root, rel_path)
        os.makedirs(new_dir, exist_ok=True)
        
        for filename, limb_code in limb_files:
            if filename in files:
                original_file_path = os.path.join(root, filename)
                new_file_path = os.path.join(new_dir, filename)
                
                try:
                    processed_df = preprocess_limb_data(original_file_path)
                    processed_df.to_csv(new_file_path, index=False)
                    
                except Exception as e:
                    print(f"Error processing {original_file_path}: {str(e)}")
                    continue

In [11]:
if __name__ == "__main__":
    original_dataset_root = r"C:\uni\talent_rec-main (1)\talent_rec-main\saved\df\movement_with_feedback.csv"
    cleaned_dataset_root = r"C:\uni\talent_rec-main (1)\talent_rec-main\saved\cleaned"
    process_dataset(original_dataset_root, cleaned_dataset_root)
    print("\nDataset preprocessing complete!")


Dataset preprocessing complete!
