In [11]:
# Sanity check: show the (rows, columns) shape of two frames.
# NOTE(review): `left_df` and `right_df` are not assigned in any cell visible
# here, so this cell depends on earlier kernel state — confirm where they come from.
# Both calls print the same label; the first output line is left_df, the second right_df.
print("Shape of DataFrame:", left_df.shape)        # prints a tuple (rows, columns)
print("Shape of DataFrame:", right_df.shape)        # prints a tuple (rows, columns)


Shape of DataFrame: (256, 20)
Shape of DataFrame: (256, 20)


In [93]:
import pandas as pd
import os
import numpy as np

# Per-participant column schema (19 columns), grouped by signal family.
# The names are prefixed with "Abhay_" / "Arjun_" when a sheet is loaded.
expected_columns = (
    ['time']
    + ['palm_position_x', 'palm_position_y', 'palm_position_z']
    + ['palm_normal_x', 'palm_normal_y', 'palm_normal_z']
    + ['palm_direction_x', 'palm_direction_y', 'palm_direction_z']
    + ['hand_grab_angle', 'hand_grab_strength']
    + ['hand_pinch_angle', 'hand_pinch_strength']
    + ['thumb_extension', 'index_extension', 'middle_extension',
       'ring_extension', 'pinky_extension']
)

def is_row_empty(row):
    """Return True when every cell in ``row`` is missing or blank.

    A cell counts as blank if ``pd.isna`` reports it missing, or if it is a
    string that contains only whitespace. A row with no cells is empty too.
    """
    for cell in row:
        if pd.isna(cell):
            continue
        if isinstance(cell, str) and not cell.strip():
            continue
        # This cell carries real content, so the row is not empty.
        return False
    return True

def trim_leading_empty_rows(df):
    """Drop blank rows from the top of ``df``.

    Returns the frame starting at the first non-blank row, re-indexed from 0.
    If every row is blank (or there are no rows), returns an empty frame that
    keeps only the column labels.
    """
    first_data_pos = next(
        (pos for pos in range(len(df)) if not is_row_empty(df.iloc[pos])),
        None,
    )
    if first_data_pos is None:
        return pd.DataFrame(columns=df.columns)
    return df.iloc[first_data_pos:].reset_index(drop=True)

def trim_trailing_empty_rows(df):
    """Drop blank rows from the bottom of ``df``.

    Returns the frame up to and including the last non-blank row, re-indexed
    from 0. If every row is blank (or there are no rows), returns an empty
    frame that keeps only the column labels.
    """
    last_data_pos = next(
        (pos for pos in range(len(df) - 1, -1, -1) if not is_row_empty(df.iloc[pos])),
        None,
    )
    if last_data_pos is None:
        return pd.DataFrame(columns=df.columns)
    return df.iloc[:last_data_pos + 1].reset_index(drop=True)

def find_trial_split_index(df):
    """Return the position of the first blank row in ``df``, or None.

    The first blank row marks the boundary between trial 1 and trial 2.
    Separators made of one blank row and of several consecutive blank rows
    are both handled by returning the first blank position; callers trim any
    further blank rows from the start of the second trial.

    The previous version peeked at the next row before returning, but both
    branches returned the same index, so the look-ahead had no effect and has
    been removed.
    """
    for pos in range(len(df)):
        if is_row_empty(df.iloc[pos]):
            return pos
    return None

def split_trials(df):
    """Split one participant's frame into ``(trial1, trial2)``.

    Leading blank rows are dropped first. The first remaining blank row is
    the separator: rows before it form trial 1, rows after it form trial 2,
    and both halves are trimmed of surrounding blank rows. When there is no
    blank row, the whole frame is trial 1 and trial 2 is an empty frame with
    the same columns. Both results are re-indexed from 0.
    """
    trimmed = trim_leading_empty_rows(df)
    gap_pos = find_trial_split_index(trimmed)

    if gap_pos is None:
        no_second_trial = pd.DataFrame(columns=trimmed.columns)
        return trimmed.reset_index(drop=True), no_second_trial

    first = trim_trailing_empty_rows(trimmed.iloc[:gap_pos])
    # Skip the separator row itself, then strip blank rows on both sides.
    second = trim_trailing_empty_rows(
        trim_leading_empty_rows(trimmed.iloc[gap_pos + 1:])
    )
    return first.reset_index(drop=True), second.reset_index(drop=True)

def downsample_to_60_rows(df, target=60):
    """Thin ``df`` down to ``target`` evenly spaced rows.

    Frames that already have ``target`` rows or fewer are returned unchanged
    (they are not padded). The result is always re-indexed from 0.
    """
    n_rows = len(df)
    if n_rows > target:
        # Evenly spaced positions from the first to the last row, truncated to ints.
        keep = np.linspace(0, n_rows - 1, target, dtype=int)
        df = df.iloc[keep]
    return df.reset_index(drop=True)

# === MAIN ===
# Load every sheet, split it per participant and per trial, thin each trial to
# at most 60 rows, and collect the results.
# NOTE(review): machine-specific absolute path — adjust before running elsewhere.
input_folder = r"C:\Users\Abhay\Downloads\Sheets"  # Change if needed
# os.listdir() gives no ordering guarantee; sort case-insensitively so the
# concatenated frames come out in the same row order on every run. The
# extension check is case-insensitive to match the skip check below.
csv_files = sorted(
    (f for f in os.listdir(input_folder) if f.lower().endswith('.csv')),
    key=str.lower,
)

# One list per participant/trial; each element is one file's trial frame.
all_abhay_trial1, all_abhay_trial2 = [], []
all_arjun_trial1, all_arjun_trial2 = [], []

for file in csv_files:
    # This sheet is skipped on purpose (the reason is not recorded in this notebook).
    if file.lower() == 'asl - 3.csv':
        continue

    path = os.path.join(input_folder, file)
    try:
        # Read without a header so the header row can be dropped positionally;
        # columns are renamed from `expected_columns` below.
        df = pd.read_csv(path, header=None)
        df = df.iloc[1:].reset_index(drop=True)  # remove actual header
        df = df.iloc[:, :39]  # 19 Abhay + 1 blank + 19 Arjun

        abhay_df = df.iloc[:, :19].copy()
        arjun_df = df.iloc[:, 20:].copy()  # skip 1-column blank

        # Raises a length-mismatch error (caught below) if the sheet is narrower than expected.
        abhay_df.columns = [f"Abhay_{col}" for col in expected_columns]
        arjun_df.columns = [f"Arjun_{col}" for col in expected_columns]

        # Drop columns that contain no data at all.
        abhay_df = abhay_df.dropna(axis=1, how='all')
        arjun_df = arjun_df.dropna(axis=1, how='all')

        abhay_t1, abhay_t2 = split_trials(abhay_df)
        arjun_t1, arjun_t2 = split_trials(arjun_df)

        abhay_t1 = downsample_to_60_rows(abhay_t1)
        abhay_t2 = downsample_to_60_rows(abhay_t2)
        arjun_t1 = downsample_to_60_rows(arjun_t1)
        arjun_t2 = downsample_to_60_rows(arjun_t2)

        all_abhay_trial1.append(abhay_t1)
        all_abhay_trial2.append(abhay_t2)
        all_arjun_trial1.append(arjun_t1)
        all_arjun_trial2.append(arjun_t2)

        print(f"{file}:")
        print(f"  Abhay Trial 1 rows: {len(abhay_t1)}")
        print(f"  Abhay Trial 2 rows: {len(abhay_t2)}")
        print(f"  Arjun Trial 1 rows: {len(arjun_t1)}")
        print(f"  Arjun Trial 2 rows: {len(arjun_t2)}")
        print()

    except Exception as e:
        # Best effort: report the failing sheet and carry on with the rest.
        print(f"Error processing {file}: {e}")

# The four lists are appended together, so checking one is enough. Fail with
# a clear message instead of pandas' generic "No objects to concatenate".
if not all_abhay_trial1:
    raise ValueError(f"No CSV file in {input_folder!r} was processed successfully")

# Optionally concatenate for full dataset
abhay_trial1_df = pd.concat(all_abhay_trial1, ignore_index=True)
abhay_trial2_df = pd.concat(all_abhay_trial2, ignore_index=True)
arjun_trial1_df = pd.concat(all_arjun_trial1, ignore_index=True)
arjun_trial2_df = pd.concat(all_arjun_trial2, ignore_index=True)

# Optionally save
# abhay_trial1_df.to_csv("abhay_trial1.csv", index=False)
# abhay_trial2_df.to_csv("abhay_trial2.csv", index=False)
# arjun_trial1_df.to_csv("arjun_trial1.csv", index=False)
# arjun_trial2_df.to_csv("arjun_trial2.csv", index=False)


Air Quotes.csv:
  Abhay Trial 1 rows: 60
  Abhay Trial 2 rows: 60
  Arjun Trial 1 rows: 60
  Arjun Trial 2 rows: 60

ASL - 1.csv:
  Abhay Trial 1 rows: 60
  Abhay Trial 2 rows: 60
  Arjun Trial 1 rows: 60
  Arjun Trial 2 rows: 60

ASL - 10.csv:
  Abhay Trial 1 rows: 60
  Abhay Trial 2 rows: 60
  Arjun Trial 1 rows: 60
  Arjun Trial 2 rows: 60

ASL - 2.csv:
  Abhay Trial 1 rows: 60
  Abhay Trial 2 rows: 60
  Arjun Trial 1 rows: 60
  Arjun Trial 2 rows: 60

ASL - 4.csv:
  Abhay Trial 1 rows: 60
  Abhay Trial 2 rows: 60
  Arjun Trial 1 rows: 60
  Arjun Trial 2 rows: 60

ASL - 5.csv:
  Abhay Trial 1 rows: 60
  Abhay Trial 2 rows: 60
  Arjun Trial 1 rows: 60
  Arjun Trial 2 rows: 60

ASL - 6.csv:
  Abhay Trial 1 rows: 60
  Abhay Trial 2 rows: 60
  Arjun Trial 1 rows: 60
  Arjun Trial 2 rows: 60

ASL - 7.csv:
  Abhay Trial 1 rows: 60
  Abhay Trial 2 rows: 60
  Arjun Trial 1 rows: 60
  Arjun Trial 2 rows: 60

ASL - 8.csv:
  Abhay Trial 1 rows: 60
  Abhay Trial 2 rows: 60
  Arjun Trial 1 rows:

Unnamed: 0,Abhay_time,Abhay_palm_position_x,Abhay_palm_position_y,Abhay_palm_position_z,Abhay_palm_normal_x,Abhay_palm_normal_y,Abhay_palm_normal_z,Abhay_palm_direction_x,Abhay_palm_direction_y,Abhay_palm_direction_z,Abhay_hand_grab_angle,Abhay_hand_grab_strength,Abhay_hand_pinch_angle,Abhay_hand_pinch_strength,Abhay_thumb_extension,Abhay_index_extension,Abhay_middle_extension,Abhay_ring_extension,Abhay_pinky_extension
0,0.462307,-7.291691,174.804199,18.49427,-0.193978,-0.978525,0.069727,-0.098436,-0.051304,-0.99382,0.018032,0.0,73.356422,0.0,1,1,1,1,1
1,0.462489,-7.194481,174.793045,18.480373,-0.1944,-0.978452,0.069571,-0.098643,-0.051064,-0.993812,0.015735,0.0,73.291641,0.0,1,1,1,1,1
2,1.001337,-3.023403,185.297943,26.636387,-0.299134,-0.941482,-0.15534,-0.061003,0.18133,-0.981529,1.651723,0.0,55.643162,1.0,0,1,1,0,0
3,1.022624,-3.416973,183.883408,28.147425,-0.317745,-0.935785,-0.152792,-0.066931,0.182878,-0.980855,1.967453,0.0,52.970123,1.0,0,1,1,0,0
4,1.052266,-3.934582,183.36644,29.235584,-0.323156,-0.932456,-0.161539,-0.075752,0.195638,-0.977746,2.219421,0.226856,50.303013,0.896072,0,1,0,0,0


In [88]:
# NOTE(review): `abhay_trial1` (no `_df` suffix) is not assigned in any cell
# visible here, so this relies on leftover kernel state — confirm which frame was intended.
print(abhay_trial1.shape)

(60, 19)


In [90]:
import pandas as pd
import os
import numpy as np
from sklearn.preprocessing import StandardScaler

# Per-participant column schema (19 columns), grouped by signal family.
# The names are prefixed with "Abhay_" / "Arjun_" when a sheet is loaded.
expected_columns = (
    ['time']
    + ['palm_position_x', 'palm_position_y', 'palm_position_z']
    + ['palm_normal_x', 'palm_normal_y', 'palm_normal_z']
    + ['palm_direction_x', 'palm_direction_y', 'palm_direction_z']
    + ['hand_grab_angle', 'hand_grab_strength']
    + ['hand_pinch_angle', 'hand_pinch_strength']
    + ['thumb_extension', 'index_extension', 'middle_extension',
       'ring_extension', 'pinky_extension']
)

def is_row_empty(row):
    """Return True when every cell in ``row`` is missing or blank.

    A cell counts as blank if ``pd.isna`` reports it missing, or if it is a
    string that contains only whitespace. A row with no cells is empty too.
    """
    for cell in row:
        if pd.isna(cell):
            continue
        if isinstance(cell, str) and not cell.strip():
            continue
        # This cell carries real content, so the row is not empty.
        return False
    return True

def trim_leading_empty_rows(df):
    """Drop blank rows from the top of ``df``.

    Returns the frame starting at the first non-blank row, re-indexed from 0.
    If every row is blank (or there are no rows), returns an empty frame that
    keeps only the column labels.
    """
    first_data_pos = next(
        (pos for pos in range(len(df)) if not is_row_empty(df.iloc[pos])),
        None,
    )
    if first_data_pos is None:
        return pd.DataFrame(columns=df.columns)
    return df.iloc[first_data_pos:].reset_index(drop=True)

def trim_trailing_empty_rows(df):
    """Drop blank rows from the bottom of ``df``.

    Returns the frame up to and including the last non-blank row, re-indexed
    from 0. If every row is blank (or there are no rows), returns an empty
    frame that keeps only the column labels.
    """
    last_data_pos = next(
        (pos for pos in range(len(df) - 1, -1, -1) if not is_row_empty(df.iloc[pos])),
        None,
    )
    if last_data_pos is None:
        return pd.DataFrame(columns=df.columns)
    return df.iloc[:last_data_pos + 1].reset_index(drop=True)

def find_trial_split_index(df):
    """Return the position of the first blank row in ``df``, or None.

    The first blank row marks the boundary between trial 1 and trial 2.
    The previous version peeked at the next row before returning, but both
    branches returned the same index, so the look-ahead had no effect and has
    been removed.
    """
    for pos in range(len(df)):
        if is_row_empty(df.iloc[pos]):
            return pos
    return None

def split_trials(df):
    """Split one participant's frame into ``(trial1, trial2)``.

    Leading blank rows are dropped first. The first remaining blank row is
    the separator: rows before it form trial 1, rows after it form trial 2,
    and both halves are trimmed of surrounding blank rows. When there is no
    blank row, the whole frame is trial 1 and trial 2 is an empty frame with
    the same columns. Both results are re-indexed from 0.
    """
    trimmed = trim_leading_empty_rows(df)
    gap_pos = find_trial_split_index(trimmed)

    if gap_pos is None:
        no_second_trial = pd.DataFrame(columns=trimmed.columns)
        return trimmed.reset_index(drop=True), no_second_trial

    first = trim_trailing_empty_rows(trimmed.iloc[:gap_pos])
    # Skip the separator row itself, then strip blank rows on both sides.
    second = trim_trailing_empty_rows(
        trim_leading_empty_rows(trimmed.iloc[gap_pos + 1:])
    )
    return first.reset_index(drop=True), second.reset_index(drop=True)

def downsample_to_60_rows(df, target=60):
    """Thin ``df`` down to ``target`` evenly spaced rows.

    Frames that already have ``target`` rows or fewer are returned unchanged
    (they are not padded). The result is always re-indexed from 0.
    """
    n_rows = len(df)
    if n_rows > target:
        # Evenly spaced positions from the first to the last row, truncated to ints.
        keep = np.linspace(0, n_rows - 1, target, dtype=int)
        df = df.iloc[keep]
    return df.reset_index(drop=True)

def standardize_df(df):
    """Return a copy of ``df`` with every non-time column z-scored.

    Columns whose name ends in ``'_time'`` are left untouched; all other
    columns are replaced by the output of ``StandardScaler.fit_transform``
    fitted on this frame alone.

    The input frame is not modified (the previous version overwrote the
    caller's columns in place). A frame with no rows, or with nothing to
    scale, is returned as an unchanged copy: ``split_trials`` yields an empty
    second trial when a sheet has no separator row, and fitting the scaler on
    zero samples would raise and cause the whole sheet to be skipped.
    """
    scaled = df.copy()
    non_time_cols = [col for col in scaled.columns if not str(col).endswith('_time')]
    if scaled.empty or not non_time_cols:
        return scaled
    scaler = StandardScaler()
    scaled[non_time_cols] = scaler.fit_transform(scaled[non_time_cols])
    return scaled

# === MAIN ===
# Load every sheet, split it per participant and per trial, thin each trial to
# at most 60 rows, standardize each trial on its own, and collect the results.
# NOTE(review): machine-specific absolute path — adjust before running elsewhere.
input_folder = r"C:\Users\Abhay\Downloads\Sheets"
# os.listdir() gives no ordering guarantee; sort case-insensitively so the
# concatenated frames come out in the same row order on every run. The
# extension check is case-insensitive to match the skip check below.
csv_files = sorted(
    (f for f in os.listdir(input_folder) if f.lower().endswith('.csv')),
    key=str.lower,
)

# One list per participant/trial; each element is one file's trial frame.
all_abhay_trial1, all_abhay_trial2 = [], []
all_arjun_trial1, all_arjun_trial2 = [], []

for file in csv_files:
    # This sheet is skipped on purpose (the reason is not recorded in this notebook).
    if file.lower() == 'asl - 3.csv':
        continue

    path = os.path.join(input_folder, file)
    try:
        # Read without a header so the header row can be dropped positionally;
        # columns are renamed from `expected_columns` below.
        df = pd.read_csv(path, header=None)
        df = df.iloc[1:].reset_index(drop=True)
        df = df.iloc[:, :39]  # 19 Abhay + 1 blank + 19 Arjun

        abhay_df = df.iloc[:, :19].copy()
        arjun_df = df.iloc[:, 20:].copy()

        # Raises a length-mismatch error (caught below) if the sheet is narrower than expected.
        abhay_df.columns = [f"Abhay_{col}" for col in expected_columns]
        arjun_df.columns = [f"Arjun_{col}" for col in expected_columns]

        # Drop columns that contain no data at all.
        abhay_df = abhay_df.dropna(axis=1, how='all')
        arjun_df = arjun_df.dropna(axis=1, how='all')

        abhay_t1, abhay_t2 = split_trials(abhay_df)
        arjun_t1, arjun_t2 = split_trials(arjun_df)

        abhay_t1 = downsample_to_60_rows(abhay_t1)
        abhay_t2 = downsample_to_60_rows(abhay_t2)
        arjun_t1 = downsample_to_60_rows(arjun_t1)
        arjun_t2 = downsample_to_60_rows(arjun_t2)

        # Each trial is scaled independently, using its own mean and variance.
        abhay_t1 = standardize_df(abhay_t1)
        abhay_t2 = standardize_df(abhay_t2)
        arjun_t1 = standardize_df(arjun_t1)
        arjun_t2 = standardize_df(arjun_t2)

        all_abhay_trial1.append(abhay_t1)
        all_abhay_trial2.append(abhay_t2)
        all_arjun_trial1.append(arjun_t1)
        all_arjun_trial2.append(arjun_t2)

        print(f"{file}:")
        print(f"  Abhay Trial 1 rows: {len(abhay_t1)}")
        print(f"  Abhay Trial 2 rows: {len(abhay_t2)}")
        print(f"  Arjun Trial 1 rows: {len(arjun_t1)}")
        print(f"  Arjun Trial 2 rows: {len(arjun_t2)}")
        print()

    except Exception as e:
        # Best effort: report the failing sheet and carry on with the rest.
        print(f"Error processing {file}: {e}")

# The four lists are appended together, so checking one is enough. Fail with
# a clear message instead of pandas' generic "No objects to concatenate".
if not all_abhay_trial1:
    raise ValueError(f"No CSV file in {input_folder!r} was processed successfully")

# Combine all trials
abhay_trial1_df = pd.concat(all_abhay_trial1, ignore_index=True)
abhay_trial2_df = pd.concat(all_abhay_trial2, ignore_index=True)
arjun_trial1_df = pd.concat(all_arjun_trial1, ignore_index=True)
arjun_trial2_df = pd.concat(all_arjun_trial2, ignore_index=True)

# Optionally save standardized data
# abhay_trial1_df.to_csv("abhay_trial1_standardized.csv", index=False)
# abhay_trial2_df.to_csv("abhay_trial2_standardized.csv", index=False)
# arjun_trial1_df.to_csv("arjun_trial1_standardized.csv", index=False)
# arjun_trial2_df.to_csv("arjun_trial2_standardized.csv", index=False)


Air Quotes.csv:
  Abhay Trial 1 rows: 60
  Abhay Trial 2 rows: 60
  Arjun Trial 1 rows: 60
  Arjun Trial 2 rows: 60

ASL - 1.csv:
  Abhay Trial 1 rows: 60
  Abhay Trial 2 rows: 60
  Arjun Trial 1 rows: 60
  Arjun Trial 2 rows: 60

ASL - 10.csv:
  Abhay Trial 1 rows: 60
  Abhay Trial 2 rows: 60
  Arjun Trial 1 rows: 60
  Arjun Trial 2 rows: 60

ASL - 2.csv:
  Abhay Trial 1 rows: 60
  Abhay Trial 2 rows: 60
  Arjun Trial 1 rows: 60
  Arjun Trial 2 rows: 60

ASL - 4.csv:
  Abhay Trial 1 rows: 60
  Abhay Trial 2 rows: 60
  Arjun Trial 1 rows: 60
  Arjun Trial 2 rows: 60

ASL - 5.csv:
  Abhay Trial 1 rows: 60
  Abhay Trial 2 rows: 60
  Arjun Trial 1 rows: 60
  Arjun Trial 2 rows: 60

ASL - 6.csv:
  Abhay Trial 1 rows: 60
  Abhay Trial 2 rows: 60
  Arjun Trial 1 rows: 60
  Arjun Trial 2 rows: 60

ASL - 7.csv:
  Abhay Trial 1 rows: 60
  Abhay Trial 2 rows: 60
  Arjun Trial 1 rows: 60
  Arjun Trial 2 rows: 60

ASL - 8.csv:
  Abhay Trial 1 rows: 60
  Abhay Trial 2 rows: 60
  Arjun Trial 1 rows:

In [92]:
# Preview the first rows of the combined Abhay trial-1 frame.
# NOTE(review): which cell last assigned `abhay_trial1_df` depends on execution order — confirm.
abhay_trial1_df.head()

Unnamed: 0,Abhay_time,Abhay_palm_position_x,Abhay_palm_position_y,Abhay_palm_position_z,Abhay_palm_normal_x,Abhay_palm_normal_y,Abhay_palm_normal_z,Abhay_palm_direction_x,Abhay_palm_direction_y,Abhay_palm_direction_z,Abhay_hand_grab_angle,Abhay_hand_grab_strength,Abhay_hand_pinch_angle,Abhay_hand_pinch_strength,Abhay_thumb_extension,Abhay_index_extension,Abhay_middle_extension,Abhay_ring_extension,Abhay_pinky_extension
0,0.462307,-3.196994,-1.096355,-1.527786,1.669171,-1.788879,4.535564,-2.941651,-4.245576,-1.554073,-4.793009,-0.590608,4.265439,-5.261179,5.385165,0.0,0.733799,5.385165,5.385165
1,0.462489,-3.138356,-1.10005,-1.532937,1.658151,-1.783446,4.532139,-2.953457,-4.240626,-1.552575,-4.798883,-0.590608,4.253109,-5.261179,5.385165,0.0,0.733799,5.385165,5.385165
2,1.001337,-0.622328,2.380192,1.49027,-1.077012,0.968157,-0.406718,-0.806623,0.553194,0.746471,-0.615378,-0.590608,0.893985,0.267215,-0.185695,0.0,0.733799,-0.185695,-0.185695
3,1.022624,-0.859733,1.911561,2.05037,-1.563045,1.392173,-0.350766,-1.144732,0.585127,0.872626,0.191999,-0.590608,0.385212,0.267215,-0.185695,0.0,0.733799,-0.185695,-0.185695
4,1.052266,-1.171959,1.740291,2.45372,-1.704355,1.639944,-0.542843,-1.647847,0.84834,1.454547,0.836325,2.032686,-0.122433,-0.30734,-0.185695,0.0,-1.36277,-0.185695,-0.185695


In [94]:
import pandas as pd
import os
import numpy as np
from sklearn.preprocessing import StandardScaler

# Per-participant column schema (19 columns), grouped by signal family.
# The names are prefixed with "Abhay_" / "Arjun_" when a sheet is loaded.
expected_columns = (
    ['time']
    + ['palm_position_x', 'palm_position_y', 'palm_position_z']
    + ['palm_normal_x', 'palm_normal_y', 'palm_normal_z']
    + ['palm_direction_x', 'palm_direction_y', 'palm_direction_z']
    + ['hand_grab_angle', 'hand_grab_strength']
    + ['hand_pinch_angle', 'hand_pinch_strength']
    + ['thumb_extension', 'index_extension', 'middle_extension',
       'ring_extension', 'pinky_extension']
)

def is_row_empty(row):
    """Return True when every cell in ``row`` is missing or blank.

    A cell counts as blank if ``pd.isna`` reports it missing, or if it is a
    string that contains only whitespace. A row with no cells is empty too.
    """
    for cell in row:
        if pd.isna(cell):
            continue
        if isinstance(cell, str) and not cell.strip():
            continue
        # This cell carries real content, so the row is not empty.
        return False
    return True

def trim_leading_empty_rows(df):
    """Drop blank rows from the top of ``df``.

    Returns the frame starting at the first non-blank row, re-indexed from 0.
    If every row is blank (or there are no rows), returns an empty frame that
    keeps only the column labels.
    """
    first_data_pos = next(
        (pos for pos in range(len(df)) if not is_row_empty(df.iloc[pos])),
        None,
    )
    if first_data_pos is None:
        return pd.DataFrame(columns=df.columns)
    return df.iloc[first_data_pos:].reset_index(drop=True)

def trim_trailing_empty_rows(df):
    """Drop blank rows from the bottom of ``df``.

    Returns the frame up to and including the last non-blank row, re-indexed
    from 0. If every row is blank (or there are no rows), returns an empty
    frame that keeps only the column labels.
    """
    last_data_pos = next(
        (pos for pos in range(len(df) - 1, -1, -1) if not is_row_empty(df.iloc[pos])),
        None,
    )
    if last_data_pos is None:
        return pd.DataFrame(columns=df.columns)
    return df.iloc[:last_data_pos + 1].reset_index(drop=True)

def find_trial_split_index(df):
    """Return the position of the first blank row in ``df``, or None.

    The first blank row marks the boundary between trial 1 and trial 2.
    The previous version peeked at the next row before returning, but both
    branches returned the same index, so the look-ahead had no effect and has
    been removed.
    """
    for pos in range(len(df)):
        if is_row_empty(df.iloc[pos]):
            return pos
    return None

def split_trials(df):
    """Split one participant's frame into ``(trial1, trial2)``.

    Leading blank rows are dropped first. The first remaining blank row is
    the separator: rows before it form trial 1, rows after it form trial 2,
    and both halves are trimmed of surrounding blank rows. When there is no
    blank row, the whole frame is trial 1 and trial 2 is an empty frame with
    the same columns. Both results are re-indexed from 0.
    """
    trimmed = trim_leading_empty_rows(df)
    gap_pos = find_trial_split_index(trimmed)

    if gap_pos is None:
        no_second_trial = pd.DataFrame(columns=trimmed.columns)
        return trimmed.reset_index(drop=True), no_second_trial

    first = trim_trailing_empty_rows(trimmed.iloc[:gap_pos])
    # Skip the separator row itself, then strip blank rows on both sides.
    second = trim_trailing_empty_rows(
        trim_leading_empty_rows(trimmed.iloc[gap_pos + 1:])
    )
    return first.reset_index(drop=True), second.reset_index(drop=True)

def downsample(df, target):
    """Thin ``df`` down to ``target`` evenly spaced rows.

    Frames that already have ``target`` rows or fewer are returned unchanged
    (they are not padded). The result is always re-indexed from 0.
    """
    n_rows = len(df)
    if n_rows > target:
        # Evenly spaced positions from the first to the last row, truncated to ints.
        keep = np.linspace(0, n_rows - 1, target, dtype=int)
        df = df.iloc[keep]
    return df.reset_index(drop=True)

def standardize_df(df):
    """Return a copy of ``df`` with every non-time column z-scored.

    Columns whose name ends in ``'_time'`` are left untouched; all other
    columns are replaced by the output of ``StandardScaler.fit_transform``
    fitted on this frame alone.

    The input frame is not modified (the previous version overwrote the
    caller's columns in place). A frame with no rows, or with nothing to
    scale, is returned as an unchanged copy: ``split_trials`` yields an empty
    second trial when a sheet has no separator row, and fitting the scaler on
    zero samples would raise and cause the whole sheet to be skipped.
    """
    scaled = df.copy()
    non_time_cols = [col for col in scaled.columns if not str(col).endswith('_time')]
    if scaled.empty or not non_time_cols:
        return scaled
    scaler = StandardScaler()
    scaled[non_time_cols] = scaler.fit_transform(scaled[non_time_cols])
    return scaled

# === MAIN ===
# Two stages: (1) read and split every sheet once, recording trial lengths;
# (2) thin every trial to the median length and standardize it.
# NOTE(review): machine-specific absolute path — adjust before running elsewhere.
input_folder = r"C:\Users\Abhay\Downloads\Sheets"
# os.listdir() gives no ordering guarantee; sort case-insensitively so the
# concatenated frames come out in the same row order on every run.
csv_files = sorted(
    (f for f in os.listdir(input_folder) if f.lower().endswith('.csv')),
    key=str.lower,
)

# Store raw trials first to determine median
trial_lengths = []
raw_trials = []
# file name -> (abhay_t1, abhay_t2, arjun_t1, arjun_t2). Kept so the second
# stage reuses the parsed trials instead of reading every file a second time.
split_by_file = {}

for file in csv_files:
    # This sheet is skipped on purpose (the reason is not recorded in this notebook).
    if file.lower() == 'asl - 3.csv':
        continue

    path = os.path.join(input_folder, file)
    try:
        df = pd.read_csv(path, header=None)
        df = df.iloc[1:].reset_index(drop=True)  # drop the header row
        df = df.iloc[:, :39]  # 19 + 1 + 19

        abhay_df = df.iloc[:, :19].copy()
        arjun_df = df.iloc[:, 20:].copy()

        abhay_df.columns = [f"Abhay_{col}" for col in expected_columns]
        arjun_df.columns = [f"Arjun_{col}" for col in expected_columns]

        # Drop columns that contain no data at all.
        abhay_df = abhay_df.dropna(axis=1, how='all')
        arjun_df = arjun_df.dropna(axis=1, how='all')

        abhay_t1, abhay_t2 = split_trials(abhay_df)
        arjun_t1, arjun_t2 = split_trials(arjun_df)

        for trial in [abhay_t1, abhay_t2, arjun_t1, arjun_t2]:
            # Empty trials carry no length information and are left out of the median.
            if not trial.empty:
                trial_lengths.append(len(trial))
                raw_trials.append((file, trial))  # store with file reference

        split_by_file[file] = (abhay_t1, abhay_t2, arjun_t1, arjun_t2)

    except Exception as e:
        # Best effort: report the failing sheet and carry on with the rest.
        print(f"Error processing {file}: {e}")

# Compute median trial length
# np.median([]) is NaN and int(NaN) raises an unhelpful error, so fail early with context.
if not trial_lengths:
    raise ValueError(f"No non-empty trials were loaded from {input_folder!r}")
median_length = int(np.median(trial_lengths))
# The banner emoji was mis-encoded in the source; restored here.
print(f"\n🌟 Median trial length across all files: {median_length} rows\n")

# === Second pass: Downsample + Standardize ===
all_abhay_trial1, all_abhay_trial2 = [], []
all_arjun_trial1, all_arjun_trial2 = [], []

for file, (abhay_t1, abhay_t2, arjun_t1, arjun_t2) in split_by_file.items():
    try:
        # Trials shorter than the median are not padded, so lengths may still differ.
        abhay_t1 = downsample(abhay_t1, median_length)
        abhay_t2 = downsample(abhay_t2, median_length)
        arjun_t1 = downsample(arjun_t1, median_length)
        arjun_t2 = downsample(arjun_t2, median_length)

        abhay_t1 = standardize_df(abhay_t1)
        abhay_t2 = standardize_df(abhay_t2)
        arjun_t1 = standardize_df(arjun_t1)
        arjun_t2 = standardize_df(arjun_t2)

        all_abhay_trial1.append(abhay_t1)
        all_abhay_trial2.append(abhay_t2)
        all_arjun_trial1.append(arjun_t1)
        all_arjun_trial2.append(arjun_t2)

        print(f"{file}:")
        print(f"  Abhay Trial 1: {len(abhay_t1)} rows")
        print(f"  Abhay Trial 2: {len(abhay_t2)} rows")
        print(f"  Arjun Trial 1: {len(arjun_t1)} rows")
        print(f"  Arjun Trial 2: {len(arjun_t2)} rows\n")

    except Exception as e:
        print(f"Error reprocessing {file}: {e}")

# Final combined datasets
abhay_trial1_df = pd.concat(all_abhay_trial1, ignore_index=True)
abhay_trial2_df = pd.concat(all_abhay_trial2, ignore_index=True)
arjun_trial1_df = pd.concat(all_arjun_trial1, ignore_index=True)
arjun_trial2_df = pd.concat(all_arjun_trial2, ignore_index=True)

# Optional: Save or return
# abhay_trial1_df.to_csv("abhay_trial1_standardized.csv", index=False)
# arjun_trial2_df.to_csv("arjun_trial2_standardized.csv", index=False)



🌟 Median trial length across all files: 118 rows

Air Quotes.csv:
  Abhay Trial 1: 63 rows
  Abhay Trial 2: 96 rows
  Arjun Trial 1: 112 rows
  Arjun Trial 2: 112 rows

ASL - 1.csv:
  Abhay Trial 1: 118 rows
  Abhay Trial 2: 118 rows
  Arjun Trial 1: 118 rows
  Arjun Trial 2: 118 rows

ASL - 10.csv:
  Abhay Trial 1: 118 rows
  Abhay Trial 2: 118 rows
  Arjun Trial 1: 118 rows
  Arjun Trial 2: 106 rows

ASL - 2.csv:
  Abhay Trial 1: 118 rows
  Abhay Trial 2: 118 rows
  Arjun Trial 1: 118 rows
  Arjun Trial 2: 118 rows

ASL - 4.csv:
  Abhay Trial 1: 118 rows
  Abhay Trial 2: 105 rows
  Arjun Trial 1: 82 rows
  Arjun Trial 2: 100 rows

ASL - 5.csv:
  Abhay Trial 1: 107 rows
  Abhay Trial 2: 106 rows
  Arjun Trial 1: 102 rows
  Arjun Trial 2: 98 rows

ASL - 6.csv:
  Abhay Trial 1: 105 rows
  Abhay Trial 2: 105 rows
  Arjun Trial 1: 88 rows
  Arjun Trial 2: 107 rows

ASL - 7.csv:
  Abhay Trial 1: 118 rows
  Abhay Trial 2: 108 rows
  Arjun Trial 1: 118 rows
  Arjun Trial 2: 118 rows

ASL

In [96]:
# Shape (rows, columns) of the combined Abhay trial-1 frame.
# The expression was cut off after the dot; the recorded tuple output shows it was `.shape`.
abhay_trial1_df.shape

(11087, 19)

In [104]:
import pandas as pd
import os
import numpy as np
from sklearn.preprocessing import StandardScaler

# === Column Definition ===
# Per-participant column schema (19 columns), grouped by signal family.
# The names are prefixed with "Abhay_" / "Arjun_" when a sheet is loaded.
expected_columns = (
    ['time']
    + ['palm_position_x', 'palm_position_y', 'palm_position_z']
    + ['palm_normal_x', 'palm_normal_y', 'palm_normal_z']
    + ['palm_direction_x', 'palm_direction_y', 'palm_direction_z']
    + ['hand_grab_angle', 'hand_grab_strength']
    + ['hand_pinch_angle', 'hand_pinch_strength']
    + ['thumb_extension', 'index_extension', 'middle_extension',
       'ring_extension', 'pinky_extension']
)

# === Utilities ===
def is_row_empty(row):
    """Return True when every cell in ``row`` is missing or blank.

    A cell counts as blank if ``pd.isna`` reports it missing, or if it is a
    string that contains only whitespace. A row with no cells is empty too.
    """
    for cell in row:
        if pd.isna(cell):
            continue
        if isinstance(cell, str) and not cell.strip():
            continue
        # This cell carries real content, so the row is not empty.
        return False
    return True

def trim_leading_empty_rows(df):
    """Drop blank rows from the top of ``df``.

    Returns the frame starting at the first non-blank row, re-indexed from 0.
    If every row is blank (or there are no rows), returns an empty frame that
    keeps only the column labels.
    """
    first_data_pos = next(
        (pos for pos in range(len(df)) if not is_row_empty(df.iloc[pos])),
        None,
    )
    if first_data_pos is None:
        return pd.DataFrame(columns=df.columns)
    return df.iloc[first_data_pos:].reset_index(drop=True)

def trim_trailing_empty_rows(df):
    """Drop blank rows from the bottom of ``df``.

    Returns the frame up to and including the last non-blank row, re-indexed
    from 0. If every row is blank (or there are no rows), returns an empty
    frame that keeps only the column labels.
    """
    last_data_pos = next(
        (pos for pos in range(len(df) - 1, -1, -1) if not is_row_empty(df.iloc[pos])),
        None,
    )
    if last_data_pos is None:
        return pd.DataFrame(columns=df.columns)
    return df.iloc[:last_data_pos + 1].reset_index(drop=True)

def find_trial_split_index(df):
    """Return the position of the first blank row in ``df``, or None.

    The first blank row marks the boundary between trial 1 and trial 2.
    The previous version peeked at the next row before returning, but both
    branches returned the same index, so the look-ahead had no effect and has
    been removed.
    """
    for pos in range(len(df)):
        if is_row_empty(df.iloc[pos]):
            return pos
    return None

def split_trials(df):
    """Split one participant's frame into ``(trial1, trial2)``.

    Leading blank rows are dropped first. The first remaining blank row is
    the separator: rows before it form trial 1, rows after it form trial 2,
    and both halves are trimmed of surrounding blank rows. When there is no
    blank row, the whole frame is trial 1 and trial 2 is an empty frame with
    the same columns. Both results are re-indexed from 0.
    """
    trimmed = trim_leading_empty_rows(df)
    gap_pos = find_trial_split_index(trimmed)

    if gap_pos is None:
        no_second_trial = pd.DataFrame(columns=trimmed.columns)
        return trimmed.reset_index(drop=True), no_second_trial

    first = trim_trailing_empty_rows(trimmed.iloc[:gap_pos])
    # Skip the separator row itself, then strip blank rows on both sides.
    second = trim_trailing_empty_rows(
        trim_leading_empty_rows(trimmed.iloc[gap_pos + 1:])
    )
    return first.reset_index(drop=True), second.reset_index(drop=True)
def safe_resample(df_list, time_col, target_rows):
    """Resample every usable frame in ``df_list`` to ``target_rows`` rows.

    Frames that are empty or lack ``time_col`` are skipped, so the returned
    list can be shorter than the input.
    """
    resampled = []
    for frame in df_list:
        if time_col not in frame.columns or frame.empty:
            continue
        resampled.append(resample_by_time(frame, time_col, target_rows))
    return resampled

def resample_by_time(df, time_col, target_rows):
    """Resample ``df`` onto ``target_rows`` evenly spaced timestamps.

    Every column is coerced to numeric (unparseable cells become NaN). Rows
    with a missing or duplicated timestamp are dropped and the rest are
    sorted by time. Each remaining column is then linearly interpolated, as
    a function of time, onto ``np.linspace(t_min, t_max, target_rows)``.

    The previous version reindexed onto the new timestamps and called
    ``interpolate(method='linear')``. Almost none of the new timestamps exist
    in the original index, so only the end rows survived the reindex, and
    ``'linear'`` ignores the index; every trial collapsed to a straight line
    between its first and last sample. Interpolating against the real
    timestamps keeps the shape of the signal.

    Parameters
    ----------
    df : pandas.DataFrame
        One trial; must contain ``time_col``. It is not modified.
    time_col : str
        Name of the timestamp column.
    target_rows : int
        Number of rows in the result.

    Returns
    -------
    pandas.DataFrame
        ``time_col`` first, then the other columns in their original order.
        Empty (columns only) if no row has a usable timestamp. NaN cells
        inside a column are interpolated over; a column with no numeric
        values at all comes back entirely NaN.
    """
    numeric = df.copy()
    for col in numeric.columns:
        numeric[col] = pd.to_numeric(numeric[col], errors='coerce')

    numeric = (
        numeric.dropna(subset=[time_col])
        .drop_duplicates(subset=time_col)
        .sort_values(time_col)  # np.interp needs increasing sample points
    )
    other_cols = [col for col in numeric.columns if col != time_col]
    ordered_cols = [time_col] + other_cols

    if numeric.empty:
        return pd.DataFrame(columns=ordered_cols)

    times = numeric[time_col].to_numpy(dtype=float)
    new_times = np.linspace(times[0], times[-1], target_rows)

    resampled = {time_col: new_times}
    for col in other_cols:
        values = numeric[col].to_numpy(dtype=float)
        known = ~np.isnan(values)
        if known.any():
            # Interpolate from the valid samples only, so isolated NaNs are bridged.
            resampled[col] = np.interp(new_times, times[known], values[known])
        else:
            resampled[col] = np.full(len(new_times), np.nan)

    return pd.DataFrame(resampled, columns=ordered_cols)



def standardize_df(df, exclude_cols):
    """Return a copy of ``df`` with every column outside ``exclude_cols`` z-scored.

    The selected columns are replaced by the output of
    ``StandardScaler.fit_transform`` fitted on this frame alone.

    The input frame is not modified (the previous version overwrote the
    caller's columns in place). A frame with no rows, or with nothing to
    scale, is returned as an unchanged copy, because fitting the scaler on
    zero samples would raise.
    """
    scaled = df.copy()
    cols_to_scale = [col for col in scaled.columns if col not in exclude_cols]
    if scaled.empty or not cols_to_scale:
        return scaled
    scaler = StandardScaler()
    scaled[cols_to_scale] = scaler.fit_transform(scaled[cols_to_scale])
    return scaled

# === MAIN ===
# Load and split every sheet, resample each trial onto a common number of
# evenly spaced timestamps (the median trial length), standardize, and combine.
# NOTE(review): machine-specific absolute path — adjust before running elsewhere.
input_folder = r"C:\Users\Abhay\Downloads\Sheets"
# os.listdir() gives no ordering guarantee; sort case-insensitively so the
# concatenated frames come out in the same row order on every run.
csv_files = sorted(
    (f for f in os.listdir(input_folder) if f.lower().endswith('.csv')),
    key=str.lower,
)

# One list per participant/trial; each element is one file's trial frame.
all_abhay_trial1, all_abhay_trial2 = [], []
all_arjun_trial1, all_arjun_trial2 = [], []

for file in csv_files:
    # This sheet is skipped on purpose (the reason is not recorded in this notebook).
    if file.lower() == 'asl - 3.csv':
        continue

    path = os.path.join(input_folder, file)
    try:
        df = pd.read_csv(path, header=None)
        df = df.iloc[1:].reset_index(drop=True)  # drop the header row
        df = df.iloc[:, :39]  # 19 Abhay + 1 blank + 19 Arjun

        abhay_df = df.iloc[:, :19].copy()
        arjun_df = df.iloc[:, 20:].copy()

        abhay_df.columns = [f"Abhay_{col}" for col in expected_columns]
        arjun_df.columns = [f"Arjun_{col}" for col in expected_columns]

        # Drop columns that contain no data at all.
        abhay_df = abhay_df.dropna(axis=1, how='all')
        arjun_df = arjun_df.dropna(axis=1, how='all')

        abhay_t1, abhay_t2 = split_trials(abhay_df)
        arjun_t1, arjun_t2 = split_trials(arjun_df)

        all_abhay_trial1.append(abhay_t1)
        all_abhay_trial2.append(abhay_t2)
        all_arjun_trial1.append(arjun_t1)
        all_arjun_trial2.append(arjun_t2)

    except Exception as e:
        # Best effort: report the failing sheet and carry on with the rest.
        print(f"Error processing {file}: {e}")

# === Compute median length ===
# Empty trials are left out: safe_resample() drops them anyway, and counting
# their zero length would pull the median (and every resampled trial) down.
all_lengths = [
    len(df)
    for df in all_abhay_trial1 + all_abhay_trial2 + all_arjun_trial1 + all_arjun_trial2
    if not df.empty
]
# np.median([]) is NaN and int(NaN) raises an unhelpful error, so fail early with context.
if not all_lengths:
    raise ValueError(f"No non-empty trials were loaded from {input_folder!r}")
median_len = int(np.median(all_lengths))
print(f"Using median trial length: {median_len}")

# === Resample ===
# NOTE: safe_resample() skips unusable frames, so the four lists may end up
# with different lengths and are no longer aligned file-by-file.
all_abhay_trial1 = safe_resample(all_abhay_trial1, 'Abhay_time', median_len)
all_abhay_trial2 = safe_resample(all_abhay_trial2, 'Abhay_time', median_len)
all_arjun_trial1 = safe_resample(all_arjun_trial1, 'Arjun_time', median_len)
all_arjun_trial2 = safe_resample(all_arjun_trial2, 'Arjun_time', median_len)

# === Standardize ===
# Each trial is scaled independently; the time column is left as-is.
all_abhay_trial1 = [standardize_df(df, ['Abhay_time']) for df in all_abhay_trial1]
all_abhay_trial2 = [standardize_df(df, ['Abhay_time']) for df in all_abhay_trial2]
all_arjun_trial1 = [standardize_df(df, ['Arjun_time']) for df in all_arjun_trial1]
all_arjun_trial2 = [standardize_df(df, ['Arjun_time']) for df in all_arjun_trial2]

# === Combine ===
abhay_trial1_df = pd.concat(all_abhay_trial1, ignore_index=True)
abhay_trial2_df = pd.concat(all_abhay_trial2, ignore_index=True)
arjun_trial1_df = pd.concat(all_arjun_trial1, ignore_index=True)
arjun_trial2_df = pd.concat(all_arjun_trial2, ignore_index=True)

# === Optional Save ===
# abhay_trial1_df.to_csv("abhay_trial1_processed.csv", index=False)
# abhay_trial2_df.to_csv("abhay_trial2_processed.csv", index=False)
# arjun_trial1_df.to_csv("arjun_trial1_processed.csv", index=False)
# arjun_trial2_df.to_csv("arjun_trial2_processed.csv", index=False)



Using median trial length: 118
