In [161]:
import pandas as pd
import os
import numpy as np
from sklearn.preprocessing import StandardScaler

# === Column Definition ===
expected_columns = [
    'time', 'palm_position_x', 'palm_position_y', 'palm_position_z',
    'palm_normal_x', 'palm_normal_y', 'palm_normal_z',
    'palm_direction_x', 'palm_direction_y', 'palm_direction_z',
    'hand_grab_angle', 'hand_grab_strength', 'hand_pinch_angle', 'hand_pinch_strength',
    'thumb_extension', 'index_extension', 'middle_extension', 'ring_extension', 'pinky_extension'
]

# === Utilities ===
def is_row_empty(row):
    return all(pd.isna(cell) or (isinstance(cell, str) and cell.strip() == '') for cell in row)

def trim_leading_empty_rows(df):
    for i in range(len(df)):
        if not is_row_empty(df.iloc[i]):
            return df.iloc[i:].reset_index(drop=True)
    return pd.DataFrame(columns=df.columns)

def trim_trailing_empty_rows(df):
    for i in reversed(range(len(df))):
        if not is_row_empty(df.iloc[i]):
            return df.iloc[:i+1].reset_index(drop=True)
    return pd.DataFrame(columns=df.columns)

def find_trial_split_index(df):
    for i in range(len(df)):
        if is_row_empty(df.iloc[i]):
            if i+1 < len(df) and is_row_empty(df.iloc[i+1]):
                return i
            return i
    return None

def split_trials(df):
    df = trim_leading_empty_rows(df)
    split_idx = find_trial_split_index(df)
    if split_idx is None:
        return df.reset_index(drop=True), pd.DataFrame(columns=df.columns)

    trial1 = df.iloc[:split_idx]
    trial2 = df.iloc[split_idx+1:]

    trial1 = trim_trailing_empty_rows(trial1)
    trial2 = trim_leading_empty_rows(trial2)
    trial2 = trim_trailing_empty_rows(trial2)

    return trial1.reset_index(drop=True), trial2.reset_index(drop=True)
def safe_resample(df_list, time_col, target_rows):
    resampled = []
    for i, df in enumerate(df_list):
        if time_col not in df.columns or df.empty:
            print(f"❌ Skipping trial {i}: Missing or empty time column '{time_col}'")
            continue
        result = resample_by_time(df, time_col, target_rows)
        if result.empty:
            print(f"❌ Skipping trial {i}: Resample failed — likely due to all-NaN time column")
        else:
            resampled.append(result)
    return resampled

def resample_by_time(df, time_col, target_rows):
    df = df.copy()

    # 1. Convert time column to numeric
    df[time_col] = pd.to_numeric(df[time_col], errors='coerce')
    df = df.dropna(subset=[time_col])
    df = df.drop_duplicates(subset=time_col)

    # 2. Convert all other columns to numeric
    for col in df.columns:
        if col != time_col:
            df[col] = pd.to_numeric(df[col], errors='coerce')

    # 3. Resample
    df = df.set_index(time_col)
    new_time_index = np.linspace(df.index.min(), df.index.max(), target_rows)
    df_resampled = df.reindex(new_time_index)
    df_resampled = df_resampled.interpolate(method='linear', axis=0).reset_index()
    df_resampled.rename(columns={'index': time_col}, inplace=True)

    return df_resampled



def standardize_df(df, exclude_cols):
    scaler = StandardScaler()
    cols_to_scale = [col for col in df.columns if col not in exclude_cols]
    df[cols_to_scale] = scaler.fit_transform(df[cols_to_scale])
    return df

# === MAIN ===
input_folder = r"C:\Users\Abhay\Downloads\Sheets"
csv_files = [f for f in os.listdir(input_folder) if f.endswith('.csv')]

all_abhay_trial1, all_abhay_trial2 = [], []
all_arjun_trial1, all_arjun_trial2 = [], []
print(f"{file} has {df.shape[1]} columns")

for file in csv_files:
    

    path = os.path.join(input_folder, file)
    try:
        df = pd.read_csv(path, header=None)
        df = df.iloc[1:].reset_index(drop=True)
        df = df.iloc[:, :39]  # 19 Abhay + 1 blank + 19 Arjun

        abhay_df = df.iloc[:, :19].copy()
        arjun_df = df.iloc[:, 20:39].copy()  # ← force 19 columns only
        
        abhay_df.columns = [f"Abhay_{col}" for col in expected_columns]
        arjun_df.columns = [f"Arjun_{col}" for col in expected_columns]
        if "Arjun_time" not in arjun_df.columns:
            print(f"🚫 Arjun_time column missing in {file} — actual columns: {arjun_df.columns.tolist()}")



        abhay_df.dropna(axis=1, how='all', inplace=True)
        arjun_df.dropna(axis=1, how='all', inplace=True)

        abhay_t1, abhay_t2 = split_trials(abhay_df)
        arjun_t1, arjun_t2 = split_trials(arjun_df)


        all_abhay_trial1.append(abhay_t1)
        all_abhay_trial2.append(abhay_t2)
        all_arjun_trial1.append(arjun_t1)
        all_arjun_trial2.append(arjun_t2)

    except Exception as e:
        print(f"Error processing {file}: {e}")

# === Compute median length ===
all_lengths = [len(df) for df in all_abhay_trial1 + all_abhay_trial2 + all_arjun_trial1 + all_arjun_trial2]
median_len = int(np.median(all_lengths))
print(f"Using median trial length: {median_len}")
print("\n=== Trial Lengths Per File ===")
for filename, a1, a2, r1, r2 in zip(csv_files, all_abhay_trial1, all_abhay_trial2, all_arjun_trial1, all_arjun_trial2):
    print(f"{filename}:")
    print(f"  Abhay Trial 1: {len(a1)} rows")
    print(f"  Abhay Trial 2: {len(a2)} rows")
    print(f"  Arjun Trial 1: {len(r1)} rows")
    print(f"  Arjun Trial 2: {len(r2)} rows")


# === Resample ===
all_abhay_trial1 = safe_resample(all_abhay_trial1, 'Abhay_time', median_len)
all_abhay_trial2 = safe_resample(all_abhay_trial2, 'Abhay_time', median_len)
all_arjun_trial1 = safe_resample(all_arjun_trial1, 'Arjun_time', median_len)
all_arjun_trial2 = safe_resample(all_arjun_trial2, 'Arjun_time', median_len)
print("\n=== Trial Lengths Per File ===")
for filename, a1, a2, r1, r2 in zip(csv_files, all_abhay_trial1, all_abhay_trial2, all_arjun_trial1, all_arjun_trial2):
    print(f"{filename}:")
    print(f"  Abhay Trial 1: {len(a1)} rows")
    print(f"  Abhay Trial 2: {len(a2)} rows")
    print(f"  Arjun Trial 1: {len(r1)} rows")
    print(f"  Arjun Trial 2: {len(r2)} rows")

# === Standardize ===
all_abhay_trial1 = [standardize_df(df, ['Abhay_time']) for df in all_abhay_trial1]
all_abhay_trial2 = [standardize_df(df, ['Abhay_time']) for df in all_abhay_trial2]
all_arjun_trial1 = [standardize_df(df, ['Arjun_time']) for df in all_arjun_trial1]
all_arjun_trial2 = [standardize_df(df, ['Arjun_time']) for df in all_arjun_trial2]


# === Optional Save ===
# abhay_trial1_df.to_csv("abhay_trial1_processed.csv", index=False)
# abhay_trial2_df.to_csv("abhay_trial2_processed.csv", index=False)
# arjun_trial1_df.to_csv("arjun_trial1_processed.csv", index=False)
# arjun_trial2_df.to_csv("arjun_trial2_processed.csv", index=False)

Wrench screwing.csv has 39 columns
Using median trial length: 118

=== Trial Lengths Per File ===
Air Quotes.csv:
  Abhay Trial 1: 63 rows
  Abhay Trial 2: 96 rows
  Arjun Trial 1: 112 rows
  Arjun Trial 2: 112 rows
ASL - 1.csv:
  Abhay Trial 1: 132 rows
  Abhay Trial 2: 120 rows
  Arjun Trial 1: 138 rows
  Arjun Trial 2: 142 rows
ASL - 10.csv:
  Abhay Trial 1: 139 rows
  Abhay Trial 2: 126 rows
  Arjun Trial 1: 135 rows
  Arjun Trial 2: 106 rows
ASL - 2.csv:
  Abhay Trial 1: 134 rows
  Abhay Trial 2: 141 rows
  Arjun Trial 1: 145 rows
  Arjun Trial 2: 136 rows
ASL - 3.csv:
  Abhay Trial 1: 102 rows
  Abhay Trial 2: 128 rows
  Arjun Trial 1: 74 rows
  Arjun Trial 2: 106 rows
ASL - 4.csv:
  Abhay Trial 1: 124 rows
  Abhay Trial 2: 105 rows
  Arjun Trial 1: 82 rows
  Arjun Trial 2: 100 rows
ASL - 5.csv:
  Abhay Trial 1: 107 rows
  Abhay Trial 2: 106 rows
  Arjun Trial 1: 102 rows
  Arjun Trial 2: 98 rows
ASL - 6.csv:
  Abhay Trial 1: 105 rows
  Abhay Trial 2: 105 rows
  Arjun Trial 1: 88

In [177]:
abhay_cascaded = []
arjun_cascaded = []

for a1, a2 in zip(all_abhay_trial1, all_abhay_trial2):
    if not a1.empty and not a2.empty and a1.shape[0] == median_len and a2.shape[0] == median_len:
        abhay_cascaded.append(cascade_trials(a1, a2, ['Abhay_time']))
    else:
        print("Skipping Abhay trial with unexpected shape.")

for r1, r2 in zip(all_arjun_trial1, all_arjun_trial2):
    if not r1.empty and not r2.empty and r1.shape[0] == median_len and r2.shape[0] == median_len:
        arjun_cascaded.append(cascade_trials(r1, r2, ['Arjun_time']))
    else:
        print("Skipping Arjun trial with unexpected shape.")
print(f"\nCascaded Abhay shape: {abhay_df_final.shape}")
print(f"Cascaded Arjun shape: {arjun_df_final.shape}")

Using median trial length: 118
Air Quotes.csv: Abhay T1=118, T2=118 | Arjun T1=118, T2=118
ASL - 1.csv: Abhay T1=118, T2=118 | Arjun T1=118, T2=118
ASL - 10.csv: Abhay T1=118, T2=118 | Arjun T1=118, T2=118
ASL - 2.csv: Abhay T1=118, T2=118 | Arjun T1=118, T2=118
ASL - 3.csv: Abhay T1=118, T2=118 | Arjun T1=118, T2=118
ASL - 4.csv: Abhay T1=118, T2=118 | Arjun T1=118, T2=118
ASL - 5.csv: Abhay T1=118, T2=118 | Arjun T1=118, T2=118
ASL - 6.csv: Abhay T1=118, T2=118 | Arjun T1=118, T2=118
ASL - 7.csv: Abhay T1=118, T2=118 | Arjun T1=118, T2=118
ASL - 8.csv: Abhay T1=118, T2=118 | Arjun T1=118, T2=118
ASL - 9.csv: Abhay T1=118, T2=118 | Arjun T1=118, T2=118
ASL - A.csv: Abhay T1=118, T2=118 | Arjun T1=118, T2=118
ASL - B.csv: Abhay T1=118, T2=118 | Arjun T1=118, T2=118
ASL - Bad.csv: Abhay T1=118, T2=118 | Arjun T1=118, T2=118
ASL - C.csv: Abhay T1=118, T2=118 | Arjun T1=118, T2=118
ASL - D.csv: Abhay T1=118, T2=118 | Arjun T1=118, T2=118
ASL - Drink.csv: Abhay T1=118, T2=118 | Arjun T1=11

IndexError: list index out of range

In [147]:
arjun_trial2_df['Arjun_time']


0        0.332724
1        0.352683
2        0.372642
3        0.392601
4        0.412560
           ...   
11677    2.757203
11678    2.779014
11679    2.800825
11680    2.822637
11681    2.844448
Name: Arjun_time, Length: 11682, dtype: float64

In [178]:
import pandas as pd
import os
import numpy as np
from sklearn.preprocessing import StandardScaler

# === Column Definition ===
expected_columns = [
    'time', 'palm_position_x', 'palm_position_y', 'palm_position_z',
    'palm_normal_x', 'palm_normal_y', 'palm_normal_z',
    'palm_direction_x', 'palm_direction_y', 'palm_direction_z',
    'hand_grab_angle', 'hand_grab_strength', 'hand_pinch_angle', 'hand_pinch_strength',
    'thumb_extension', 'index_extension', 'middle_extension', 'ring_extension', 'pinky_extension'
]

# === Utilities ===
def is_row_empty(row):
    return all(pd.isna(cell) or (isinstance(cell, str) and cell.strip() == '') for cell in row)

def trim_leading_empty_rows(df):
    for i in range(len(df)):
        if not is_row_empty(df.iloc[i]):
            return df.iloc[i:].reset_index(drop=True)
    return pd.DataFrame(columns=df.columns)

def trim_trailing_empty_rows(df):
    for i in reversed(range(len(df))):
        if not is_row_empty(df.iloc[i]):
            return df.iloc[:i+1].reset_index(drop=True)
    return pd.DataFrame(columns=df.columns)

def find_trial_split_index(df):
    for i in range(len(df)):
        if is_row_empty(df.iloc[i]):
            return i
    return None

def split_trials(df):
    df = trim_leading_empty_rows(df)
    split_idx = find_trial_split_index(df)
    if split_idx is None:
        return df.reset_index(drop=True), pd.DataFrame(columns=df.columns)
    trial1 = df.iloc[:split_idx]
    trial2 = df.iloc[split_idx+1:]
    trial1 = trim_trailing_empty_rows(trial1)
    trial2 = trim_leading_empty_rows(trial2)
    trial2 = trim_trailing_empty_rows(trial2)
    return trial1.reset_index(drop=True), trial2.reset_index(drop=True)

def resample_by_time(df, time_col, target_rows):
    df = df.copy()
    df[time_col] = pd.to_numeric(df[time_col], errors='coerce')
    df = df.dropna(subset=[time_col])
    df = df.drop_duplicates(subset=time_col)
    if df.empty or df[time_col].nunique() < 2:
        print(f"⚠️ Skipping resample: insufficient or invalid time data in column '{time_col}'")
        return pd.DataFrame(columns=[time_col] + [col for col in df.columns if col != time_col])
    for col in df.columns:
        if col != time_col:
            df[col] = pd.to_numeric(df[col], errors='coerce')
    df = df.set_index(time_col)
    new_time_index = np.linspace(df.index.min(), df.index.max(), target_rows)
    df_resampled = df.reindex(new_time_index)
    df_resampled = df_resampled.interpolate(method='linear', axis=0, limit_direction='both').reset_index()
    df_resampled.rename(columns={'index': time_col}, inplace=True)
    return df_resampled

def standardize_df(df, exclude_cols):
    scaler = StandardScaler()
    cols_to_scale = [col for col in df.columns if col not in exclude_cols]
    df[cols_to_scale] = scaler.fit_transform(df[cols_to_scale])
    return df

def cascade_trials(trial1, trial2, exclude_cols):
    trial1_flat = trial1[[col for col in trial1.columns if col not in exclude_cols]].to_numpy().T.flatten()
    trial2_flat = trial2[[col for col in trial2.columns if col not in exclude_cols]].to_numpy().T.flatten()
    return np.concatenate([trial1_flat, trial2_flat])

# === Main ===
input_folder = r"C:\Users\Abhay\Downloads\Sheets"
csv_files = [f for f in os.listdir(input_folder) if f.endswith('.csv')]
trial_data = []

# Step 1: Load and split trials
for file in csv_files:
    path = os.path.join(input_folder, file)
    try:
        df = pd.read_csv(path, header=None)
        df = df.iloc[1:].reset_index(drop=True)
        df = df.iloc[:, :39]  # Expecting 19 Abhay + 1 blank + 19 Arjun

        abhay_df = df.iloc[:, :19].copy()
        arjun_df = df.iloc[:, -19:].copy()

        abhay_df.columns = [f"Abhay_{col}" for col in expected_columns]
        arjun_df.columns = [f"Arjun_{col}" for col in expected_columns]

        abhay_df.dropna(axis=1, how='all', inplace=True)
        arjun_df.dropna(axis=1, how='all', inplace=True)

        abhay_t1, abhay_t2 = split_trials(abhay_df)
        arjun_t1, arjun_t2 = split_trials(arjun_df)

        trial_data.append((file, abhay_t1, abhay_t2, arjun_t1, arjun_t2))
    except Exception as e:
        print(f"❌ Error processing {file}: {e}")

# Step 2: Compute median trial length
all_lengths = []
for _, a1, a2, r1, r2 in trial_data:
    all_lengths.extend([len(a1), len(a2), len(r1), len(r2)])
median_len = int(np.median(all_lengths))
print(f"\n📏 Using median trial length: {median_len}")

# Step 3: Resample, standardize, and collect clean files
all_abhay_trial1, all_abhay_trial2 = [], []
all_arjun_trial1, all_arjun_trial2 = [], []
valid_filenames = []

for file, a1, a2, r1, r2 in trial_data:
    if all(not df.empty for df in [a1, a2, r1, r2]):
        if 'Abhay_time' in a1.columns and 'Abhay_time' in a2.columns and 'Arjun_time' in r1.columns and 'Arjun_time' in r2.columns:
            ra1 = resample_by_time(a1, 'Abhay_time', median_len)
            ra2 = resample_by_time(a2, 'Abhay_time', median_len)
            rr1 = resample_by_time(r1, 'Arjun_time', median_len)
            rr2 = resample_by_time(r2, 'Arjun_time', median_len)
        
            if all(not df.empty for df in [ra1, ra2, rr1, rr2]):
                all_abhay_trial1.append(standardize_df(ra1, ['Abhay_time']))
                all_abhay_trial2.append(standardize_df(ra2, ['Abhay_time']))
                all_arjun_trial1.append(standardize_df(rr1, ['Arjun_time']))
                all_arjun_trial2.append(standardize_df(rr2, ['Arjun_time']))
                valid_filenames.append(file)
            else:
                print(f"⚠️ Skipped {file} due to empty resample result.")
        else:
            print(f"🚫 Skipped {file} due to missing time column in one or more trials.")


        if all(not df.empty for df in [ra1, ra2, rr1, rr2]):
            all_abhay_trial1.append(standardize_df(ra1, ['Abhay_time']))
            all_abhay_trial2.append(standardize_df(ra2, ['Abhay_time']))
            all_arjun_trial1.append(standardize_df(rr1, ['Arjun_time']))
            all_arjun_trial2.append(standardize_df(rr2, ['Arjun_time']))
            valid_filenames.append(file)
        else:
            print(f"⚠️ Skipped {file} due to failed resample.")
    else:
        print(f"⚠️ Skipped {file} due to empty trials.")

# Step 4: Cascade
abhay_cascaded = []
arjun_cascaded = []

for i in range(len(valid_filenames)):
    try:
        abhay_cascaded.append(cascade_trials(all_abhay_trial1[i], all_abhay_trial2[i], ['Abhay_time']))
        arjun_cascaded.append(cascade_trials(all_arjun_trial1[i], all_arjun_trial2[i], ['Arjun_time']))
    except Exception as e:
        print(f"❌ Cascade error on {valid_filenames[i]}: {e}")

# Step 5: Final Output
abhay_df_final = pd.DataFrame(abhay_cascaded)
arjun_df_final = pd.DataFrame(arjun_cascaded)

print(f"\n✅ Cascaded Abhay shape: {abhay_df_final.shape}")
print(f"✅ Cascaded Arjun shape: {arjun_df_final.shape}")

# === Optional Save ===
# abhay_df_final.to_csv("abhay_cascaded.csv", index=False)
# arjun_df_final.to_csv("arjun_cascaded.csv", index=False)



📏 Using median trial length: 118
🚫 Skipped ASL - 4.csv due to missing time column in one or more trials.

✅ Cascaded Abhay shape: (199, 4248)
✅ Cascaded Arjun shape: (199, 4248)


In [189]:
import pandas as pd
import os
import numpy as np
from sklearn.preprocessing import StandardScaler

# === Column Definition ===
expected_columns = [
    'time', 'palm_position_x', 'palm_position_y', 'palm_position_z',
    'palm_normal_x', 'palm_normal_y', 'palm_normal_z',
    'palm_direction_x', 'palm_direction_y', 'palm_direction_z',
    'hand_grab_angle', 'hand_grab_strength', 'hand_pinch_angle', 'hand_pinch_strength',
    'thumb_extension', 'index_extension', 'middle_extension', 'ring_extension', 'pinky_extension'
]

# === Utilities ===
def is_row_empty(row):
    return all(pd.isna(cell) or (isinstance(cell, str) and cell.strip() == '') for cell in row)

def trim_leading_empty_rows(df):
    for i in range(len(df)):
        if not is_row_empty(df.iloc[i]):
            return df.iloc[i:].reset_index(drop=True)
    return pd.DataFrame(columns=df.columns)

def trim_trailing_empty_rows(df):
    for i in reversed(range(len(df))):
        if not is_row_empty(df.iloc[i]):
            return df.iloc[:i+1].reset_index(drop=True)
    return pd.DataFrame(columns=df.columns)

def find_trial_split_index(df):
    for i in range(len(df)):
        if is_row_empty(df.iloc[i]):
            if i+1 < len(df) and is_row_empty(df.iloc[i+1]):
                return i
            return i
    return None

def split_trials(df):
    df = trim_leading_empty_rows(df)
    split_idx = find_trial_split_index(df)
    if split_idx is None:
        return df.reset_index(drop=True), pd.DataFrame(columns=df.columns)

    trial1 = df.iloc[:split_idx]
    trial2 = df.iloc[split_idx+1:]

    trial1 = trim_trailing_empty_rows(trial1)
    trial2 = trim_leading_empty_rows(trial2)
    trial2 = trim_trailing_empty_rows(trial2)

    return trial1.reset_index(drop=True), trial2.reset_index(drop=True)

def resample_by_time(df, time_col, target_rows):
    df = df.copy()

    df[time_col] = pd.to_numeric(df[time_col], errors='coerce')
    df = df.dropna(subset=[time_col])
    df = df.drop_duplicates(subset=time_col)

    for col in df.columns:
        if col != time_col:
            df[col] = pd.to_numeric(df[col], errors='coerce')

    df = df.set_index(time_col)
    new_time_index = np.linspace(df.index.min(), df.index.max(), target_rows)
    df_resampled = df.reindex(new_time_index)
    df_resampled = df_resampled.interpolate(method='linear', axis=0).reset_index()
    df_resampled.rename(columns={'index': time_col}, inplace=True)

    return df_resampled

def safe_resample(df_list, time_col, target_rows):
    resampled = []
    for i, df in enumerate(df_list):
        if time_col not in df.columns or df.empty:
            print(f"⚠️ Skipping resample: Missing or empty {time_col}")
            continue
        resampled.append(resample_by_time(df, time_col, target_rows))
    return resampled


def standardize_df(df, exclude_cols):
    scaler = StandardScaler()
    cols_to_scale = [col for col in df.columns if col not in exclude_cols]
    df[cols_to_scale] = scaler.fit_transform(df[cols_to_scale])
    return df

def cascade_df(df):
    return pd.Series(df.to_numpy().T.flatten())

# === MAIN ===
input_folder = r"C:\Users\Abhay\Downloads\Sheets"
csv_files = [f for f in os.listdir(input_folder) if f.endswith('.csv')]

all_abhay_trial1, all_abhay_trial2 = [], []
all_arjun_trial1, all_arjun_trial2 = [], []

for file in csv_files:
    path = os.path.join(input_folder, file)
    try:
        df = pd.read_csv(path, header=None)
        df = df.iloc[1:].reset_index(drop=True)
        df = df.iloc[:, :39]  # 19 Abhay + 1 blank + 19 Arjun

        abhay_df = df.iloc[:, :19].copy()
        arjun_df = df.iloc[:, 20:].copy()
        
        abhay_df.columns = [f"Abhay_{col}" for col in expected_columns]
        arjun_df.columns = [f"Arjun_{col}" for col in expected_columns]

        abhay_df.dropna(axis=1, how='all', inplace=True)
        arjun_df.dropna(axis=1, how='all', inplace=True)

        abhay_t1, abhay_t2 = split_trials(abhay_df)
        arjun_t1, arjun_t2 = split_trials(arjun_df)

        all_abhay_trial1.append(abhay_t1)
        all_abhay_trial2.append(abhay_t2)
        all_arjun_trial1.append(arjun_t1)
        all_arjun_trial2.append(arjun_t2)
        print(f"\nLoaded {len(csv_files)} files.")
        print(f"Abhay Trial 1: {len(all_abhay_trial1)}")
        print(f"Abhay Trial 2: {len(all_abhay_trial2)}")
        print(f"Arjun Trial 1: {len(all_arjun_trial1)}")
        print(f"Arjun Trial 2: {len(all_arjun_trial2)}")


    except Exception as e:
        print(f"Error processing {file}: {e}")

# === Compute median trial length ===
all_lengths = [len(df) for df in all_abhay_trial1 + all_abhay_trial2 + all_arjun_trial1 + all_arjun_trial2]
median_len = int(np.median(all_lengths))
print(f"Using median trial length: {median_len}")

# === Resample all ===
all_abhay_trial1 = safe_resample(all_abhay_trial1, 'Abhay_time', median_len)
all_abhay_trial2 = safe_resample(all_abhay_trial2, 'Abhay_time', median_len)
all_arjun_trial1 = safe_resample(all_arjun_trial1, 'Arjun_time', median_len)
all_arjun_trial2 = safe_resample(all_arjun_trial2, 'Arjun_time', median_len)

# === Standardize all ===
all_abhay_trial1 = [standardize_df(df, ['Abhay_time']) for df in all_abhay_trial1]
all_abhay_trial2 = [standardize_df(df, ['Abhay_time']) for df in all_abhay_trial2]
all_arjun_trial1 = [standardize_df(df, ['Arjun_time']) for df in all_arjun_trial1]
all_arjun_trial2 = [standardize_df(df, ['Arjun_time']) for df in all_arjun_trial2]

# === Cascade (flatten) each trial with printout ===
abhay_cascaded = []
arjun_cascaded = []

print("\n=== Cascading Abhay Trials ===")
for i, (t1, t2) in enumerate(zip(all_abhay_trial1, all_abhay_trial2)):
    print(f"\nAbhay Trial {i+1} - T1 shape: {t1.shape}, T2 shape: {t2.shape}")
    print("T1 Columns:", list(t1.columns))
    print("T2 Columns:", list(t2.columns))
    flat_t1 = cascade_df(t1)
    flat_t2 = cascade_df(t2)
    print(f"Cascaded T1 first 10 entries: {flat_t1.head(10).to_list()}")
    print(f"Cascaded T2 first 10 entries: {flat_t2.head(10).to_list()}")
    cascaded = pd.concat([flat_t1, flat_t2], ignore_index=True)
    print(f"Combined length: {len(cascaded)}")
    abhay_cascaded.append(cascaded)

print("\n=== Cascading Arjun Trials ===")
for i, (t1, t2) in enumerate(zip(all_arjun_trial1, all_arjun_trial2)):
    print(f"\nArjun Trial {i+1} - T1 shape: {t1.shape}, T2 shape: {t2.shape}")
    print("T1 Columns:", list(t1.columns))
    print("T2 Columns:", list(t2.columns))
    flat_t1 = cascade_df(t1)
    flat_t2 = cascade_df(t2)
    print(f"Cascaded T1 first 10 entries: {flat_t1.head(10).to_list()}")
    print(f"Cascaded T2 first 10 entries: {flat_t2.head(10).to_list()}")
    cascaded = pd.concat([flat_t1, flat_t2], ignore_index=True)
    print(f"Combined length: {len(cascaded)}")
    arjun_cascaded.append(cascaded)

# Turn lists of Series into DataFrames
abhay_cascaded = pd.DataFrame(abhay_cascaded)
arjun_cascaded = pd.DataFrame(arjun_cascaded)


# === Final Output ===
print("\nFinal Cascaded Shapes:")
print(f"Abhay final table: {abhay_cascaded.shape}")
print(f"Arjun final table: {arjun_cascaded.shape}")

# === Optional Save ===
# abhay_cascaded.to_csv("abhay_final_table.csv", index=False)
# arjun_cascaded.to_csv("arjun_final_table.csv", index=False)



Loaded 100 files.
Abhay Trial 1: 1
Abhay Trial 2: 1
Arjun Trial 1: 1
Arjun Trial 2: 1

Loaded 100 files.
Abhay Trial 1: 2
Abhay Trial 2: 2
Arjun Trial 1: 2
Arjun Trial 2: 2

Loaded 100 files.
Abhay Trial 1: 3
Abhay Trial 2: 3
Arjun Trial 1: 3
Arjun Trial 2: 3

Loaded 100 files.
Abhay Trial 1: 4
Abhay Trial 2: 4
Arjun Trial 1: 4
Arjun Trial 2: 4

Loaded 100 files.
Abhay Trial 1: 5
Abhay Trial 2: 5
Arjun Trial 1: 5
Arjun Trial 2: 5

Loaded 100 files.
Abhay Trial 1: 6
Abhay Trial 2: 6
Arjun Trial 1: 6
Arjun Trial 2: 6

Loaded 100 files.
Abhay Trial 1: 7
Abhay Trial 2: 7
Arjun Trial 1: 7
Arjun Trial 2: 7

Loaded 100 files.
Abhay Trial 1: 8
Abhay Trial 2: 8
Arjun Trial 1: 8
Arjun Trial 2: 8

Loaded 100 files.
Abhay Trial 1: 9
Abhay Trial 2: 9
Arjun Trial 1: 9
Arjun Trial 2: 9

Loaded 100 files.
Abhay Trial 1: 10
Abhay Trial 2: 10
Arjun Trial 1: 10
Arjun Trial 2: 10

Loaded 100 files.
Abhay Trial 1: 11
Abhay Trial 2: 11
Arjun Trial 1: 11
Arjun Trial 2: 11

Loaded 100 files.
Abhay Trial 1: 1

In [183]:
arjun_df.head()

Unnamed: 0,Arjun_time,Arjun_palm_position_x,Arjun_palm_position_y,Arjun_palm_position_z,Arjun_palm_normal_x,Arjun_palm_normal_y,Arjun_palm_normal_z,Arjun_palm_direction_x,Arjun_palm_direction_y,Arjun_palm_direction_z,Arjun_hand_grab_angle,Arjun_hand_grab_strength,Arjun_hand_pinch_angle,Arjun_hand_pinch_strength,Arjun_thumb_extension,Arjun_index_extension,Arjun_middle_extension,Arjun_ring_extension,Arjun_pinky_extension
0,0.527629,-18.850277,205.138412,59.485855,-0.06103,-0.993123,-0.099906,-0.1747,0.109176,-0.97855,0.06689,0,58.273132,0,1,1,1,1,1
1,0.527814,-21.250713,205.505478,58.560154,-0.05092,-0.992868,-0.107798,-0.174473,0.115122,-0.977909,0.089579,0,58.863861,0,1,1,1,1,1
2,1.021311,39.32402,209.112381,60.334293,-0.046118,-0.987318,-0.151911,0.015185,0.151362,-0.988362,0.13873,0,58.117531,0,1,1,1,1,1
3,1.030339,27.043562,205.142303,59.409454,-0.058357,-0.987072,-0.149278,0.00973,0.148964,-0.988795,0.149993,0,58.307758,0,1,1,1,1,1
4,1.030611,23.282059,203.692307,58.796223,-0.068699,-0.986766,-0.146879,0.000986,0.14716,-0.989112,0.141777,0,58.202579,0,1,1,1,1,1
