### **Fuse (combine) the four limb CSV files into one CSV file per session**

In [1]:
import os
import pandas as pd

In [2]:
# Root folder scanned for <player>/<session>/ sub-folders that hold the per-limb CSV files.
input_base = r'C:\uni\goalkeeper_data'
# Root folder that receives one fused CSV per session, grouped into per-player sub-folders.
output_base = r'C:\uni\just_testing'

In [3]:
# (file name, column prefix) pairs, one per limb. The order matters: the fusion
# loop reads the files in this order and maps list positions back to prefixes.
limb_files = list(zip(
    ('RightArm.csv', 'LeftArm.csv', 'RightLeg.csv', 'LeftLeg.csv'),
    ('RA', 'LA', 'RL', 'LL'),
))

In [4]:
def prefix_columns(df, prefix, skip_cols=('server_timestamp', 'session_id', 'goalkeeper_id', 'shot_result', 'type')):
    """Return a renamed copy of ``df`` with ``"<prefix>_"`` prepended to its column labels.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns should be renamed. It is not modified.
    prefix : str
        Text placed in front of each renamed column, followed by an underscore.
    skip_cols : iterable of str, optional
        Column labels that keep their original name. ``None`` means no column
        is skipped. The default is an immutable tuple so that the default value
        cannot be altered between calls.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same data and the renamed columns.
    """
    if skip_cols is None:
        skip_cols = ()
    renamed = {col: f"{prefix}_{col}" for col in df.columns if col not in skip_cols}
    return df.rename(columns=renamed)

In [5]:
# Fuse the limb CSV files of every <player>/<session> folder under input_base
# into one CSV per session, written to <output_base>/<player>/<session>.csv.

# Metadata columns shared by every limb file. They are copied once (from the
# shortest limb) instead of once per limb.
base_cols = ['session_id', 'goalkeeper_id', 'shot_result', 'type']
# Columns every limb file must contain for the fusion below to work.
required_cols = ['server_timestamp', *base_cols]

# sorted(): os.listdir returns entries in arbitrary order; sorting makes the
# processing (and log) order reproducible across machines and runs.
for player in sorted(os.listdir(input_base)):
    player_path = os.path.join(input_base, player)
    if not os.path.isdir(player_path):
        continue

    for session in sorted(os.listdir(player_path)):
        session_path = os.path.join(player_path, session)
        if not os.path.isdir(session_path):
            continue

        dfs = {}
        lengths = []

        for filename, prefix in limb_files:
            path = os.path.join(session_path, filename)
            if not os.path.exists(path):
                print(f"Missing {filename} in {session_path}")
                break
            try:
                df = pd.read_csv(path)
            except (OSError, ValueError) as exc:
                # pandas' EmptyDataError and ParserError are ValueError subclasses.
                # A single unreadable file must not abort the whole batch.
                print(f"Could not read {filename} in {session_path}: {exc}")
                break
            missing_cols = [col for col in required_cols if col not in df.columns]
            if missing_cols:
                # Without these columns the drop/select steps below raise KeyError.
                print(f"{filename} in {session_path} lacks required columns: {missing_cols}")
                break
            dfs[prefix] = df
            lengths.append(len(df))

        # Any break above leaves dfs incomplete, so the session is skipped.
        if len(dfs) != len(limb_files):
            print(f"Skipping session due to missing limbs: {session_path}")
            continue

        # All limbs are truncated to the row count of the shortest one so the
        # frames can be concatenated side by side.
        min_len = min(lengths)

        # lengths was filled in limb_files order, so positions map back to prefixes.
        shortest_idx = lengths.index(min_len)
        timestamp_source_prefix = limb_files[shortest_idx][1]
        shortest_df = dfs[timestamp_source_prefix]
        server_timestamp = shortest_df['server_timestamp'].iloc[:min_len].reset_index(drop=True)
        base_info = shortest_df[base_cols].iloc[:min_len].reset_index(drop=True)

        fused_signals = []
        for prefix, df in dfs.items():
            # Keep only the sensor columns and tag them with the limb prefix.
            sensor_data = df.drop(columns=required_cols).iloc[:min_len].reset_index(drop=True)
            sensor_data = prefix_columns(sensor_data, prefix)
            fused_signals.append(sensor_data)

        fused_df = pd.concat([server_timestamp, base_info] + fused_signals, axis=1)

        out_dir = os.path.join(output_base, player)
        os.makedirs(out_dir, exist_ok=True)
        out_path = os.path.join(out_dir, f"{session}.csv")
        fused_df.to_csv(out_path, index=False)
        print(f"Fused: {out_path}")

Fused: C:\uni\just_testing\goalkeeper_10\session_1.csv
Fused: C:\uni\just_testing\goalkeeper_100\session_1.csv
Fused: C:\uni\just_testing\goalkeeper_1000\session_1.csv
Fused: C:\uni\just_testing\goalkeeper_1000\session_2.csv
Missing LeftArm.csv in C:\uni\goalkeeper_data\goalkeeper_11\session_4
Skipping session due to missing limbs: C:\uni\goalkeeper_data\goalkeeper_11\session_4
Missing LeftArm.csv in C:\uni\goalkeeper_data\goalkeeper_15\session_4
Skipping session due to missing limbs: C:\uni\goalkeeper_data\goalkeeper_15\session_4
Fused: C:\uni\just_testing\goalkeeper_16\session_4.csv
Fused: C:\uni\just_testing\goalkeeper_20\session_1.csv
Fused: C:\uni\just_testing\goalkeeper_201\session_1.csv
Fused: C:\uni\just_testing\goalkeeper_203\session_1.csv
Fused: C:\uni\just_testing\goalkeeper_204\session_1.csv
Fused: C:\uni\just_testing\goalkeeper_205\session_1.csv
Fused: C:\uni\just_testing\goalkeeper_22\session_1.csv
Fused: C:\uni\just_testing\goalkeeper_30\session_1.csv
Fused: C:\uni\just_

In [6]:
# =================== COMBINE ALL FUSED FILES INTO ONE ===================
import os
import pandas as pd

# Collect every per-session CSV found under <output_base>/<player>/ and write
# their row-wise concatenation to <output_base>/fused_data.csv.
output_base = r'C:\uni\just_testing'

all_fused_dfs = []
session_count = 0

print("\n" + "="*60)
print("COMBINING ALL FUSED SESSION FILES INTO ONE DATASET")
print("="*60)

# Check if output directory exists
if not os.path.exists(output_base):
    print(f"Output directory not found: {output_base}")
    print("Make sure the first part ran successfully!")
else:
    # Loop through all player folders.
    # sorted(): os.listdir returns entries in arbitrary order, and the row order
    # of the combined file follows the iteration order, so sort for reproducibility.
    for player in sorted(os.listdir(output_base)):
        player_path = os.path.join(output_base, player)

        # Skip plain files at the top level (this also skips a fused_data.csv
        # left behind by an earlier run of this cell).
        if not os.path.isdir(player_path):
            continue

        print(f"Processing player: {player}")

        # Loop through all session files in player folder
        for session_file in sorted(os.listdir(player_path)):
            if not session_file.endswith('.csv'):
                continue
            session_path = os.path.join(player_path, session_file)

            try:
                # Read the session CSV
                df = pd.read_csv(session_path)
                all_fused_dfs.append(df)
                session_count += 1
                print(f"  Added: {session_file} ({len(df)} rows)")
            except Exception as e:
                # Best effort: report the unreadable file and keep going.
                print(f"  Error reading {session_file}: {e}")

    # Combine all dataframes
    if all_fused_dfs:
        print(f"\nCombining {session_count} session files...")
        combined_df = pd.concat(all_fused_dfs, ignore_index=True)

        # Save the combined file
        combined_path = os.path.join(output_base, 'fused_data.csv')
        combined_df.to_csv(combined_path, index=False)

        print(f"✅ Successfully created: {combined_path}")
        print(f"Total rows: {len(combined_df):,}")
        print(f"Total columns: {len(combined_df.columns)}")

        # Show sample of the combined data
        print("\nFirst few rows of combined data:")
        print(combined_df.head())
        print("\nColumns in combined data:")
        print(list(combined_df.columns))
    else:
        print("❌ No fused session files found!")
        print("Make sure:")
        print("1. Your input directory has the limb CSV files")
        print("2. The first part of the code ran successfully")
        print("3. Check the folder structure:")
        print(f"   {output_base}\\player_name\\session_name.csv")


COMBINING ALL FUSED SESSION FILES INTO ONE DATASET
Processing player: goalkeeper_10
  Added: session_1.csv (1162 rows)
Processing player: goalkeeper_100
  Added: session_1.csv (241 rows)
Processing player: goalkeeper_1000
  Added: session_1.csv (1016 rows)
  Added: session_2.csv (779 rows)
Processing player: goalkeeper_16
  Added: session_4.csv (198 rows)
Processing player: goalkeeper_20
  Added: session_1.csv (358 rows)
Processing player: goalkeeper_201
  Added: session_1.csv (8 rows)
Processing player: goalkeeper_203
  Added: session_1.csv (455 rows)
Processing player: goalkeeper_204
  Added: session_1.csv (2353 rows)
Processing player: goalkeeper_205
  Added: session_1.csv (558 rows)
Processing player: goalkeeper_22
  Added: session_1.csv (572 rows)
Processing player: goalkeeper_30
  Added: session_1.csv (83 rows)
Processing player: goalkeeper_31
  Added: session_1.csv (272 rows)
Processing player: goalkeeper_55
  Added: session_1.csv (189 rows)
Processing player: goalkeeper_6
  Ad

### **Add the labels (coach feedback) to the players' fused data**

In [9]:
# Load the combined motion data and the coach feedback table.
# Both paths are derived from output_base so the reader always points at the
# folder the combine step wrote fused_data.csv into.
# coach_feedback.csv is not produced by this notebook and must already be in
# that folder; pd.read_csv raises FileNotFoundError otherwise.
motion_df = pd.read_csv(os.path.join(output_base, 'fused_data.csv'))
feedback_df = pd.read_csv(os.path.join(output_base, 'coach_feedback.csv'))

  motion_df = pd.read_csv(r'C:\uni\just_testing\fused_data.csv')


FileNotFoundError: [Errno 2] No such file or directory: 'C:\\uni\\just_testing\\coach_feedback.csv'

In [None]:
# Give both tables a helper column holding the server timestamp rounded down to
# whole seconds, so that motion samples and feedback entries can be compared.
for frame in (motion_df, feedback_df):
    frame['timestamp_sec'] = pd.to_datetime(frame['server_timestamp']).dt.floor('s')

NameError: name 'motion_df' is not defined

: 

In [None]:
# Start every motion row with the placeholder label 'undefined'; rows that fall
# inside a feedback window are overwritten by the labelling loop that follows.
motion_df['coach_feedback'] = 'undefined'

In [None]:
# Copy each coach feedback value onto the motion rows of the same session and
# goalkeeper whose (whole-second) timestamp lies within 2 seconds of the
# feedback time, both ends included. Feedback rows are applied in order, so a
# later entry overwrites an earlier one where their windows overlap.
half_window = pd.Timedelta(seconds=2)

for _, fb in feedback_df.iterrows():
    fb_time = fb['timestamp_sec']

    same_session = motion_df['session_id'] == fb['session_id']
    same_keeper = motion_df['goalkeeper_id'] == fb['goalkeeper_id']
    # Series.between is inclusive on both ends by default.
    in_window = motion_df['timestamp_sec'].between(fb_time - half_window, fb_time + half_window)

    motion_df.loc[same_session & same_keeper & in_window, 'coach_feedback'] = fb['coach_feedback']



In [None]:
# Remove the helper column used only for window matching.
# errors='ignore' keeps this cell re-runnable: a second execution, when the
# column is already gone, is a no-op instead of raising KeyError.
motion_df = motion_df.drop(columns=['timestamp_sec'], errors='ignore')

In [None]:
# Write the labelled motion data (without the index column) to a CSV file in
# the current working directory, then confirm on stdout.
labeled_csv_name = 'fused_labeled_data.csv'
motion_df.to_csv(labeled_csv_name, index=False)
print('dataset saved')