In [5]:
import pandas as pd
import numpy as np
import os

# Memory-snapshot path prefixes, keyed by recording condition.
# Each value is a prefix, not a full path: extract_all_features appends the
# trial number to it and then reads UnityObjects.csv, Graphics.csv and
# AllManagedObjects.csv from the resulting directory.
room_paths = {
    'kitchen': 'unity_csvs/kitchen/mem/kitchenmem',
    'kitchen_motion': 'unity_csvs/kitchen_motion/mem/kitchenmmem',
    'kitchen_person': 'unity_csvs/kitchen_person/mem/kpmem',
    'kitchen_object': 'unity_csvs/kitchen_object/mem/komem',
    'hallway': 'unity_csvs/hallway/mem/hallmem',
    'hallway_motion': 'unity_csvs/hallway_motion/mem/hallmmem',
    'lab': 'unity_csvs/lab/mem/labmem',
    'lab_motion': 'unity_csvs/lab_motion/mem/labmmem',
    'lab_person': 'unity_csvs/lab_person/mem/labpmem',
    'lab_object': 'unity_csvs/lab_object/mem/labomem',
    'blinds_up': 'unity_csvs/blinds_up/mem/bumem',
    'blinds_up_motion': 'unity_csvs/blinds_up_motion/mem/bummem',
    'blinds_up_person': 'unity_csvs/blinds_up_person/mem/bupmem',
    'blinds_up_object': 'unity_csvs/blinds_up_object/mem/buomem',
    'blinds_down': 'unity_csvs/blinds_down/mem/bdmem',
    'blinds_down_motion': 'unity_csvs/blinds_down_motion/mem/bdmmem',
    'blinds_down_person': 'unity_csvs/blinds_down_person/mem/bdpmem',
    'blinds_down_object': 'unity_csvs/blinds_down_object/mem/bdomem',
}

# CPU-profile path prefixes, keyed by the same condition names as room_paths.
# extract_all_features builds the file name as "<prefix><trial>.csv".
cpu_paths = {
    'kitchen': 'unity_csvs/kitchen/cpu/kitchen',
    'kitchen_motion': 'unity_csvs/kitchen_motion/cpu/kitchenm',
    'kitchen_person': 'unity_csvs/kitchen_person/cpu/kp',
    'kitchen_object': 'unity_csvs/kitchen_object/cpu/ko',
    'hallway': 'unity_csvs/hallway/cpu/hall',
    'hallway_motion': 'unity_csvs/hallway_motion/cpu/hallm',
    'lab': 'unity_csvs/lab/cpu/lab',
    'lab_motion': 'unity_csvs/lab_motion/cpu/labm',
    'lab_person': 'unity_csvs/lab_person/cpu/labp',
    'lab_object': 'unity_csvs/lab_object/cpu/labo',
    'blinds_up': 'unity_csvs/blinds_up/cpu/bu',
    'blinds_up_motion': 'unity_csvs/blinds_up_motion/cpu/bum',
    'blinds_up_person': 'unity_csvs/blinds_up_person/cpu/bup',
    'blinds_up_object': 'unity_csvs/blinds_up_object/cpu/buo',
    'blinds_down': 'unity_csvs/blinds_down/cpu/bd',
    'blinds_down_motion': 'unity_csvs/blinds_down_motion/cpu/bdm',
    'blinds_down_person': 'unity_csvs/blinds_down_person/cpu/bdp',
    'blinds_down_object': 'unity_csvs/blinds_down_object/cpu/bdo',
}

# Room labels (for classification target).
# Maps every recording condition to the label stored in the 'room_label'
# feature: the *_motion / *_person / *_object variants share the label of
# their base room.
room_labels = {
    'kitchen': 'kitchen',
    'kitchen_motion': 'kitchen',
    'kitchen_person': 'kitchen',
    'kitchen_object': 'kitchen',
    'hallway': 'hallway',
    'hallway_motion': 'hallway',
    'lab': 'lab',
    'lab_motion': 'lab',
    'lab_person': 'lab',
    'lab_object': 'lab',
    'blinds_up': 'blinds_up',
    'blinds_up_motion': 'blinds_up',
    'blinds_up_person': 'blinds_up',
    'blinds_up_object': 'blinds_up',
    'blinds_down': 'blinds_down',
    'blinds_down_motion': 'blinds_down',
    'blinds_down_person': 'blinds_down',
    'blinds_down_object': 'blinds_down',
}

# Feature lists
# Values of the 'Type' column in UnityObjects.csv that get per-type
# allocated/resident statistics (see extract_unity_objects_features).
UO_TYPES = ['Mesh', 'GameObject', 'MeshFilter', 'MeshRenderer', 'MonoBehaviour', 'Transform']

# Values of the 'Type' column in AllManagedObjects.csv that are aggregated by
# extract_amo_features. The strings are compared with == against the column,
# so they must match the CSV contents exactly.
AMO_TYPES = [
    'Meta.XR.MRUtilityKit.EffectMesh.EffectMeshObject',
    'Meta.XR.MRUtilityKit.MRUKAnchor',
    'Meta.XR.MRUtilityKit.MRUKAnchor[]',
    'System.Collections.Generic.List<Meta.XR.MRUtilityKit.MRUKAnchor>',
    'System.Collections.Generic.List<UnityEngine.Vector2>',
    'UnityEngine.GameObject',
    'UnityEngine.Mesh',
    'UnityEngine.MeshFilter',
    'UnityEngine.MeshRenderer',
    'UnityEngine.Transform',
    'UnityEngine.Vector2[]',
    'System.Collections.Generic.Dictionary.Entry<Meta.XR.MRUtilityKit.MRUKAnchor,Meta.XR.MRUtilityKit.EffectMesh.EffectMeshObject>[]'
]

# Values of the 'Name' column in the CPU csv whose 'Median Time' is extracted
# by extract_cpu_features (exact string match).
CPU_FEATURES = [
    'meta.xr.mrutilitykit.dll!Meta.XR.MRUtilityKit::MRUK.Update() [Invoke]',
    'Update.ScriptRunBehaviourUpdate',
    'BehaviourUpdate',
    'CullResults.CreateSharedRendererScene',
    'Shadows.CullingCallbacks',
    'CullSendEvents'
]

# Values of the 'NameOfObject' column in UnityObjects.csv that get
# count / allocated statistics (exact string match). Most labels appear both
# as a plain name and with an '_EffectMesh' suffix.
UO_NAME_CATEGORIES = [
    'BED',
    'CEILING', 'CEILING_EffectMesh',
    'COUCH',
    'DOOR_FRAME', 'DOOR_FRAME_EffectMesh',
    'FLOOR', 'FLOOR_EffectMesh',
    'GLOBAL_MESH',
    'INVISIBLE_WALL_FACE', 'INVISIBLE_WALL_FACE_EffectMesh',
    'LAMP', 'LAMP_EffectMesh',
    'OTHER', 'OTHER_EffectMesh',
    'PLANT',
    'SCREEN', 'SCREEN_EffectMesh',
    'STORAGE', 'STORAGE_EffectMesh',
    'TABLE', 'TABLE_EffectMesh',
    'WALL_ART', 'WALL_ART_EffectMesh',
    'WALL_FACE', 'WALL_FACE_EffectMesh',
    'WINDOW_FRAME', 'WINDOW_FRAME_EffectMesh',
]

# Values of the 'NameOfObject' column in Graphics.csv that get
# count / allocated statistics in extract_graphics_features (exact match).
GRAPHICS_NAME_CATEGORIES = [
    'CEILING',
    'DOOR_FRAME',
    'FLOOR',
    'INVISIBLE_WALL_FACE',
    'LAMP',
    'OTHER',
    'SCREEN',
    'STORAGE',
    'TABLE',
    'WALL_ART',
    'WALL_FACE',
    'WINDOW_FRAME',
]

def extract_unity_objects_features(filepath):
    """Summarise one UnityObjects.csv file as a flat feature dictionary.

    Args:
        filepath: Anything ``pd.read_csv`` accepts. The table must provide the
            columns 'Type', 'NameOfObject' and 'Allocated(MB)';
            'Resident(MB)' is read whenever a type in ``UO_TYPES`` has rows.

    Returns:
        dict mapping feature name to value, in a fixed key order:
        five ``UO_Type_<type>_*`` entries per type in ``UO_TYPES``, three
        ``UO_Name_<name>_*`` entries per name in ``UO_NAME_CATEGORIES``, then
        ``UO_Room_count`` and ``UO_Room_allocated_sum``. A type or name with
        no matching rows contributes zeros.
    """
    table = pd.read_csv(filepath)
    features = {}

    # Per-type statistics over rows whose 'Type' equals the listed type.
    type_suffixes = (
        'allocated_sum',
        'allocated_count',
        'allocated_mean',
        'resident_sum',
        'resident_mean',
    )
    for type_name in UO_TYPES:
        rows = table.loc[table['Type'] == type_name]
        prefix = f'UO_Type_{type_name}'
        if rows.empty:
            # Zeros rather than NaN, so mean() is never taken over no rows.
            for suffix in type_suffixes:
                features[f'{prefix}_{suffix}'] = 0
            continue
        allocated = rows['Allocated(MB)']
        resident = rows['Resident(MB)']
        features[f'{prefix}_allocated_sum'] = allocated.sum()
        features[f'{prefix}_allocated_count'] = len(rows)
        features[f'{prefix}_allocated_mean'] = allocated.mean()
        features[f'{prefix}_resident_sum'] = resident.sum()
        features[f'{prefix}_resident_mean'] = resident.mean()

    # Per-name statistics over rows whose 'NameOfObject' equals the category.
    for category in UO_NAME_CATEGORIES:
        rows = table.loc[table['NameOfObject'] == category]
        prefix = f'UO_Name_{category}'
        if rows.empty:
            features[f'{prefix}_count'] = 0
            features[f'{prefix}_allocated_sum'] = 0
            features[f'{prefix}_allocated_mean'] = 0
            continue
        allocated = rows['Allocated(MB)']
        features[f'{prefix}_count'] = len(rows)
        features[f'{prefix}_allocated_sum'] = allocated.sum()
        features[f'{prefix}_allocated_mean'] = allocated.mean()

    # Objects whose name begins with "Room -"; missing names count as no match.
    is_room = table['NameOfObject'].str.startswith('Room -', na=False)
    room_rows = table.loc[is_room]
    features['UO_Room_count'] = len(room_rows)
    features['UO_Room_allocated_sum'] = room_rows['Allocated(MB)'].sum()

    return features
    

def extract_graphics_features(filepath):
    """Summarise one Graphics.csv file as a flat feature dictionary.

    Args:
        filepath: Anything ``pd.read_csv`` accepts. The table must provide
            'NameOfObject'; 'Allocated(MB)' is read for every category that
            has at least one row.

    Returns:
        dict with ``Graphics_<name>_count``, ``Graphics_<name>_allocated_sum``
        and ``Graphics_<name>_allocated_mean`` for each name in
        ``GRAPHICS_NAME_CATEGORIES`` (all three are 0 when no row matches).
    """
    table = pd.read_csv(filepath)
    features = {}

    for category in GRAPHICS_NAME_CATEGORIES:
        rows = table.loc[table['NameOfObject'] == category]
        prefix = f'Graphics_{category}'
        if rows.empty:
            # Category absent from this scan: report zeros instead of NaN.
            features[f'{prefix}_count'] = 0
            features[f'{prefix}_allocated_sum'] = 0
            features[f'{prefix}_allocated_mean'] = 0
            continue
        allocated = rows['Allocated(MB)']
        features[f'{prefix}_count'] = len(rows)
        features[f'{prefix}_allocated_sum'] = allocated.sum()
        features[f'{prefix}_allocated_mean'] = allocated.mean()

    return features
    

def extract_amo_features(filepath):
    """Summarise one AllManagedObjects.csv file as a flat feature dictionary.

    Args:
        filepath: Anything ``pd.read_csv`` accepts. The table must provide
            'Type'; 'Allocated(MB)' and 'Count' are read for every type that
            has at least one row.

    Returns:
        dict with ``AMO_<type>_allocated_sum``, ``AMO_<type>_count_sum`` and
        ``AMO_<type>_allocated_mean`` for each type in ``AMO_TYPES`` (zeros
        when no row matches). In ``<type>`` the characters ``. < > [ ]`` are
        replaced by underscores; other characters, such as commas, are kept.
    """
    table = pd.read_csv(filepath)
    features = {}

    # One-pass equivalent of replacing each of these characters with '_'.
    sanitize = str.maketrans({ch: '_' for ch in '.<>[]'})

    for type_name in AMO_TYPES:
        rows = table.loc[table['Type'] == type_name]
        prefix = f'AMO_{type_name.translate(sanitize)}'

        if rows.empty:
            features[f'{prefix}_allocated_sum'] = 0
            features[f'{prefix}_count_sum'] = 0
            features[f'{prefix}_allocated_mean'] = 0
            continue

        allocated = rows['Allocated(MB)']
        features[f'{prefix}_allocated_sum'] = allocated.sum()
        features[f'{prefix}_count_sum'] = rows['Count'].sum()
        features[f'{prefix}_allocated_mean'] = allocated.mean()

    return features


def extract_cpu_features(filepath):
    """Pull the median time of selected markers out of a CPU profile csv.

    Args:
        filepath: Anything ``pd.read_csv`` accepts; the file is parsed with
            ';' as the separator and column names are whitespace-stripped.
            It must provide 'Name', plus 'Median Time' for any marker found.

    Returns:
        dict with one ``CPU_<marker>_median_time`` entry per marker in
        ``CPU_FEATURES``: the 'Median Time' of the first row whose 'Name'
        equals the marker, or 0 when there is no such row. In ``<marker>``
        the characters ``. : ! [ ]`` and spaces become underscores;
        parentheses are left as they are.
    """
    profile = pd.read_csv(filepath, sep=';')
    profile.columns = profile.columns.str.strip()
    features = {}

    # One-pass equivalent of replacing each of these characters with '_'.
    sanitize = str.maketrans({ch: '_' for ch in '.:! []'})

    for marker in CPU_FEATURES:
        matches = profile.loc[profile['Name'] == marker]
        key = f'CPU_{marker.translate(sanitize)}_median_time'
        # Only the first matching row is used if the marker appears repeatedly.
        features[key] = matches['Median Time'].to_numpy()[0] if len(matches) > 0 else 0

    return features


def extract_all_features(room, trial):
    """Build the complete feature row for one scan of one condition.

    Args:
        room: Condition key; must exist in ``room_labels`` and ``room_paths``
            (a missing key raises ``KeyError``).
        trial: Trial number appended to the condition's path prefixes.

    Returns:
        dict holding the metadata entries 'room', 'room_label' and 'trial',
        followed by whatever the UnityObjects, Graphics, AllManagedObjects
        and CPU extractors returned. A source that raises is reported with
        ``print`` and skipped, so its keys are simply absent from the result.
    """
    features = {
        'room': room,
        'room_label': room_labels[room],
        'trial': trial,
    }

    mem_path = f"{room_paths[room]}{trial}"

    # (label used in the error message, zero-argument loader). The loaders are
    # lambdas so that path construction, including the cpu_paths lookup,
    # happens inside the try block below.
    sources = (
        ('UnityObjects', lambda: extract_unity_objects_features(f"{mem_path}/UnityObjects.csv")),
        ('Graphics', lambda: extract_graphics_features(f"{mem_path}/Graphics.csv")),
        ('AMO', lambda: extract_amo_features(f"{mem_path}/AllManagedObjects.csv")),
        ('CPU', lambda: extract_cpu_features(f"{cpu_paths[room]}{trial}.csv")),
    )

    for label, load in sources:
        try:
            features.update(load())
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole scan.
            print(f"Error reading {label} for {room} trial {trial}: {e}")

    return features

In [7]:
# Smoke test: run the whole extraction pipeline on one scan and dump the
# resulting single-row table to disk.
room, trial = 'kitchen', 1

features = extract_all_features(room, trial)
test_df = pd.DataFrame([features])  # one row, one column per feature
test_df.to_csv('test_single_scan.csv', index=False)

n_rows, n_cols = test_df.shape
print("Saved to 'test_single_scan.csv'")
print(f"Shape: {test_df.shape}")
print(f"Number of features: {n_cols}")

Saved to 'test_single_scan.csv'
Shape: (1, 197)
Number of features: 197


In [6]:
# Smoke test of the Graphics extractor on the first kitchen trial.
room, trial = 'kitchen', 1
mem_path = f"{room_paths[room]}{trial}"

graphics_features = extract_graphics_features(f"{mem_path}/Graphics.csv")

print(f"Extracted {len(graphics_features)} features:\n")
# Zero-valued features are hidden to keep the listing short.
nonzero_features = {
    name: value for name, value in graphics_features.items() if value != 0
}
for name, value in nonzero_features.items():
    print(f"  {name}: {value}")

Extracted 36 features:

  Graphics_CEILING_count: 1
  Graphics_FLOOR_count: 1
  Graphics_INVISIBLE_WALL_FACE_count: 1
  Graphics_OTHER_count: 8
  Graphics_OTHER_allocated_sum: 0.016
  Graphics_OTHER_allocated_mean: 0.002
  Graphics_STORAGE_count: 2
  Graphics_STORAGE_allocated_sum: 0.004
  Graphics_STORAGE_allocated_mean: 0.002
  Graphics_TABLE_count: 1
  Graphics_TABLE_allocated_sum: 0.002
  Graphics_TABLE_allocated_mean: 0.002
  Graphics_WALL_FACE_count: 3


In [11]:
import glob

# Register the hallway_person condition in all three lookup tables.
room_paths.update({'hallway_person': 'unity_csvs/hallway_person/mem/hallpmem'})
cpu_paths.update({'hallway_person': 'unity_csvs/hallway_person/cpu/hallp'})
room_labels.update({'hallway_person': 'hallway'})

# Column-name prefixes whose NaN entries are replaced by 0
# (furniture/room objects - absence means they don't exist).
IMPUTE_ZERO_PREFIXES = [
    'UO_Name_',      # furniture names from UnityObjects
    'Graphics_',     # furniture from Graphics
    'UO_Room_',      # room counts
]

# ---- Collect one feature row per (room, trial) ----
all_data = []

for room in room_paths:
    # Trials are numbered from 1; the scan stops at the first trial whose
    # UnityObjects.csv does not exist.
    trial = 1
    mem_path = f"{room_paths[room]}{trial}/UnityObjects.csv"
    while os.path.exists(mem_path):
        print(f"Processing {room} trial {trial}...")
        features = extract_all_features(room, trial)
        all_data.append(features)
        trial += 1
        mem_path = f"{room_paths[room]}{trial}/UnityObjects.csv"
    print(f"  -> {room}: {trial - 1} trials found")

# ---- Assemble the table ----
df_final = pd.DataFrame(all_data)

print("\n=== INITIAL DATASET ===")
print(f"Shape: {df_final.shape}")
print(f"Total scans: {len(all_data)}")

# How many columns contain at least one NaN.
nan_counts = df_final.isnull().sum()
nan_cols = nan_counts.loc[nan_counts > 0]
print(f"\nColumns with NaN values: {len(nan_cols)}")

# ---- Split columns: zero-imputed vs. dropped when incomplete ----
zero_prefixes = tuple(IMPUTE_ZERO_PREFIXES)
impute_zero_cols = [
    name for name in df_final.columns if name.startswith(zero_prefixes)
]
other_cols = [
    name for name in df_final.columns if not name.startswith(zero_prefixes)
]

print(f"\nColumns to impute with 0: {len(impute_zero_cols)}")
print(f"Other columns: {len(other_cols)}")

# Furniture/room columns: a missing value is replaced by 0.
for col in impute_zero_cols:
    df_final[col] = df_final[col].fillna(0)

# Every other column that still has a NaN is removed entirely.
remaining_nan = df_final[other_cols].isnull().sum()
cols_with_nan = remaining_nan.loc[remaining_nan > 0]

if not cols_with_nan.empty:
    print("\n=== COLUMNS WITH REMAINING NaN (will be dropped) ===")
    print(cols_with_nan)

    df_final = df_final.drop(columns=list(cols_with_nan.index))

print("\n=== FINAL DATASET ===")
print(f"Shape: {df_final.shape}")
print(f"Total NaN values: {df_final.isnull().sum().sum()}")

# ---- Persist ----
df_final.to_csv('room_classification_features.csv', index=False)
print("\nSaved to 'room_classification_features.csv'")

Processing kitchen trial 1...
Processing kitchen trial 2...
Processing kitchen trial 3...
Processing kitchen trial 4...
Processing kitchen trial 5...
  -> kitchen: 5 trials found
Processing kitchen_motion trial 1...
Processing kitchen_motion trial 2...
Processing kitchen_motion trial 3...
Processing kitchen_motion trial 4...
Processing kitchen_motion trial 5...
  -> kitchen_motion: 5 trials found
Processing kitchen_person trial 1...
Processing kitchen_person trial 2...
Processing kitchen_person trial 3...
Processing kitchen_person trial 4...
Processing kitchen_person trial 5...
  -> kitchen_person: 5 trials found
Processing kitchen_object trial 1...
Processing kitchen_object trial 2...
Processing kitchen_object trial 3...
Processing kitchen_object trial 4...
Processing kitchen_object trial 5...
  -> kitchen_object: 5 trials found
Processing hallway trial 1...
Processing hallway trial 2...
Processing hallway trial 3...
Processing hallway trial 4...
Processing hallway trial 5...
Processi

In [12]:
# Display the assembled feature table (one row per scan) as the cell output.
df_final

Unnamed: 0,room,room_label,trial,UO_Type_Mesh_allocated_sum,UO_Type_Mesh_allocated_count,UO_Type_Mesh_allocated_mean,UO_Type_Mesh_resident_sum,UO_Type_Mesh_resident_mean,UO_Type_GameObject_allocated_sum,UO_Type_GameObject_allocated_count,...,AMO_UnityEngine_Vector2___allocated_mean,"AMO_System_Collections_Generic_Dictionary_Entry_Meta_XR_MRUtilityKit_MRUKAnchor,Meta_XR_MRUtilityKit_EffectMesh_EffectMeshObject____allocated_sum","AMO_System_Collections_Generic_Dictionary_Entry_Meta_XR_MRUtilityKit_MRUKAnchor,Meta_XR_MRUtilityKit_EffectMesh_EffectMeshObject____count_sum","AMO_System_Collections_Generic_Dictionary_Entry_Meta_XR_MRUtilityKit_MRUKAnchor,Meta_XR_MRUtilityKit_EffectMesh_EffectMeshObject____allocated_mean",CPU_meta_xr_mrutilitykit_dll_Meta_XR_MRUtilityKit__MRUK_Update()__Invoke__median_time,CPU_Update_ScriptRunBehaviourUpdate_median_time,CPU_BehaviourUpdate_median_time,CPU_CullResults_CreateSharedRendererScene_median_time,CPU_Shadows_CullingCallbacks_median_time,CPU_CullSendEvents_median_time
0,kitchen,kitchen,1,0.575,21,0.027381,0.543,0.025857,0.0,250,...,0.001,0.0,1,0.0,0.197500,0.528854,0.527344,0.044271,0.002240,0.007968
1,kitchen,kitchen,2,0.575,21,0.027381,0.543,0.025857,0.0,250,...,0.001,0.0,1,0.0,0.170677,0.505938,0.504636,0.045260,0.002500,0.007969
2,kitchen,kitchen,3,0.575,21,0.027381,0.543,0.025857,0.0,250,...,0.001,0.0,1,0.0,0.199844,0.524635,0.523646,0.046198,0.002240,0.008282
3,kitchen,kitchen,4,0.575,21,0.027381,0.543,0.025857,0.0,250,...,0.001,0.0,1,0.0,0.181302,0.533750,0.531667,0.045209,0.002396,0.007864
4,kitchen,kitchen,5,0.575,21,0.027381,0.543,0.025857,0.0,250,...,0.001,0.0,1,0.0,0.210313,0.547761,0.546771,0.046094,0.002344,0.007917
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,hallway_person,hallway,1,0.081,12,0.006750,0.069,0.005750,0.0,232,...,0.001,0.0,1,0.0,0.156563,0.489166,0.487865,0.038750,0.002187,0.007916
104,hallway_person,hallway,2,0.081,12,0.006750,0.069,0.005750,0.0,232,...,0.001,0.0,1,0.0,0.158073,0.497761,0.496822,0.044011,0.002291,0.007917
105,hallway_person,hallway,3,0.081,12,0.006750,0.069,0.005750,0.0,232,...,0.001,0.0,1,0.0,0.140469,0.464584,0.462032,0.043958,0.002188,0.008281
106,hallway_person,hallway,4,0.081,12,0.006750,0.069,0.005750,0.0,232,...,0.001,0.0,1,0.0,0.148854,0.470469,0.469739,0.039844,0.002240,0.007865


In [14]:
# Sanity check: total number of missing values left anywhere in the table.
df_final.isna().sum().sum()

np.int64(0)