In [2]:
import lzma
import pickle
import pandas as pd
import numpy as np
import os
import re

# Load the Excel file with map names.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
excel_path = 'D:/ML HEALTHCARE/CARDIO FINAL PROJECT/MapsInfo.xlsx'
maps_df = pd.read_excel(excel_path)

# Lookup table: value of the 'Patient' column -> value of the 'AF' column.
# The file-loading cell queries it with the date string parsed from the EGM
# file name and uses the result as the map name.
# If 'Patient' contains duplicate entries, the last row silently wins.
patient_to_map = dict(zip(maps_df['Patient'], maps_df['AF']))

# Directory containing EGM files
egms_dir = 'D:/ML HEALTHCARE/CARDIO FINAL PROJECT/EGMS'

# List all available files.
# os.listdir() returns entries in arbitrary order; sort them so that the
# index printed here (and used to select a file later) always refers to the
# same file across runs and machines.
egm_files = sorted(f for f in os.listdir(egms_dir) if f.endswith('_EGMs.xz'))
print("Available EGM files:\n")
for i, f in enumerate(egm_files):
    print(f"{i}: {f}")


Available EGM files:

0: Patient 2021_09_30_EGMs.xz
1: Patient 2021_10_04_EGMs.xz
2: Patient 2021_10_05_EGMs.xz
3: Patient 2021_10_18_EGMs.xz
4: Patient 2021_10_19_EGMs.xz
5: Patient 2021_11_02_EGMs.xz
6: Patient 2021_11_08_EGMs.xz
7: Patient 2021_11_10_EGMs.xz
8: Patient 2021_11_12_EGMs.xz
9: Patient 2021_11_15_EGMs.xz
10: Patient 2021_11_16_EGMs.xz
11: Patient 2021_11_17_EGMs.xz


In [3]:
# SELECT WHICH FILE TO LOAD (index into egm_files, as printed by the listing cell)
file_index = 0

filename = egm_files[file_index]
print(f"Loading: {filename}")

# Extract patient date (YYYY_MM_DD) from filename
match = re.search(r'Patient (\d{4}_\d{2}_\d{2})_EGMs\.xz', filename)
if match is None:
    # re.search returns None on no match; fail with a readable message
    # instead of an AttributeError on `.group`.
    raise ValueError(
        f"File name {filename!r} does not follow the expected "
        "'Patient YYYY_MM_DD_EGMs.xz' pattern"
    )
patient_date = match.group(1)
map_name = patient_to_map.get(patient_date)
print(f"Patient date: {patient_date}")
print(f"Map name: {map_name}")

# A patient missing from the Excel sheet yields None and an empty 'AF' cell
# yields NaN; either would only surface later as an obscure lookup failure.
if map_name is None or pd.isna(map_name):
    raise LookupError(
        f"No map name found for patient date {patient_date!r} in {excel_path!r}"
    )

# Load the file (LZMA-compressed pickle).
# SECURITY: pickle.load can execute arbitrary code while deserialising —
# only open files that come from a trusted source.
load_file_path = os.path.join(egms_dir, filename)
with lzma.open(load_file_path, 'rb') as f:
    file_content = pickle.load(f)

# Show what's in the file
print(f"\nFile content keys: {list(file_content.keys())}")

Loading: Patient 2021_09_30_EGMs.xz
Patient date: 2021_09_30
Map name: 2-AI FA BIP FINDER PRE ABL

File content keys: ['patient_id', 'map', 'point_indices', 'num_points', 'unipolar', 'bipolar', 'reference', 'electrode_positions', 'point_unipolar', 'point_bipolar', 'Subsample']


In [None]:
# Get the map index and bipolar EGMs.
# file_content['map'] is converted to a plain list so the selected map name
# can be located by equality; the same position indexes file_content['bipolar'].
available_maps = file_content['map'].tolist()
if map_name not in available_maps:
    # Same exception type list.index() would raise, but with enough context
    # to see which maps the file actually contains.
    raise ValueError(
        f"Map {map_name!r} not found in file; available maps: {available_maps}"
    )
map_index = available_maps.index(map_name)
bipolar_EGMs = file_content['bipolar'][map_index]

# Axis 0 is reported as time samples and axis 1 as measurement points.
print(f"Bipolar EGMs shape: {bipolar_EGMs.shape}")
print(f"  - Rows (time samples): {bipolar_EGMs.shape[0]}")
print(f"  - Columns (points): {bipolar_EGMs.shape[1]}")

Bipolar EGMs shape: (1250, 10875)
  - Rows (time samples): 1250
  - Columns (points): 10875


In [5]:
# Convert to DataFrame for Data Wrangler visualization.
# Each column is a different measurement point, rows are time samples.

# Time between consecutive samples, in milliseconds.
# NOTE(review): assumes a 500 Hz sampling rate (2 ms per sample) — confirm
# against the recording/subsampling settings before relying on Time_ms.
SAMPLE_PERIOD_MS = 2

egm_df = pd.DataFrame(
    bipolar_EGMs,
    columns=[f'Point_{i}' for i in range(bipolar_EGMs.shape[1])]
)

# Add the time axis as the first column: 0, 2, 4, ... ms (integer values)
egm_df.insert(0, 'Time_ms', np.arange(bipolar_EGMs.shape[0]) * SAMPLE_PERIOD_MS)

print(f"DataFrame shape: {egm_df.shape}")
print(f"Columns: Time_ms + {bipolar_EGMs.shape[1]} points")

egm_df

DataFrame shape: (1250, 10876)
Columns: Time_ms + 10875 points


Unnamed: 0,Time_ms,Point_0,Point_1,Point_2,Point_3,Point_4,Point_5,Point_6,Point_7,Point_8,...,Point_10865,Point_10866,Point_10867,Point_10868,Point_10869,Point_10870,Point_10871,Point_10872,Point_10873,Point_10874
0,0,0.000,-0.003,0.009,0.012,0.012,0.003,0.003,-0.009,-0.003,...,0.000,0.000,0.045,0.000,0.000,0.060,0.237,-0.069,0.015,-0.129
1,2,-0.003,-0.006,0.009,0.015,0.003,0.003,0.000,-0.012,-0.006,...,-0.003,0.000,0.048,0.000,0.003,0.033,-0.018,-0.036,0.099,-0.114
2,4,-0.009,0.000,0.003,0.012,0.000,0.000,0.006,-0.009,-0.006,...,0.000,0.000,0.039,0.000,0.000,-0.078,-0.534,0.003,0.204,-0.201
3,6,0.000,0.000,0.009,0.006,0.006,0.000,0.000,-0.006,-0.003,...,0.000,0.000,0.027,0.000,0.000,0.162,0.177,0.024,0.198,-0.198
4,8,0.000,0.000,0.003,0.000,0.003,0.003,0.000,-0.006,-0.003,...,0.000,0.000,0.024,0.000,0.000,0.096,0.249,0.030,0.138,-0.138
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1245,2490,0.003,-0.003,0.000,-0.006,-0.003,0.000,-0.003,0.003,-0.006,...,-0.036,0.081,-0.021,-0.012,0.003,-0.015,-0.003,0.003,-0.003,0.000
1246,2492,0.000,-0.003,-0.003,0.000,-0.003,-0.006,-0.003,0.000,-0.003,...,0.039,0.039,-0.003,0.015,-0.312,-0.003,-0.003,0.006,0.000,0.003
1247,2494,0.000,0.000,0.000,0.000,0.000,0.000,-0.006,0.006,0.000,...,0.078,0.012,0.003,0.039,-1.206,0.000,0.000,0.000,0.000,0.000
1248,2496,0.000,0.000,-0.003,0.000,-0.003,0.000,0.000,0.003,0.000,...,0.111,-0.012,0.015,0.060,-0.387,0.003,0.000,0.000,0.000,0.003
