# Select Patients for Analysis

In [1]:
import numpy as np
import pandas as pd
import os

## 1. What Nina has

In [2]:
# Load HUP_implant_dates.xlsx
nina_patients_df = pd.read_excel("./data/HUP_implant_dates.xlsx")
nina_patients_df

Unnamed: 0,ptID,IEEG_Portal_Number,Implant_Date,implant_time,Explant_Date,weight_kg
0,225,HUP225_phaseII,2021-10-18,07:15:00,2021-10-26 17:30:00,58.5
1,224,HUP224_phaseII,2021-10-13,07:15:00,2021-10-20 00:00:00,85.5
2,223,HUP223_phaseII,2021-09-29,07:15:00,2021-10-08 08:21:00,101.4
3,221,HUP221_phaseII,2021-08-16,07:15:00,2021-08-23 00:00:00,124.3
4,219,HUP219_phaseII,2021-07-12,07:15:00,2021-07-16 08:18:00,101.6
...,...,...,...,...,...,...
75,141,HUP141_phaseII,2017-05-24,07:15:00,2017-06-01 00:00:00,85.7
76,140,HUP140_phaseII_D01-D02,2017-05-10,07:15:00,2017-05-19 00:00:00,56.7
77,139,HUP139_phaseII,2017-04-26,07:15:00,2017-05-09 00:00:00,69.8
78,138,HUP138_phaseII,2017-04-12,07:15:00,2017-04-20 00:00:00,84.4


In [3]:
hup_patient_ids = nina_patients_df["ptID"].to_numpy()
hup_patient_ids

array([225, 224, 223, 221, 219, 217, 216, 215, 214, 213, 211, 210, 209,
       208, 207, 206, 205, 204, 202, 201, 199, 197, 196, 195, 194, 193,
       192, 191, 190, 189, 188, 187, 186, 185, 184, 182, 181, 180, 179,
       178, 177, 175, 174, 173, 172, 171, 170, 169, 168, 167, 166, 165,
       164, 163, 162, 161, 160, 159, 158, 157, 156, 155, 154, 153, 152,
       151, 150, 149, 148, 147, 146, 145, 144, 143, 142, 141, 140, 139,
       138, 137])

## 2. Sleep/awake classification

In [4]:
ad_ratio_available_hup_ids = []
for filename in os.listdir("./data/ad_ratios"):
    if filename.endswith(".npy"):
        # Extract the string from the fourth character onwards
        patient_hup_id = int(filename[3:].split("_")[0])
        ad_ratio_available_hup_ids.append(patient_hup_id)

ad_ratio_available_hup_ids = np.array(ad_ratio_available_hup_ids).astype(int)
# Get the unique elements in the array
ad_ratio_available_hup_ids = np.unique(ad_ratio_available_hup_ids)
ad_ratio_available_hup_ids

array([ 65,  68,  71,  73,  74,  78,  80,  83,  88,  89,  93,  94, 100,
       101, 102, 105, 106, 107, 108, 110, 111, 112, 113, 114, 116, 117,
       118, 119, 120, 121, 123, 126, 127, 128, 129, 130, 131, 132, 133,
       134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 146, 150,
       151, 157, 158, 162, 163, 164, 171, 172, 177, 181, 185, 187, 188,
       190, 191])

## 3. Intersection

In [5]:
ad_ratio_available_hup_ids = hup_patient_ids

In [6]:
# Find the intersection of the two arrays
common_hup_ids = np.intersect1d(hup_patient_ids, ad_ratio_available_hup_ids)
common_hup_ids

array([137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
       150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162,
       163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
       177, 178, 179, 180, 181, 182, 184, 185, 186, 187, 188, 189, 190,
       191, 192, 193, 194, 195, 196, 197, 199, 201, 202, 204, 205, 206,
       207, 208, 209, 210, 211, 213, 214, 215, 216, 217, 219, 221, 223,
       224, 225])

## 4. Focal epilepsy - mesial temporal lobe

In [7]:
# Load the csv file from /data/soz_locations.csv
soz_locations_df = pd.read_csv("./data/soz_locations.csv").dropna()

# Drop the rows where name does not contain HUP
soz_locations_df = soz_locations_df[soz_locations_df["name"].str.contains("HUP")]

# Drop the rows where lateralization is bilateral
soz_locations_df = soz_locations_df[soz_locations_df["lateralization"] != "bilateral"]

# Drop the rows where region is not "mesial temporal"
soz_locations_df = soz_locations_df[soz_locations_df["region"] == "mesial temporal"]

# Delete the first three characters in the name column
soz_locations_df["name"] = soz_locations_df["name"].str[3:]

# Rename the name column to hup_id
soz_locations_df = soz_locations_df.rename(columns={"name": "hup_id"})

# Convert the hup_id column to int
soz_locations_df["hup_id"] = soz_locations_df["hup_id"].astype(int)

# Find the rows where hup_id is in common_hup_ids
soz_locations_df = soz_locations_df[soz_locations_df["hup_id"].isin(common_hup_ids)]

# Reset index
soz_locations_df = soz_locations_df.reset_index(drop=True)
soz_locations_df

Unnamed: 0,hup_id,region,lateralization
0,138,mesial temporal,left
1,140,mesial temporal,left
2,141,mesial temporal,right
3,142,mesial temporal,left
4,162,mesial temporal,left
5,163,mesial temporal,left
6,164,mesial temporal,left
7,173,mesial temporal,right
8,181,mesial temporal,left
9,185,mesial temporal,left


## 5. Get other information and write to file

In [8]:
# Find weight_kg from nina_patients_df and add it to soz_locations_df as a new column
soz_locations_df["weight_kg"] = soz_locations_df["hup_id"].apply(
    lambda x: nina_patients_df[nina_patients_df["ptID"] == x]["weight_kg"].values[0]
)
soz_locations_df

Unnamed: 0,hup_id,region,lateralization,weight_kg
0,138,mesial temporal,left,84.4
1,140,mesial temporal,left,56.7
2,141,mesial temporal,right,85.7
3,142,mesial temporal,left,65.3
4,162,mesial temporal,left,47.1
5,163,mesial temporal,left,82.1
6,164,mesial temporal,left,95.3
7,173,mesial temporal,right,76.6
8,181,mesial temporal,left,63.0
9,185,mesial temporal,left,76.2


In [10]:
# Load rid_hup_table.csv from ./data/
rid_hup_table_df = pd.read_csv("./data/rid_hup_table.csv")

# Drop the t3_subject_id and ieegportalsubjno columns
rid_hup_table_df = rid_hup_table_df.drop(columns=["t3_subject_id", "ieegportalsubjno"])

# Rename hupsubjno to hup_id
rid_hup_table_df = rid_hup_table_df.rename(columns={"hupsubjno": "hup_id"})
rid_hup_table_df

Unnamed: 0,record_id,hup_id
0,623,35
1,624,36
2,625,37
3,626,38
4,627,39
...,...,...
212,534,250
213,923,251
214,918,252
215,864,253


In [11]:
# Find record_id from rid_hup_table_df and add it to soz_locations_df as a new column
soz_locations_df["r_id"] = soz_locations_df["hup_id"].apply(
    lambda x: rid_hup_table_df[rid_hup_table_df["hup_id"] == x]["record_id"].values[0]
)
soz_locations_df

Unnamed: 0,hup_id,region,lateralization,weight_kg,r_id
0,138,mesial temporal,left,84.4,278
1,140,mesial temporal,left,56.7,320
2,141,mesial temporal,right,85.7,294
3,142,mesial temporal,left,65.3,295
4,162,mesial temporal,left,47.1,412
5,163,mesial temporal,left,82.1,279
6,164,mesial temporal,left,95.3,386
7,173,mesial temporal,right,76.6,31
8,181,mesial temporal,left,63.0,490
9,185,mesial temporal,left,76.2,332


In [12]:
# Save soz_locations_df as selected_patients.xlsx
soz_locations_df.to_excel("./data/selected_patients.xlsx", index=False)