# Select Patients for Analysis Temporal Neocortical

In [1]:
import numpy as np
import pandas as pd
import os

## 1. What Nina has

In [6]:
# Load HUP_implant_dates.xlsx
nina_patients_df = pd.read_excel("../../Data/HUP_implant_dates.xlsx")
nina_patients_df

Unnamed: 0,ptID,IEEG_Portal_Number,Implant_Date,implant_time,Explant_Date,weight_kg
0,225,HUP225_phaseII,2021-10-18,07:15:00,2021-10-26 17:30:00,58.5
1,224,HUP224_phaseII,2021-10-13,07:15:00,2021-10-20 00:00:00,85.5
2,223,HUP223_phaseII,2021-09-29,07:15:00,2021-10-08 08:21:00,101.4
3,221,HUP221_phaseII,2021-08-16,07:15:00,2021-08-23 00:00:00,124.3
4,219,HUP219_phaseII,2021-07-12,07:15:00,2021-07-16 08:18:00,101.6
...,...,...,...,...,...,...
75,141,HUP141_phaseII,2017-05-24,07:15:00,2017-06-01 00:00:00,85.7
76,140,HUP140_phaseII_D01-D02,2017-05-10,07:15:00,2017-05-19 00:00:00,56.7
77,139,HUP139_phaseII,2017-04-26,07:15:00,2017-05-09 00:00:00,69.8
78,138,HUP138_phaseII,2017-04-12,07:15:00,2017-04-20 00:00:00,84.4


In [7]:
hup_patient_ids = nina_patients_df["ptID"].to_numpy()
hup_patient_ids

array([225, 224, 223, 221, 219, 217, 216, 215, 214, 213, 211, 210, 209,
       208, 207, 206, 205, 204, 202, 201, 199, 197, 196, 195, 194, 193,
       192, 191, 190, 189, 188, 187, 186, 185, 184, 182, 181, 180, 179,
       178, 177, 175, 174, 173, 172, 171, 170, 169, 168, 167, 166, 165,
       164, 163, 162, 161, 160, 159, 158, 157, 156, 155, 154, 153, 152,
       151, 150, 149, 148, 147, 146, 145, 144, 143, 142, 141, 140, 139,
       138, 137])

## 4. Focal epilepsy - temporal neocortical

In [8]:
# Load the csv file from /data/soz_locations.csv
soz_locations_df = pd.read_csv("../../Data/soz_locations.csv").dropna()

# Drop the rows where name does not contain HUP
soz_locations_df = soz_locations_df[soz_locations_df["name"].str.contains("HUP")]

# Drop the rows where lateralization is bilateral
# soz_locations_df = soz_locations_df[soz_locations_df["lateralization"] != "bilateral"]

# # Drop the rows where region is not "mesial temporal"
# soz_locations_df = soz_locations_df[
#     soz_locations_df["region"] == "temporal neocortical"
# ]

# Delete the first three characters in the name column
soz_locations_df["name"] = soz_locations_df["name"].str[3:]

# Rename the name column to hup_id
soz_locations_df = soz_locations_df.rename(columns={"name": "hup_id"})

# Drop the columns where region is "temporal neocortical" or "mesial temporal"
soz_locations_df = soz_locations_df[
    (soz_locations_df["region"] != "temporal neocortical")
    & (soz_locations_df["region"] != "mesial temporal")
]


# Convert the hup_id column to int
soz_locations_df["hup_id"] = soz_locations_df["hup_id"].astype(int)

# Find the rows where hup_id is in common_hup_ids
soz_locations_df = soz_locations_df[soz_locations_df["hup_id"].isin(hup_patient_ids)]

# Reset index
soz_locations_df = soz_locations_df.reset_index(drop=True)
soz_locations_df

Unnamed: 0,hup_id,region,lateralization
0,137,multifocal,bilateral
1,139,other cortex,left
2,143,multifocal,bilateral
3,144,temporal multifocal,right
4,145,other cortex,right
5,146,other cortex,right
6,147,multifocal,left
7,148,temporal multifocal,bilateral
8,149,multifocal,bilateral
9,150,other cortex,right


## 5. Get other information and write to file

In [9]:
# Find weight_kg from nina_patients_df and add it to soz_locations_df as a new column
soz_locations_df["weight_kg"] = soz_locations_df["hup_id"].apply(
    lambda x: nina_patients_df[nina_patients_df["ptID"] == x]["weight_kg"].values[0]
)
soz_locations_df

Unnamed: 0,hup_id,region,lateralization,weight_kg
0,137,multifocal,bilateral,141.3
1,139,other cortex,left,69.8
2,143,multifocal,bilateral,61.5
3,144,temporal multifocal,right,62.8
4,145,other cortex,right,122.9
5,146,other cortex,right,102.1
6,147,multifocal,left,96.6
7,148,temporal multifocal,bilateral,81.6
8,149,multifocal,bilateral,81.8
9,150,other cortex,right,108.0


In [10]:
# Load rid_hup_table.csv from ./data/
rid_hup_table_df = pd.read_csv("../../Data/rid_hup_table.csv")

# Drop the t3_subject_id and ieegportalsubjno columns
rid_hup_table_df = rid_hup_table_df.drop(columns=["t3_subject_id", "ieegportalsubjno"])

# Rename hupsubjno to hup_id
rid_hup_table_df = rid_hup_table_df.rename(columns={"hupsubjno": "hup_id"})
rid_hup_table_df

Unnamed: 0,record_id,hup_id
0,623,35
1,624,36
2,625,37
3,626,38
4,627,39
...,...,...
212,534,250
213,923,251
214,918,252
215,864,253


In [11]:
# Find record_id from rid_hup_table_df and add it to soz_locations_df as a new column of type int, skip if not found
soz_locations_df["record_id"] = soz_locations_df["hup_id"].apply(
    lambda x: rid_hup_table_df[rid_hup_table_df["hup_id"] == x]["record_id"].values[0]
    if len(rid_hup_table_df[rid_hup_table_df["hup_id"] == x]["record_id"].values) > 0
    else np.nan
)
# Drop the rows where record_id is nan
soz_locations_df = soz_locations_df.dropna(subset=["record_id"])
# Convert record_id to int
soz_locations_df["record_id"] = soz_locations_df["record_id"].astype(int)
soz_locations_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soz_locations_df["record_id"] = soz_locations_df["record_id"].astype(int)


Unnamed: 0,hup_id,region,lateralization,weight_kg,record_id
0,137,multifocal,bilateral,141.3,280
2,143,multifocal,bilateral,61.5,206
3,144,temporal multifocal,right,62.8,112
4,145,other cortex,right,122.9,167
5,146,other cortex,right,102.1,301
6,147,multifocal,left,96.6,334
7,148,temporal multifocal,bilateral,81.6,267
8,149,multifocal,bilateral,81.8,240
9,150,other cortex,right,108.0,322
10,151,frontal,right,81.6,309


In [12]:
soz_locations_df["r_id"] = soz_locations_df["hup_id"].apply(
    lambda x: rid_hup_table_df[rid_hup_table_df["hup_id"] == x]["record_id"].values[0]
)
soz_locations_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soz_locations_df["r_id"] = soz_locations_df["hup_id"].apply(


Unnamed: 0,hup_id,region,lateralization,weight_kg,record_id,r_id
0,137,multifocal,bilateral,141.3,280,280
2,143,multifocal,bilateral,61.5,206,206
3,144,temporal multifocal,right,62.8,112,112
4,145,other cortex,right,122.9,167,167
5,146,other cortex,right,102.1,301,301
6,147,multifocal,left,96.6,334,334
7,148,temporal multifocal,bilateral,81.6,267,267
8,149,multifocal,bilateral,81.8,240,240
9,150,other cortex,right,108.0,322,322
10,151,frontal,right,81.6,309,309


In [14]:
# Save soz_locations_df as selected_patients.xlsx
soz_locations_df.to_excel("../../Data/selected_patients_rest.xlsx", index=False)