In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
import seaborn as sns
import pickle
import math

# show frames in full in cell output: no row/column truncation, wide text cells
for option_name, option_value in [('display.max_rows', None),
                                  ('display.max_columns', None),
                                  ('max_colwidth', 700)]:
    pd.set_option(option_name, option_value)

This notebook samples image triplets (head, torso, limbs) covering the entire decay process for a selected set of donors.

# Import master dataset and process it

In [2]:
# load the master dataset and take a first look at its size and contents
MASTER_DATASET_PATH = '../data/master_dataset.pkl'
master_data = pd.read_pickle(MASTER_DATASET_PATH)
print(master_data.shape[0])
display(master_data.iloc[:5])

1132970


Unnamed: 0,new_id,old_id,img_path,img,correct_img_date,date_placed_ARF,PMI_days,year,sex,ancestry,est_stature_cm,est_weight_lb,est_stature_in,age_at_death,true_BP,pred_BP,pred_BP_conf,true_SOD_G,BP_of_true_SOD_G,pred_SOD_G,pred_SOD_G_conf,true_SOD_M,BP_of_true_SOD_M
0,0,UT01-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/000/00000121.01.JPG,00000121.01.JPG,2018-01-21,2018-01-21,0.0,2018.0,Male,,177.8,160.0,70.000038,64.0,,stake,100.0,,,,,,
1,0,UT01-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/000/00000121.02.JPG,00000121.02.JPG,2018-01-21,2018-01-21,0.0,2018.0,Male,,177.8,160.0,70.000038,64.0,,fullbody,85.51,,,,,,
2,0,UT01-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/000/00000121.03.JPG,00000121.03.JPG,2018-01-21,2018-01-21,0.0,2018.0,Male,,177.8,160.0,70.000038,64.0,,fullbody,90.52,,,,,,
3,0,UT01-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/000/00000121.04.JPG,00000121.04.JPG,2018-01-21,2018-01-21,0.0,2018.0,Male,,177.8,160.0,70.000038,64.0,,fullbody,99.5,,,,,,
4,0,UT01-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/000/00000121.05.JPG,00000121.05.JPG,2018-01-21,2018-01-21,0.0,2018.0,Male,,177.8,160.0,70.000038,64.0,,fullbody,43.47,,,,,,


In [3]:
# donor_date = the part of the image file name before the first '.'
# (e.g. '5fc10106.11.JPG' -> '5fc10106')
img_name_parts = master_data['img'].str.split('.', n=1, expand=True)
master_data['donor_date'] = img_name_parts[0]
display(master_data.head(3))

Unnamed: 0,new_id,old_id,img_path,img,correct_img_date,date_placed_ARF,PMI_days,year,sex,ancestry,est_stature_cm,est_weight_lb,est_stature_in,age_at_death,true_BP,pred_BP,pred_BP_conf,true_SOD_G,BP_of_true_SOD_G,pred_SOD_G,pred_SOD_G_conf,true_SOD_M,BP_of_true_SOD_M,donor_date
0,0,UT01-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/000/00000121.01.JPG,00000121.01.JPG,2018-01-21,2018-01-21,0.0,2018.0,Male,,177.8,160.0,70.000038,64.0,,stake,100.0,,,,,,,121
1,0,UT01-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/000/00000121.02.JPG,00000121.02.JPG,2018-01-21,2018-01-21,0.0,2018.0,Male,,177.8,160.0,70.000038,64.0,,fullbody,85.51,,,,,,,121
2,0,UT01-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/000/00000121.03.JPG,00000121.03.JPG,2018-01-21,2018-01-21,0.0,2018.0,Male,,177.8,160.0,70.000038,64.0,,fullbody,90.52,,,,,,,121


In [4]:
# final_BP: use true_BP when it is present, otherwise fall back to pred_BP
has_no_true_bp = master_data['true_BP'].isnull()
master_data['final_BP'] = np.where(has_no_true_bp,
                                   master_data['pred_BP'],
                                   master_data['true_BP'])
display(master_data.head(3))
len(master_data)

Unnamed: 0,new_id,old_id,img_path,img,correct_img_date,date_placed_ARF,PMI_days,year,sex,ancestry,est_stature_cm,est_weight_lb,est_stature_in,age_at_death,true_BP,pred_BP,pred_BP_conf,true_SOD_G,BP_of_true_SOD_G,pred_SOD_G,pred_SOD_G_conf,true_SOD_M,BP_of_true_SOD_M,donor_date,final_BP
0,0,UT01-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/000/00000121.01.JPG,00000121.01.JPG,2018-01-21,2018-01-21,0.0,2018.0,Male,,177.8,160.0,70.000038,64.0,,stake,100.0,,,,,,,121,stake
1,0,UT01-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/000/00000121.02.JPG,00000121.02.JPG,2018-01-21,2018-01-21,0.0,2018.0,Male,,177.8,160.0,70.000038,64.0,,fullbody,85.51,,,,,,,121,fullbody
2,0,UT01-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/000/00000121.03.JPG,00000121.03.JPG,2018-01-21,2018-01-21,0.0,2018.0,Male,,177.8,160.0,70.000038,64.0,,fullbody,90.52,,,,,,,121,fullbody


1132970

In [5]:
# keep an image if it has a true_BP label, or if its predicted BP has confidence >= 80
# (equivalently: drop images where true_BP is null AND pred_BP_conf < 80)
has_true_bp = master_data['true_BP'].notnull()
confident_pred = master_data['pred_BP_conf'] >= 80.0
master_data_filtered = master_data[has_true_bp | confident_pred].copy()
display(master_data_filtered.head(3))
len(master_data_filtered)

Unnamed: 0,new_id,old_id,img_path,img,correct_img_date,date_placed_ARF,PMI_days,year,sex,ancestry,est_stature_cm,est_weight_lb,est_stature_in,age_at_death,true_BP,pred_BP,pred_BP_conf,true_SOD_G,BP_of_true_SOD_G,pred_SOD_G,pred_SOD_G_conf,true_SOD_M,BP_of_true_SOD_M,donor_date,final_BP
0,0,UT01-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/000/00000121.01.JPG,00000121.01.JPG,2018-01-21,2018-01-21,0.0,2018.0,Male,,177.8,160.0,70.000038,64.0,,stake,100.0,,,,,,,121,stake
1,0,UT01-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/000/00000121.02.JPG,00000121.02.JPG,2018-01-21,2018-01-21,0.0,2018.0,Male,,177.8,160.0,70.000038,64.0,,fullbody,85.51,,,,,,,121,fullbody
2,0,UT01-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/000/00000121.03.JPG,00000121.03.JPG,2018-01-21,2018-01-21,0.0,2018.0,Male,,177.8,160.0,70.000038,64.0,,fullbody,90.52,,,,,,,121,fullbody


912242

In [6]:
# keep only images whose final_BP is a head, torso (incl. backside) or limb (arm/legs) label
BP_ls = ['head', 'torso', 'backside', 'arm', 'legs']
is_wanted_bp = master_data_filtered['final_BP'].isin(BP_ls)
master_data_filtered = master_data_filtered.loc[is_wanted_bp].copy()
print(master_data_filtered.shape[0])
print(master_data_filtered['final_BP'].unique())
display(master_data_filtered.head())

538485
['torso' 'head' 'arm' 'legs' 'backside']


Unnamed: 0,new_id,old_id,img_path,img,correct_img_date,date_placed_ARF,PMI_days,year,sex,ancestry,est_stature_cm,est_weight_lb,est_stature_in,age_at_death,true_BP,pred_BP,pred_BP_conf,true_SOD_G,BP_of_true_SOD_G,pred_SOD_G,pred_SOD_G_conf,true_SOD_M,BP_of_true_SOD_M,donor_date,final_BP
12,0,UT01-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/000/00000122.04.JPG,00000122.04.JPG,2018-01-22,2018-01-21,1.0,2018.0,Male,,177.8,160.0,70.000038,64.0,,torso,94.74,,,,,,,122,torso
13,0,UT01-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/000/00000122.05.JPG,00000122.05.JPG,2018-01-22,2018-01-21,1.0,2018.0,Male,,177.8,160.0,70.000038,64.0,,torso,99.77,,,,,,,122,torso
15,0,UT01-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/000/00000122.07.JPG,00000122.07.JPG,2018-01-22,2018-01-21,1.0,2018.0,Male,,177.8,160.0,70.000038,64.0,,head,100.0,,,,,,,122,head
16,0,UT01-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/000/00000122.08.JPG,00000122.08.JPG,2018-01-22,2018-01-21,1.0,2018.0,Male,,177.8,160.0,70.000038,64.0,,head,100.0,,,,,2.0,head,122,head
17,0,UT01-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/000/00000122.09.JPG,00000122.09.JPG,2018-01-22,2018-01-21,1.0,2018.0,Male,,177.8,160.0,70.000038,64.0,,arm,99.74,,,,,,,122,arm


In [7]:
# split into one dataframe per body region:
#   head  -> 'head'
#   torso -> 'torso' and 'backside'
#   limbs -> 'arm' and 'legs'
final_bp = master_data_filtered['final_BP']
head_df = master_data_filtered[final_bp == 'head'].copy()
torso_df = master_data_filtered[final_bp.isin(['torso', 'backside'])].copy()
limbs_df = master_data_filtered[final_bp.isin(['arm', 'legs'])].copy()
print(head_df.shape, torso_df.shape, limbs_df.shape)

(126582, 25) (148215, 25) (263688, 25)


In [8]:
# check bodypart dataframes don't contain duplicate images:
# the number of distinct image names must equal the number of rows.
# The counts are still printed, but the check is now enforced instead of being
# left to a manual comparison against the shapes printed earlier.
for bp_name, bp_df in [('head', head_df), ('torso', torso_df), ('limbs', limbs_df)]:
    n_unique_imgs = bp_df.img.nunique()
    print(n_unique_imgs)
    assert n_unique_imgs == len(bp_df), \
        f"{bp_name}_df has {len(bp_df)} rows but only {n_unique_imgs} distinct images"

126582
148215
263688


In [9]:
# align dataframes on donor_date column: an inner many-to-many join, so every head
# image is paired with every torso image (and then every limbs image) that shares
# its donor_date
torso_cols = torso_df[['donor_date', 'img', 'true_SOD_G']]
head_torso_aligned = head_df.merge(torso_cols, on='donor_date', how='inner',
                                   suffixes=('_head', '_torso'))
print(head_torso_aligned.shape)

limbs_cols = limbs_df[['donor_date', 'img', 'true_SOD_G']]
head_torso_limbs_aligned = head_torso_aligned.merge(limbs_cols, on='donor_date', how='inner')
print(head_torso_limbs_aligned.shape)

(801203, 27)
(8637899, 29)


In [10]:
# the limbs columns came out of the second merge without a suffix;
# name them like their '_head' / '_torso' counterparts
limb_column_names = {"img": "img_limbs", "true_SOD_G": "true_SOD_G_limbs"}
head_torso_limbs_aligned.rename(columns=limb_column_names, inplace=True)

In [11]:
# peek at the first few (head, torso, limbs) image triplets
head_torso_limbs_aligned.loc[:, ['img_head', 'img_torso', 'img_limbs']].head()

Unnamed: 0,img_head,img_torso,img_limbs
0,00000122.07.JPG,00000122.04.JPG,00000122.09.JPG
1,00000122.07.JPG,00000122.04.JPG,00000122.20.JPG
2,00000122.07.JPG,00000122.05.JPG,00000122.09.JPG
3,00000122.07.JPG,00000122.05.JPG,00000122.20.JPG
4,00000122.07.JPG,00000122.17.JPG,00000122.09.JPG


In [12]:
# summary statistics of the aligned triplet table
head_torso_limbs_aligned.describe()

Unnamed: 0,PMI_days,year,est_stature_cm,est_weight_lb,est_stature_in,age_at_death,pred_BP_conf,true_SOD_G_head,pred_SOD_G,pred_SOD_G_conf,true_SOD_M,true_SOD_G_torso,true_SOD_G_limbs
count,8637899.0,8606248.0,7744076.0,8194843.0,8369531.0,8605973.0,8637899.0,128988.0,486013.0,486013.0,194755.0,94893.0,45532.0
mean,39.86168,2014.217,170.1235,186.2491,66.75223,66.22147,98.63616,3.479959,3.625821,0.923065,2.557413,3.261674,3.461456
std,46.10867,1.608117,12.95149,62.65673,5.942957,15.55539,3.721343,1.133239,1.280777,0.132007,1.022777,1.111323,1.07617
min,0.0,2012.0,0.0,75.0,0.0,18.0,29.45,1.0,1.0,0.32456,1.0,1.0,1.0
25%,12.0,2013.0,162.56,144.0,64.00003,57.0,99.53,3.0,3.0,0.908885,2.0,3.0,3.0
50%,27.0,2014.0,170.18,175.0,67.00004,68.0,99.97,3.0,3.0,0.996367,2.0,3.0,3.0
75%,54.0,2015.0,177.8,212.0,70.00004,77.0,100.0,4.0,5.0,0.999955,4.0,4.0,4.0
max,723.0,2022.0,195.58,516.0,80.0,98.0,100.0,6.0,6.0,1.0,4.0,6.0,6.0


In [13]:
# drop the names of the large intermediate frames; they are not used below
del master_data
del master_data_filtered

# Sample data
The aim is to fill in the time gaps of the existing Gelderman SOD cohort, so that some donors have labels spanning the whole range from fresh to skeletonized.

In [14]:
# filter such that 0 <= PMI_days <= 365.
# Series.between is inclusive on both ends, so the lower bound stated here is now
# actually enforced (previously only the upper bound was checked). Rows with a
# missing PMI_days are excluded, as before.
within_first_year = head_torso_limbs_aligned.PMI_days.between(0.0, 365.0)
img_trips = head_torso_limbs_aligned[within_first_year].copy()
len(img_trips)

8595272

In [15]:
# drop triplets that are missing any of these donor attributes
required_donor_cols = ['sex', 'est_weight_lb', 'est_stature_in', 'age_at_death']
img_trips = img_trips.dropna(subset=required_donor_cols)
img_trips.shape

(8121248, 29)

In [16]:
# load the existing Gelderman SOD cohort and take a first look at it
GELDERMAN_COHORT_PATH = '../data/Gelderman_SOD_cohort/unique_img_triplets_plus.csv'
g_data = pd.read_csv(GELDERMAN_COHORT_PATH)
print(g_data.shape)
display(g_data.head())

(256, 15)


Unnamed: 0,new_id,donor_date,correct_img_date,date_placed_ARF,PMI_days,age_at_death,sex,est_weight_lb,est_stature_in,img_head,true_SOD_G_head,img_torso,true_SOD_G_torso,img_limbs,true_SOD_G_limbs
0,004,00400124,2018-01-24,2018-01-09,15.0,77.0,Female,180.0,64.0,00400124.12.JPG,3.0,00400124.07.JPG,3.0,00400124.10.JPG,3.0
2,00b,00b00525,2016-05-25,2016-03-07,79.0,38.0,Male,516.0,73.000039,00b00525.08.JPG,5.0,00b00525.04.JPG,5.0,00b00525.27.JPG,3.0
3,00b,00b00818,2016-08-18,2016-03-07,164.0,38.0,Male,516.0,73.000039,00b00818.07.JPG,6.0,00b00818.05.JPG,6.0,00b00818.11.JPG,3.0
4,00d,00d10116,2019-01-16,2018-11-27,50.0,73.0,Male,235.0,73.000039,00d10116.07.JPG,5.0,00d10116.04.JPG,4.0,00d10116.06.JPG,4.0
5,011,01101210,2018-12-10,2018-11-29,11.0,90.0,Female,170.0,69.000037,01101210.06.JPG,2.0,01101210.03.JPG,2.0,01101210.08.JPG,2.0


In [17]:
# keep only the triplets of donors that appear in g_data
cohort_donor_ids = g_data[['new_id']].drop_duplicates()
img_trips_filtered = img_trips.merge(cohort_donor_ids, how='inner', on='new_id')
img_trips_filtered.shape

(3909488, 29)

In [18]:
# remove donor_date already in g_data (meaning already labeled).
# Done as an anti-join: left-merge with an indicator column, then keep the
# rows that found no match on the g_data side.
labeled_donor_dates = g_data[['donor_date']].drop_duplicates()
img_trips_filtered2 = img_trips_filtered.merge(labeled_donor_dates, how='left',
                                               on='donor_date', indicator=True)
print(img_trips_filtered2.shape)
display(img_trips_filtered2.head())

# '_merge' == 'left_only' marks a donor_date that is absent from g_data
not_yet_labeled = img_trips_filtered2['_merge'] == 'left_only'
img_trips_filtered2 = img_trips_filtered2.loc[not_yet_labeled].drop(columns=['_merge']).copy()

print(img_trips_filtered2.shape)
display(img_trips_filtered2.head())

(3909488, 30)


Unnamed: 0,new_id,old_id,img_path,img_head,correct_img_date,date_placed_ARF,PMI_days,year,sex,ancestry,est_stature_cm,est_weight_lb,est_stature_in,age_at_death,true_BP,pred_BP,pred_BP_conf,true_SOD_G_head,BP_of_true_SOD_G,pred_SOD_G,pred_SOD_G_conf,true_SOD_M,BP_of_true_SOD_M,donor_date,final_BP,img_torso,true_SOD_G_torso,img_limbs,true_SOD_G_limbs,_merge
0,4,UT06-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/004/00400110.09.JPG,00400110.09.JPG,2018-01-10,2018-01-09,1.0,2018.0,Female,white,,180.0,64.0,77.0,,head,98.51,,,,,1.0,head,400110,head,00400110.13.JPG,,00400110.11.JPG,,left_only
1,4,UT06-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/004/00400110.09.JPG,00400110.09.JPG,2018-01-10,2018-01-09,1.0,2018.0,Female,white,,180.0,64.0,77.0,,head,98.51,,,,,1.0,head,400110,head,00400110.13.JPG,,00400110.15.JPG,,left_only
2,4,UT06-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/004/00400110.09.JPG,00400110.09.JPG,2018-01-10,2018-01-09,1.0,2018.0,Female,white,,180.0,64.0,77.0,,head,98.51,,,,,1.0,head,400110,head,00400110.13.JPG,,00400110.22.JPG,,left_only
3,4,UT06-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/004/00400110.09.JPG,00400110.09.JPG,2018-01-10,2018-01-09,1.0,2018.0,Female,white,,180.0,64.0,77.0,,head,98.51,,,,,1.0,head,400110,head,00400110.13.JPG,,00400110.26.JPG,,left_only
4,4,UT06-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/004/00400110.10.JPG,00400110.10.JPG,2018-01-10,2018-01-09,1.0,2018.0,Female,white,,180.0,64.0,77.0,,head,99.41,,,,,,,400110,head,00400110.13.JPG,,00400110.11.JPG,,left_only


(3827793, 29)


Unnamed: 0,new_id,old_id,img_path,img_head,correct_img_date,date_placed_ARF,PMI_days,year,sex,ancestry,est_stature_cm,est_weight_lb,est_stature_in,age_at_death,true_BP,pred_BP,pred_BP_conf,true_SOD_G_head,BP_of_true_SOD_G,pred_SOD_G,pred_SOD_G_conf,true_SOD_M,BP_of_true_SOD_M,donor_date,final_BP,img_torso,true_SOD_G_torso,img_limbs,true_SOD_G_limbs
0,4,UT06-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/004/00400110.09.JPG,00400110.09.JPG,2018-01-10,2018-01-09,1.0,2018.0,Female,white,,180.0,64.0,77.0,,head,98.51,,,,,1.0,head,400110,head,00400110.13.JPG,,00400110.11.JPG,
1,4,UT06-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/004/00400110.09.JPG,00400110.09.JPG,2018-01-10,2018-01-09,1.0,2018.0,Female,white,,180.0,64.0,77.0,,head,98.51,,,,,1.0,head,400110,head,00400110.13.JPG,,00400110.15.JPG,
2,4,UT06-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/004/00400110.09.JPG,00400110.09.JPG,2018-01-10,2018-01-09,1.0,2018.0,Female,white,,180.0,64.0,77.0,,head,98.51,,,,,1.0,head,400110,head,00400110.13.JPG,,00400110.22.JPG,
3,4,UT06-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/004/00400110.09.JPG,00400110.09.JPG,2018-01-10,2018-01-09,1.0,2018.0,Female,white,,180.0,64.0,77.0,,head,98.51,,,,,1.0,head,400110,head,00400110.13.JPG,,00400110.26.JPG,
4,4,UT06-18D,/da1_data/icputrd/arf/mean.js/public/anau_img3/004/00400110.10.JPG,00400110.10.JPG,2018-01-10,2018-01-09,1.0,2018.0,Female,white,,180.0,64.0,77.0,,head,99.41,,,,,,,400110,head,00400110.13.JPG,,00400110.11.JPG,


### Donor: 5fc
Female, 56 years, placed in January, 264 days

In [19]:
# all not-yet-labeled image triplets of donor 5fc, plus a quick overview
donor_5fc = img_trips_filtered2.loc[lambda trips: trips.new_id == '5fc'].copy()
print(donor_5fc.shape)
display(donor_5fc.iloc[:1], donor_5fc.describe())

(54356, 29)


Unnamed: 0,new_id,old_id,img_path,img_head,correct_img_date,date_placed_ARF,PMI_days,year,sex,ancestry,est_stature_cm,est_weight_lb,est_stature_in,age_at_death,true_BP,pred_BP,pred_BP_conf,true_SOD_G_head,BP_of_true_SOD_G,pred_SOD_G,pred_SOD_G_conf,true_SOD_M,BP_of_true_SOD_M,donor_date,final_BP,img_torso,true_SOD_G_torso,img_limbs,true_SOD_G_limbs
1246433,5fc,UT102-14D,/da1_data/icputrd/arf/mean.js/public/anau_img3/5fc/5fc10106.11.JPG,5fc10106.11.JPG,2015-01-06,2015-01-06,0.0,2014.0,Female,,165.1,102.0,65.000035,56.0,,head,83.7,,,,,1.0,head,5fc10106,head,5fc10106.05.JPG,,5fc10106.07.JPG,


Unnamed: 0,PMI_days,year,est_stature_cm,est_weight_lb,est_stature_in,age_at_death,pred_BP_conf,true_SOD_G_head,pred_SOD_G,pred_SOD_G_conf,true_SOD_M,true_SOD_G_torso,true_SOD_G_limbs
count,54356.0,54356.0,54356.0,54356.0,54356.0,54356.0,54356.0,32.0,2212.0,2212.0,1277.0,64.0,0.0
mean,84.301383,2014.0,165.1,102.0,65.000035,56.0,96.672,2.0,3.15009,0.940803,1.779953,3.0,
std,37.190231,0.0,2.842197e-14,0.0,0.0,0.0,5.529848,0.0,0.912789,0.066711,0.524604,0.0,
min,0.0,2014.0,165.1,102.0,65.000035,56.0,42.13,2.0,2.0,0.816782,1.0,3.0,
25%,67.0,2014.0,165.1,102.0,65.000035,56.0,96.09,2.0,2.0,0.880035,1.0,3.0,
50%,89.0,2014.0,165.1,102.0,65.000035,56.0,99.3,2.0,4.0,0.961953,2.0,3.0,
75%,103.0,2014.0,165.1,102.0,65.000035,56.0,99.87,2.0,4.0,0.997878,2.0,3.0,
max,264.0,2014.0,165.1,102.0,65.000035,56.0,100.0,2.0,4.0,0.999833,3.0,3.0,


In [20]:
# pick one random triplet per PMI day (fixed seed), then keep only even-numbered days
donor_5fc_samples = (
    donor_5fc
    .groupby('PMI_days')
    .sample(n=1, random_state=1)
    .loc[lambda picked: picked.PMI_days % 2 == 0]
    .copy()
)
donor_5fc_samples.shape

(70, 29)

### Donor: 47d
Male, 63 years, placed in July, 81 days

In [37]:
# all not-yet-labeled image triplets of donor 47d, plus a quick overview
donor_47d = img_trips_filtered2.loc[lambda trips: trips.new_id == '47d'].copy()
print(donor_47d.shape)
display(donor_47d.iloc[:1], donor_47d.describe())

(17161, 29)


Unnamed: 0,new_id,old_id,img_path,img_head,correct_img_date,date_placed_ARF,PMI_days,year,sex,ancestry,est_stature_cm,est_weight_lb,est_stature_in,age_at_death,true_BP,pred_BP,pred_BP_conf,true_SOD_G_head,BP_of_true_SOD_G,pred_SOD_G,pred_SOD_G_conf,true_SOD_M,BP_of_true_SOD_M,donor_date,final_BP,img_torso,true_SOD_G_torso,img_limbs,true_SOD_G_limbs
1017986,47d,UT06-12D,/da1_data/icputrd/arf/mean.js/public/anau_img3/47d/47d00716.26.JPG,47d00716.26.JPG,2012-07-16,2012-07-16,0.0,2012.0,Male,,180.34,220.0,71.000038,63.0,,head,99.99,,,,,,,47d00716,head,47d00716.04.JPG,,47d00716.08.JPG,


Unnamed: 0,PMI_days,year,est_stature_cm,est_weight_lb,est_stature_in,age_at_death,pred_BP_conf,true_SOD_G_head,pred_SOD_G,pred_SOD_G_conf,true_SOD_M,true_SOD_G_torso,true_SOD_G_limbs
count,17161.0,17161.0,17161.0,17161.0,17161.0,17161.0,17161.0,0.0,80.0,80.0,267.0,0.0,0.0
mean,11.471826,2012.0,180.34,220.0,71.00004,63.0,98.52753,,3.0,1.0,3.629213,,
std,11.986007,0.0,0.0,0.0,2.842254e-14,0.0,3.749998,,0.0,0.0,0.483922,,
min,0.0,2012.0,180.34,220.0,71.00004,63.0,80.59,,3.0,1.0,3.0,,
25%,3.0,2012.0,180.34,220.0,71.00004,63.0,99.41,,3.0,1.0,3.0,,
50%,6.0,2012.0,180.34,220.0,71.00004,63.0,99.97,,3.0,1.0,4.0,,
75%,16.0,2012.0,180.34,220.0,71.00004,63.0,100.0,,3.0,1.0,4.0,,
max,81.0,2012.0,180.34,220.0,71.00004,63.0,100.0,,3.0,1.0,4.0,,


In [38]:
# pick one random triplet per PMI day (fixed seed), then keep only even-numbered days
donor_47d_samples = (
    donor_47d
    .groupby('PMI_days')
    .sample(n=1, random_state=1)
    .loc[lambda picked: picked.PMI_days % 2 == 0]
    .copy()
)
donor_47d_samples.shape

(20, 29)

### Donor: 0f2
Male, 37 years, placed in December, 189 days

In [39]:
# all not-yet-labeled image triplets of donor 0f2, plus a quick overview
donor_0f2 = img_trips_filtered2.loc[lambda trips: trips.new_id == '0f2'].copy()
print(donor_0f2.shape)
display(donor_0f2.iloc[:1], donor_0f2.describe())

(114524, 29)


Unnamed: 0,new_id,old_id,img_path,img_head,correct_img_date,date_placed_ARF,PMI_days,year,sex,ancestry,est_stature_cm,est_weight_lb,est_stature_in,age_at_death,true_BP,pred_BP,pred_BP_conf,true_SOD_G_head,BP_of_true_SOD_G,pred_SOD_G,pred_SOD_G_conf,true_SOD_M,BP_of_true_SOD_M,donor_date,final_BP,img_torso,true_SOD_G_torso,img_limbs,true_SOD_G_limbs
352774,0f2,UT79-13D,/da1_data/icputrd/arf/mean.js/public/anau_img3/0f2/0f201219.35.JPG,0f201219.35.JPG,2013-12-19,2013-12-19,0.0,2013.0,Male,,177.8,200.0,70.000038,37.0,,head,100.0,,,,,,,0f201219,head,0f201219.09.JPG,,0f201219.13.JPG,


Unnamed: 0,PMI_days,year,est_stature_cm,est_weight_lb,est_stature_in,age_at_death,pred_BP_conf,true_SOD_G_head,pred_SOD_G,pred_SOD_G_conf,true_SOD_M,true_SOD_G_torso,true_SOD_G_limbs
count,114524.0,114524.0,114524.0,114524.0,114524.0,114524.0,114524.0,4120.0,24270.0,24270.0,751.0,3184.0,0.0
mean,68.419275,2013.0,177.8,200.0,70.00004,37.0,98.771134,3.528155,3.137412,0.958697,2.59787,3.336683,
std,43.223044,0.0,2.842183e-14,0.0,2.842183e-14,0.0,3.851984,0.499267,0.406321,0.100646,1.005688,0.941766,
min,0.0,2013.0,177.8,200.0,70.00004,37.0,50.16,3.0,2.0,0.505819,1.0,2.0,
25%,30.0,2013.0,177.8,200.0,70.00004,37.0,99.59,3.0,3.0,0.990356,2.0,2.0,
50%,69.0,2013.0,177.8,200.0,70.00004,37.0,99.96,4.0,3.0,0.999923,3.0,4.0,
75%,99.0,2013.0,177.8,200.0,70.00004,37.0,100.0,4.0,3.0,0.999997,3.0,4.0,
max,189.0,2013.0,177.8,200.0,70.00004,37.0,100.0,4.0,5.0,1.0,4.0,4.0,


In [40]:
# pick one random triplet per PMI day (fixed seed), then keep only even-numbered days
donor_0f2_samples = (
    donor_0f2
    .groupby('PMI_days')
    .sample(n=1, random_state=1)
    .loc[lambda picked: picked.PMI_days % 2 == 0]
    .copy()
)
donor_0f2_samples.shape

(76, 29)

### Donor: 3e1
Female, 52 years, placed in October, 61 days

In [41]:
# all not-yet-labeled image triplets of donor 3e1, plus a quick overview
donor_3e1 = img_trips_filtered2.loc[lambda trips: trips.new_id == '3e1'].copy()
print(donor_3e1.shape)
display(donor_3e1.iloc[:1], donor_3e1.describe())

(17105, 29)


Unnamed: 0,new_id,old_id,img_path,img_head,correct_img_date,date_placed_ARF,PMI_days,year,sex,ancestry,est_stature_cm,est_weight_lb,est_stature_in,age_at_death,true_BP,pred_BP,pred_BP_conf,true_SOD_G_head,BP_of_true_SOD_G,pred_SOD_G,pred_SOD_G_conf,true_SOD_M,BP_of_true_SOD_M,donor_date,final_BP,img_torso,true_SOD_G_torso,img_limbs,true_SOD_G_limbs
893562,30.0,UT72-12D,/da1_data/icputrd/arf/mean.js/public/anau_img3/3e1/3e101003.21.JPG,3e101003.21.JPG,2012-10-03,2012-10-03,0.0,2012.0,Female,,182.88,145.0,72.000039,52.0,,head,98.95,,,,,,,3e101003,head,3e101003.04.JPG,,3e101003.08.JPG,


Unnamed: 0,PMI_days,year,est_stature_cm,est_weight_lb,est_stature_in,age_at_death,pred_BP_conf,true_SOD_G_head,pred_SOD_G,pred_SOD_G_conf,true_SOD_M,true_SOD_G_torso,true_SOD_G_limbs
count,17105.0,17105.0,17105.0,17105.0,17105.0,17105.0,17105.0,0.0,2508.0,2508.0,536.0,0.0,0.0
mean,17.342005,2012.0,182.88,145.0,72.00004,52.0,99.213811,,3.198565,0.904189,2.447761,,
std,14.175111,0.0,5.684508e-14,0.0,1.421127e-14,0.0,2.593105,,0.668129,0.148282,0.820915,,
min,0.0,2012.0,182.88,145.0,72.00004,52.0,80.55,,3.0,0.40628,2.0,,
25%,9.0,2012.0,182.88,145.0,72.00004,52.0,99.83,,3.0,0.870332,2.0,,
50%,14.0,2012.0,182.88,145.0,72.00004,52.0,99.98,,3.0,0.989212,2.0,,
75%,21.0,2012.0,182.88,145.0,72.00004,52.0,100.0,,3.0,0.99969,2.0,,
max,61.0,2012.0,182.88,145.0,72.00004,52.0,100.0,,6.0,0.999997,4.0,,


In [42]:
# pick one random triplet per PMI day (fixed seed), then keep only even-numbered days
donor_3e1_samples = (
    donor_3e1
    .groupby('PMI_days')
    .sample(n=1, random_state=1)
    .loc[lambda picked: picked.PMI_days % 2 == 0]
    .copy()
)
donor_3e1_samples.shape

(21, 29)

### Donor: 2a3
Male, 47 years, placed in March, 156 days

In [43]:
# all not-yet-labeled image triplets of donor 2a3, plus a quick overview
donor_2a3 = img_trips_filtered2.loc[lambda trips: trips.new_id == '2a3'].copy()
print(donor_2a3.shape)
display(donor_2a3.iloc[:1], donor_2a3.describe())

(20388, 29)


Unnamed: 0,new_id,old_id,img_path,img_head,correct_img_date,date_placed_ARF,PMI_days,year,sex,ancestry,est_stature_cm,est_weight_lb,est_stature_in,age_at_death,true_BP,pred_BP,pred_BP_conf,true_SOD_G_head,BP_of_true_SOD_G,pred_SOD_G,pred_SOD_G_conf,true_SOD_M,BP_of_true_SOD_M,donor_date,final_BP,img_torso,true_SOD_G_torso,img_limbs,true_SOD_G_limbs
838046,2a3,UT09-13D,/da1_data/icputrd/arf/mean.js/public/anau_img3/2a3/2a300309.14.JPG,2a300309.14.JPG,2013-03-09,2013-03-09,0.0,2013.0,Male,,190.5,235.0,75.000041,47.0,,head,100.0,,,,,,,2a300309,head,2a300309.04.JPG,,2a300309.08.JPG,


Unnamed: 0,PMI_days,year,est_stature_cm,est_weight_lb,est_stature_in,age_at_death,pred_BP_conf,true_SOD_G_head,pred_SOD_G,pred_SOD_G_conf,true_SOD_M,true_SOD_G_torso,true_SOD_G_limbs
count,20388.0,20388.0,20388.0,20388.0,20388.0,20388.0,20388.0,0.0,1726.0,1726.0,312.0,0.0,0.0
mean,43.851383,2013.0,190.5,235.0,75.00004,47.0,99.081024,,4.590962,0.862516,2.692308,,
std,40.289666,0.0,0.0,0.0,2.842241e-14,0.0,2.990823,,0.491799,0.143682,0.46228,,
min,0.0,2013.0,190.5,235.0,75.00004,47.0,80.37,,4.0,0.590265,2.0,,
25%,12.0,2013.0,190.5,235.0,75.00004,47.0,99.91,,4.0,0.755225,2.0,,
50%,29.0,2013.0,190.5,235.0,75.00004,47.0,100.0,,5.0,0.947133,3.0,,
75%,68.0,2013.0,190.5,235.0,75.00004,47.0,100.0,,5.0,0.98859,3.0,,
max,156.0,2013.0,190.5,235.0,75.00004,47.0,100.0,,5.0,0.999823,3.0,,


In [44]:
# pick one random triplet per PMI day (fixed seed), then keep only even-numbered days
donor_2a3_samples = (
    donor_2a3
    .groupby('PMI_days')
    .sample(n=1, random_state=1)
    .loc[lambda picked: picked.PMI_days % 2 == 0]
    .copy()
)
donor_2a3_samples.shape

(40, 29)

## Process new samples

In [45]:
# the per-donor sample frames to combine; their order here is the row order after concatenation
df_ls = [donor_0f2_samples, donor_2a3_samples, donor_3e1_samples, donor_47d_samples, donor_5fc_samples]

In [46]:
# stack the per-donor samples row-wise into a single df
new_samples = pd.concat(df_ls, axis='index')
new_samples.shape

(227, 29)

In [47]:
# drop duplicate rows
new_samples = new_samples.drop_duplicates()
new_samples.shape[0]

227

In [48]:
# save as CSV file
#new_samples.to_csv('../data/Gelderman_SOD_cohort/new_samples.csv', index_label=False)

In [49]:
# summary statistics of the sampled triplets
new_samples.describe()

Unnamed: 0,PMI_days,year,est_stature_cm,est_weight_lb,est_stature_in,age_at_death,pred_BP_conf,true_SOD_G_head,pred_SOD_G,pred_SOD_G_conf,true_SOD_M,true_SOD_G_torso,true_SOD_G_limbs
count,227.0,227.0,227.0,227.0,227.0,227.0,227.0,2.0,29.0,29.0,4.0,2.0,0.0
mean,68.819383,2013.127753,176.81533,172.621145,69.612372,48.299559,98.392467,3.0,3.448276,0.943479,2.0,3.5,
std,52.755388,0.689026,8.972123,52.488744,3.532334,9.055295,3.542479,1.414214,0.631676,0.11605,1.154701,0.707107,
min,0.0,2012.0,165.1,102.0,65.000035,37.0,79.94,2.0,3.0,0.572499,1.0,3.0,
25%,24.0,2013.0,165.1,102.0,65.000035,37.0,98.745,2.5,3.0,0.951932,1.0,3.25,
50%,60.0,2013.0,177.8,200.0,70.000038,47.0,99.91,3.0,3.0,0.997762,2.0,3.5,
75%,105.0,2014.0,182.88,220.0,72.000039,56.0,99.99,3.5,4.0,0.999959,3.0,3.75,
max,264.0,2014.0,190.5,235.0,75.000041,63.0,100.0,4.0,5.0,1.0,3.0,4.0,


#### Prepare file for ICPUTRD labeling

In [50]:
# one (img, BP) frame per body region, built from the matching image column of new_samples
new_head, new_torso, new_limbs = (
    new_samples[[img_col]].rename(columns={img_col: 'img'}).assign(BP=bp_label)
    for img_col, bp_label in [('img_head', 'head'), ('img_torso', 'torso'), ('img_limbs', 'limbs')]
)
for bp_frame in (new_head, new_torso, new_limbs):
    display(bp_frame.head(2))

# stack the three frames into long format: one row per image
new_samples2 = pd.concat([new_head, new_torso, new_limbs], axis=0)
print(len(new_samples2))

# keep only the first occurrence of each image name
all_new_no_dups = new_samples2.drop_duplicates(subset='img').copy()
print(len(all_new_no_dups))
display(all_new_no_dups.head())

Unnamed: 0,img,BP
353207,0f201219.39.JPG,head
354628,0f201221.37.JPG,head


Unnamed: 0,img,BP
353207,0f201219.12.JPG,torso
354628,0f201221.27.JPG,torso


Unnamed: 0,img,BP
353207,0f201219.14.JPG,limbs
354628,0f201221.22.JPG,limbs


681
681


Unnamed: 0,img,BP
353207,0f201219.39.JPG,head
354628,0f201221.37.JPG,head
357156,0f201223.42.JPG,head
358599,0f201225.36.JPG,head
360719,0f201227.40.JPG,head


In [51]:
# create img_path column: <image root>/<first 3 characters of the file name>/<file name>
img_root = '/da1_data/icputrd/arf/mean.js/public/anau_img3/'
img_names = all_new_no_dups['img']
all_new_no_dups['img_path'] = img_root + img_names.str[:3] + '/' + img_names.astype(str)
display(all_new_no_dups.head())
len(all_new_no_dups)

Unnamed: 0,img,BP,img_path
353207,0f201219.39.JPG,head,/da1_data/icputrd/arf/mean.js/public/anau_img3/0f2/0f201219.39.JPG
354628,0f201221.37.JPG,head,/da1_data/icputrd/arf/mean.js/public/anau_img3/0f2/0f201221.37.JPG
357156,0f201223.42.JPG,head,/da1_data/icputrd/arf/mean.js/public/anau_img3/0f2/0f201223.42.JPG
358599,0f201225.36.JPG,head,/da1_data/icputrd/arf/mean.js/public/anau_img3/0f2/0f201225.36.JPG
360719,0f201227.40.JPG,head,/da1_data/icputrd/arf/mean.js/public/anau_img3/0f2/0f201227.40.JPG


681

In [52]:
# save to CSV file (no header row, no index) which will be used to populate ICPUTRD for labeling
labeling_csv_path = '/home/anau/SOD_labeling/for_PMI_estimation/additional_samples2.csv'
all_new_no_dups.loc[:, ['img_path', 'BP']].to_csv(labeling_csv_path, header=False, index=False)