In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
import math
import datetime

# Lift pandas' display limits so displayed frames are never truncated
# (a value of None means "no limit" for these options).
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)
#pd.set_option('max_colwidth', 700)

This notebook performs image feature extraction with the Gelderman SOD classification models: for each image, the embedding is taken from the layer just before the final classification layer (a dense layer of size 64). This yields three image embeddings per row (head, torso, and limbs), which are then averaged element-wise into a single image embedding.

# Import data

In [2]:
# Read the table of image triplets; each row names one head, one torso and
# one limbs image (columns img_head / img_torso / img_limbs).
triplets_csv_path = '../data/Gelderman_SOD_cohort/all_img_triplets.csv'
data = pd.read_csv(triplets_csv_path)

# Report the number of rows, then preview the first few.
print(data.shape[0])
display(data.head(5))

42080


Unnamed: 0,new_id,img_head,correct_img_date,date_placed_ARF,PMI_days,age_at_death,sex,cadaver_weight_lbs,est_stature_in,true_SOD_G_head,BP_of_true_SOD_G,donor_date,img_torso,true_SOD_G_torso,img_limbs,true_SOD_G_limbs
0,4,00400124.12.JPG,2018-01-24,2018-01-09,15.0,67.0,Female,171.0,64.0,3.0,head,400124,00400124.07.JPG,3.0,00400124.10.JPG,3.0
1,4,00400124.12.JPG,2018-01-24,2018-01-09,15.0,67.0,Female,171.0,64.0,3.0,head,400124,00400124.07.JPG,3.0,00400124.11.JPG,3.0
2,4,00400124.12.JPG,2018-01-24,2018-01-09,15.0,67.0,Female,171.0,64.0,3.0,head,400124,00400124.07.JPG,3.0,00400124.18.JPG,4.0
3,4,00400124.12.JPG,2018-01-24,2018-01-09,15.0,67.0,Female,171.0,64.0,3.0,head,400124,00400124.07.JPG,3.0,00400124.19.JPG,4.0
4,4,00400124.12.JPG,2018-01-24,2018-01-09,15.0,67.0,Female,171.0,64.0,3.0,head,400124,00400124.07.JPG,3.0,00400124.36.JPG,3.0


# Image feature extraction
Only needs to be run once.

In [62]:
# prepare files for CNN feature extraction (CNN_feature_extraction.py)
# each row is an image file name (no duplicates)
#
# This step only needs to be run once, so it is disabled by default.
# It is switched off with an explicit flag rather than by commenting lines out:
# previously only the first line of each multi-line call was commented, which
# left the indented continuation lines live and made the cell fail with an
# IndentationError when the notebook was run top to bottom.
REGENERATE_IMG_LISTS = False  # set to True to (re)write the three image-list files


def write_unique_image_lists(frame, out_dir='../data/Gelderman_SOD_cohort',
                             body_parts=('head', 'torso', 'limbs')):
    """Write one header-less, index-less CSV of unique image names per body part.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain a column named ``img_<part>`` for every entry of
        ``body_parts``.
    out_dir : str
        Directory that receives the files; each file is named
        ``<part>_imgs.csv``.
    body_parts : iterable of str
        Body-part names used to build both the column name and the file name.

    Returns
    -------
    list of str
        The paths that were written, in the order of ``body_parts``.

    Raises
    ------
    KeyError
        If ``frame`` lacks one of the expected ``img_<part>`` columns.
    """
    written_paths = []
    for part in body_parts:
        out_path = '{}/{}_imgs.csv'.format(out_dir, part)
        # keep the first occurrence of every file name, one name per line
        unique_names = frame[['img_' + part]].drop_duplicates(keep='first')
        unique_names.to_csv(out_path, header=False, index=False)
        written_paths.append(out_path)
    return written_paths


if REGENERATE_IMG_LISTS:
    write_unique_image_lists(data)

Perform feature extraction for the above three files by running "python3 CNN_feature_extraction.py". Make sure to modify the paths inside the script before running it.

### Process img embeddings and add to the dataframe

In [3]:
def _read_pickled_embeddings(pickle_path):
    """Unpickle and return the object stored at ``pickle_path``."""
    # NOTE: unpickling can execute arbitrary code, so only load trusted files.
    with open(pickle_path, 'rb') as handle:
        return pickle.load(handle)


# One lookup per body part; each is indexed by image file name further below.
# The number of entries is printed after each load.
head_img_embs_dict = _read_pickled_embeddings('../data/Gelderman_SOD_cohort/embeddings/head_img_emb_dict')
print(len(head_img_embs_dict))

torso_img_embs_dict = _read_pickled_embeddings('../data/Gelderman_SOD_cohort/embeddings/torso_img_emb_dict')
print(len(torso_img_embs_dict))

limbs_img_embs_dict = _read_pickled_embeddings('../data/Gelderman_SOD_cohort/embeddings/limbs_img_emb_dict')
print(len(limbs_img_embs_dict))

745
743
1837


In [5]:
# Build one averaged embedding per row of `data`, keyed by the row's index label.
idx_avg_emb = {}
image_triplets = zip(data.index, data['img_head'], data['img_torso'], data['img_limbs'])
for row_label, head_name, torso_name, limbs_name in image_triplets:
    # Look up each body part's embedding by image file name; element [0] of
    # the stored entry is the array that takes part in the average.
    head_vec = head_img_embs_dict[head_name][0]
    torso_vec = torso_img_embs_dict[torso_name][0]
    limbs_vec = limbs_img_embs_dict[limbs_name][0]

    # Unweighted mean of the three body-part embeddings, stored as a plain list.
    mean_vec = (head_vec + torso_vec + limbs_vec) / 3
    idx_avg_emb[row_label] = mean_vec.tolist()

In [6]:
# Turn the {row label: averaged embedding} dict into a frame with one row per
# label, then move the labels out of the index into an 'index' column so they
# can be used as a merge key.
avg_img_embeddings = (
    pd.DataFrame
    .from_dict(idx_avg_emb, orient='index')
    .reset_index()
)

display(avg_img_embeddings.head())
print(avg_img_embeddings.shape)

Unnamed: 0,index,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63
0,0,0.724824,1.149898,0.689984,1.239605,2.82372,0.768867,4.392087,2.28997,0.854456,1.994645,0.878973,1.475379,0.0,1.034006,0.745903,1.022248,2.885059,1.058317,1.267659,2.312767,0.0,0.0,1.503174,1.252649,1.465867,0.029327,2.485211,1.145958,0.0,0.623376,0.75348,1.694158,1.114506,1.730707,0.0,0.560717,0.990937,3.373392,1.368024,0.0,1.433687,0.46836,0.0,1.076626,0.0,0.400011,3.335856,0.639417,0.788177,0.0,0.354045,3.52474,2.901803,1.90379,3.055927,0.356006,0.0,0.212531,1.343125,2.152382,1.062701,1.635706,1.340923,0.0
1,1,0.716963,1.149898,0.955302,1.319903,2.705828,0.768867,4.392087,2.307821,0.854456,1.504295,0.878973,1.201398,0.0,0.896066,0.745903,0.907396,3.0215,0.9616,1.267659,2.386908,0.0,0.0,1.317928,1.52569,1.118749,0.06152,2.485211,0.965051,0.0,0.68489,0.75348,1.694158,0.89288,1.786678,0.0,0.560717,0.890066,3.153,1.368024,0.0,1.433687,0.393529,0.0,0.922296,0.0,0.400011,3.096745,0.562556,0.788177,0.0,0.354045,3.52474,2.901803,1.90379,3.200524,0.356006,0.0,0.178267,1.343125,2.153842,0.962503,1.635706,1.340923,0.0
2,2,0.0,1.149898,0.0,1.239605,3.062716,0.768867,4.392087,1.867076,0.854456,2.965985,0.878973,1.2656,0.0,2.985819,0.745903,1.480414,2.043196,0.959247,1.267659,2.312767,0.0,0.259414,2.555381,1.050051,2.345941,0.029327,4.311337,2.085147,0.206266,0.623376,1.085992,1.694158,2.53262,1.612098,0.0,0.560717,0.210517,3.469567,1.368024,0.0,1.433687,0.366301,0.0,3.380263,0.0,0.697988,3.412217,2.328625,0.788177,0.0,0.354045,3.52474,2.901803,1.90379,2.886595,0.356006,0.0,0.0,1.343125,2.997689,1.215456,1.635706,1.340923,0.0
3,3,0.0,1.149898,0.0,1.239605,3.025267,0.768867,4.392087,1.867076,0.854456,2.883476,0.878973,1.350307,0.0,2.9122,0.745903,1.394427,2.043196,0.959247,1.267659,2.312767,0.0,0.410184,2.610728,1.050051,2.318987,0.029327,4.337298,1.892848,0.153347,0.623376,1.375643,1.694158,2.450993,1.612098,0.0,0.560717,0.210517,3.338618,1.368024,0.0,1.433687,0.409887,0.0,3.286015,0.0,0.548375,3.40303,2.268347,0.788177,0.0,0.354045,3.52474,2.901803,1.90379,2.886595,0.356006,0.0,0.0,1.343125,2.97374,1.137876,1.635706,1.340923,0.0
4,4,0.867568,1.149898,0.503202,1.239605,2.375831,0.8756,4.392087,2.232578,0.854456,1.602763,0.878973,1.289241,0.0,0.851745,0.745903,0.785885,2.401414,0.959247,1.267659,2.312767,0.253486,0.0,1.102267,1.580117,1.318771,0.075269,2.485211,0.914396,0.0,0.623376,0.816278,1.694158,0.308897,2.233836,0.0,0.560717,0.655829,3.430094,1.368024,0.0,1.433687,0.757137,0.0,0.859736,0.0,0.400011,2.903947,0.549988,0.788177,0.0,0.354045,3.52474,2.901803,1.90379,3.249255,0.356006,0.0,0.125022,1.343125,2.538878,1.060257,1.635706,1.340923,0.0


(42080, 65)


In [8]:
# Attach the averaged embeddings to the original rows: the index of `data`
# is matched against the 'index' column of `avg_img_embeddings` (left join,
# so every row of `data` is kept).
data_w_embs = data.merge(
    avg_img_embeddings,
    how='left',
    left_index=True,
    right_on='index',
)

display(data_w_embs.head(10))
print(data_w_embs.shape)

Unnamed: 0,new_id,img_head,correct_img_date,date_placed_ARF,PMI_days,age_at_death,sex,cadaver_weight_lbs,est_stature_in,true_SOD_G_head,BP_of_true_SOD_G,donor_date,img_torso,true_SOD_G_torso,img_limbs,true_SOD_G_limbs,index,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63
0,4,00400124.12.JPG,2018-01-24,2018-01-09,15.0,67.0,Female,171.0,64.0,3.0,head,400124,00400124.07.JPG,3.0,00400124.10.JPG,3.0,0,0.724824,1.149898,0.689984,1.239605,2.82372,0.768867,4.392087,2.28997,0.854456,1.994645,0.878973,1.475379,0.0,1.034006,0.745903,1.022248,2.885059,1.058317,1.267659,2.312767,0.0,0.0,1.503174,1.252649,1.465867,0.029327,2.485211,1.145958,0.0,0.623376,0.75348,1.694158,1.114506,1.730707,0.0,0.560717,0.990937,3.373392,1.368024,0.0,1.433687,0.46836,0.0,1.076626,0.0,0.400011,3.335856,0.639417,0.788177,0.0,0.354045,3.52474,2.901803,1.90379,3.055927,0.356006,0.0,0.212531,1.343125,2.152382,1.062701,1.635706,1.340923,0.0
1,4,00400124.12.JPG,2018-01-24,2018-01-09,15.0,67.0,Female,171.0,64.0,3.0,head,400124,00400124.07.JPG,3.0,00400124.11.JPG,3.0,1,0.716963,1.149898,0.955302,1.319903,2.705828,0.768867,4.392087,2.307821,0.854456,1.504295,0.878973,1.201398,0.0,0.896066,0.745903,0.907396,3.0215,0.9616,1.267659,2.386908,0.0,0.0,1.317928,1.52569,1.118749,0.06152,2.485211,0.965051,0.0,0.68489,0.75348,1.694158,0.89288,1.786678,0.0,0.560717,0.890066,3.153,1.368024,0.0,1.433687,0.393529,0.0,0.922296,0.0,0.400011,3.096745,0.562556,0.788177,0.0,0.354045,3.52474,2.901803,1.90379,3.200524,0.356006,0.0,0.178267,1.343125,2.153842,0.962503,1.635706,1.340923,0.0
2,4,00400124.12.JPG,2018-01-24,2018-01-09,15.0,67.0,Female,171.0,64.0,3.0,head,400124,00400124.07.JPG,3.0,00400124.18.JPG,4.0,2,0.0,1.149898,0.0,1.239605,3.062716,0.768867,4.392087,1.867076,0.854456,2.965985,0.878973,1.2656,0.0,2.985819,0.745903,1.480414,2.043196,0.959247,1.267659,2.312767,0.0,0.259414,2.555381,1.050051,2.345941,0.029327,4.311337,2.085147,0.206266,0.623376,1.085992,1.694158,2.53262,1.612098,0.0,0.560717,0.210517,3.469567,1.368024,0.0,1.433687,0.366301,0.0,3.380263,0.0,0.697988,3.412217,2.328625,0.788177,0.0,0.354045,3.52474,2.901803,1.90379,2.886595,0.356006,0.0,0.0,1.343125,2.997689,1.215456,1.635706,1.340923,0.0
3,4,00400124.12.JPG,2018-01-24,2018-01-09,15.0,67.0,Female,171.0,64.0,3.0,head,400124,00400124.07.JPG,3.0,00400124.19.JPG,4.0,3,0.0,1.149898,0.0,1.239605,3.025267,0.768867,4.392087,1.867076,0.854456,2.883476,0.878973,1.350307,0.0,2.9122,0.745903,1.394427,2.043196,0.959247,1.267659,2.312767,0.0,0.410184,2.610728,1.050051,2.318987,0.029327,4.337298,1.892848,0.153347,0.623376,1.375643,1.694158,2.450993,1.612098,0.0,0.560717,0.210517,3.338618,1.368024,0.0,1.433687,0.409887,0.0,3.286015,0.0,0.548375,3.40303,2.268347,0.788177,0.0,0.354045,3.52474,2.901803,1.90379,2.886595,0.356006,0.0,0.0,1.343125,2.97374,1.137876,1.635706,1.340923,0.0
4,4,00400124.12.JPG,2018-01-24,2018-01-09,15.0,67.0,Female,171.0,64.0,3.0,head,400124,00400124.07.JPG,3.0,00400124.36.JPG,3.0,4,0.867568,1.149898,0.503202,1.239605,2.375831,0.8756,4.392087,2.232578,0.854456,1.602763,0.878973,1.289241,0.0,0.851745,0.745903,0.785885,2.401414,0.959247,1.267659,2.312767,0.253486,0.0,1.102267,1.580117,1.318771,0.075269,2.485211,0.914396,0.0,0.623376,0.816278,1.694158,0.308897,2.233836,0.0,0.560717,0.655829,3.430094,1.368024,0.0,1.433687,0.757137,0.0,0.859736,0.0,0.400011,2.903947,0.549988,0.788177,0.0,0.354045,3.52474,2.901803,1.90379,3.249255,0.356006,0.0,0.125022,1.343125,2.538878,1.060257,1.635706,1.340923,0.0
5,4,00400124.12.JPG,2018-01-24,2018-01-09,15.0,67.0,Female,171.0,64.0,3.0,head,400124,00400124.07.JPG,3.0,00400124.37.JPG,3.0,5,1.301861,1.149898,0.789077,1.341804,2.103987,0.966558,4.392087,2.060813,0.854456,1.169984,0.878973,1.391913,0.121516,0.668364,0.745903,0.683472,2.478359,0.959247,1.267659,2.567085,0.412549,0.0,0.770173,1.668692,1.145428,0.084165,2.485211,0.742358,0.0,0.623376,0.75348,1.694158,0.41539,2.376742,0.0,0.560717,1.226798,3.254744,1.752915,0.0,1.433687,0.558798,0.0,0.143504,0.0,0.400011,2.682185,0.310991,0.788177,0.0,0.354045,3.52474,2.901803,1.90379,3.415269,0.356006,0.0,0.393228,1.343125,2.284755,0.719033,1.635706,1.340923,0.0
6,4,00400124.12.JPG,2018-01-24,2018-01-09,15.0,67.0,Female,171.0,64.0,3.0,head,400124,00400124.07.JPG,3.0,00400124.42.JPG,4.0,6,0.0,1.149898,0.0,1.239605,2.463116,0.768867,4.392087,1.867076,0.854456,2.218313,0.878973,1.10311,0.0,2.220397,0.745903,1.182635,2.043196,0.959247,1.267659,2.312767,0.0,0.186183,1.823472,1.074989,2.224841,0.029327,3.710032,1.615941,0.124756,0.623376,0.917792,1.694158,2.035469,1.612098,0.0,0.657694,0.264304,3.035237,1.368024,0.0,1.433687,0.128192,0.0,2.023686,0.0,0.400011,3.006443,1.745337,0.788177,0.0,0.354045,3.52474,2.901803,1.905486,2.886595,0.356006,0.0,0.0,1.343125,2.330603,0.250448,1.811323,1.340923,0.0
7,4,00400124.12.JPG,2018-01-24,2018-01-09,15.0,67.0,Female,171.0,64.0,3.0,head,400124,00400124.07.JPG,3.0,00400124.43.JPG,4.0,7,0.0,1.149898,0.0,1.239605,2.305528,0.768867,4.392087,1.867076,0.854456,1.852938,0.878973,1.003674,0.0,2.120631,0.745903,1.127494,2.043196,0.959247,1.267659,2.312767,0.0,0.083097,1.609205,1.203304,1.924771,0.029327,3.648593,1.413234,0.172621,0.623376,0.936313,1.694158,1.841371,1.612098,0.0,0.630838,0.210517,2.777541,1.368024,0.0,1.433687,0.030999,0.0,1.869062,0.0,0.506166,3.004587,1.543215,0.788177,0.0,0.354045,3.52474,2.901803,1.90379,2.886595,0.356006,0.0,0.0,1.343125,2.15615,0.233491,1.736922,1.340923,0.0
8,4,00400124.12.JPG,2018-01-24,2018-01-09,15.0,67.0,Female,171.0,64.0,3.0,head,400124,00400124.07.JPG,3.0,00400124.48.JPG,3.0,8,1.36455,1.196648,1.202601,1.72275,2.092651,0.768867,4.392087,2.174436,0.854456,0.745564,0.878973,1.317308,0.064979,0.768897,0.745903,0.342631,3.052859,0.959247,1.267659,2.473674,0.345172,0.0,0.614976,1.594949,1.142078,0.568776,2.485211,0.880425,0.0,1.483663,0.75348,1.694158,0.0,2.688838,0.01857,0.560717,0.672527,2.822619,2.387705,0.0,1.433687,0.878423,0.0,0.412849,0.0,0.400011,2.533387,0.820365,0.788177,0.205816,0.354045,3.52474,2.901803,1.90379,3.745479,0.833263,0.0,0.083542,1.343125,2.48711,0.64365,1.635706,1.340923,0.0
9,4,00400124.12.JPG,2018-01-24,2018-01-09,15.0,67.0,Female,171.0,64.0,3.0,head,400124,00400124.07.JPG,3.0,00400124.50.JPG,3.0,9,2.325711,1.149898,2.015438,2.271382,2.663424,0.860615,4.392087,2.10403,0.854456,1.41329,0.878973,1.429249,0.14708,0.668364,0.745903,0.899392,2.884044,0.959247,1.267659,3.483876,0.286416,0.0,1.04108,1.934499,1.810672,0.029327,2.485211,1.72493,0.0,0.627335,0.75348,1.694158,1.144271,2.683677,0.184001,0.560717,1.453448,3.966561,2.678915,0.0,1.433687,1.31813,0.0,0.720403,0.0,0.400011,3.220469,0.373965,0.788177,0.0,0.354045,3.52474,2.901803,1.90379,4.071622,1.325187,0.0,0.525813,1.343125,3.287059,1.588457,1.635706,1.340923,0.0


(42080, 81)


In [9]:
# Persist the merged table in CSV format without the row index.
# The target file name intentionally stays as-is (it has no ".csv" suffix).
embeddings_output_path = '../data/Gelderman_SOD_cohort/data_w_embs'
data_w_embs.to_csv(embeddings_output_path, index=False)