# Negative samples for the dataset

In [3]:
import pandas as pd
import numpy as np
import pydicom
import png
import math
from PIL import Image
from pathlib import Path
import boto3
import matplotlib.pyplot as plt

data_path = '/home/szelesteya/projects/EMBED_Open_Data/'
tables_path = data_path + 'tables/'
image_root_path = '/media/szelesteya/F824D4D024D492CC/EMBED-images/'
image_dcm_path = image_root_path + 'dicom-negative/'
image_png_path = image_root_path + 'negative-full'

In [4]:
cli_df = pd.read_csv(tables_path + 'EMBED_OpenData_clinical.csv', low_memory=False)

# Only keeping result BIRADS-1 and BIRADS-2 screenings
neg_cli_df = cli_df[(cli_df.asses.isin(['N'])) & 
                    cli_df.desc.str.contains('screen', case=False)][['Unnamed: 0',
                                                                     'empi_anon',
                                                                     'acc_anon',
                                                                     'side',
                                                                     'calcfind',
                                                                     'calcdistri',
                                                                     'otherfind',
                                                                     'numfind',
                                                                     'path_severity',
                                                                     'age_at_study',
                                                                     'ETHNICITY_DESC',
                                                                     'study_date_anon',
                                                                     'asses']]

In [5]:
# Merging clinical information with medical ones
meta_df = (pd.read_csv(tables_path + 'EMBED_OpenData_metadata_reduced.csv', low_memory=False))
meta_red_df = meta_df[(meta_df['spot_mag'] != 1) &
                      (meta_df['FinalImageType'] == '2D') &
                      (meta_df['ViewPosition'] == 'CC')][[  'empi_anon',
                                                            'acc_anon',
                                                            'ImageLateralityFinal',
                                                            'anon_dicom_path',
                                                            'ViewPosition',
                                                            'ROI_coords']]

# Rename columns to prepare for merge
meta_red_ren_df = meta_red_df.rename(columns={'ImageLateralityFinal':'side'})

In [6]:
neg_full_df = neg_cli_df.merge(meta_red_ren_df, on=['empi_anon','acc_anon','side'])

# Keeping relevant columns
neg_empi_df = neg_full_df[[ 'empi_anon',
                           'acc_anon',
                           'anon_dicom_path',
                           'side',
                           'asses',
                           'age_at_study',
                           'calcfind',
                           'calcdistri',
                           'otherfind',
                           'numfind',
                           'ROI_coords',
                           'ETHNICITY_DESC',
                           'study_date_anon']]

# Rename columns to be more consistent
neg_empi_df = neg_empi_df.rename(columns={'ETHNICITY_DESC':'eth_desc',
                                          'calcfind':'calc_find',
                                          'calcdistri':'calc_distrib',
                                          'otherfind':'other_find',
                                          'numfind':'num_find'})

# Generate paths for png extraction
neg_empi_df.loc[:, 'relative_dcm_path'] = neg_empi_df['anon_dicom_path'].apply(lambda x: '/'.join(x.split('/')[5:]))

In [7]:
# Ordering negative data by study date
neg_empi_ord_df = neg_empi_df.sort_values(by=['study_date_anon'], ascending=False)

# Choosing unique patients
neg_empi_fin_df = neg_empi_ord_df.drop_duplicates(subset=['empi_anon','acc_anon','side'], keep='first')[['empi_anon',
                                                                                       'acc_anon',
                                                                                       'side',
                                                                                       'asses',
                                                                                       'age_at_study',
                                                                                       'relative_dcm_path',
                                                                                       'calc_find',
                                                                                       'calc_distrib',
                                                                                       'other_find',
                                                                                       'num_find',
                                                                                       'ROI_coords',
                                                                                       'eth_desc',
                                                                                       'study_date_anon']]

In [2]:
# neg_empi_fin_df[(neg_empi_fin_df['ROI_coords'] == '()') & neg_empi_fin_df['num_find'] == 1]

NameError: name 'neg_empi_fin_df' is not defined

In [8]:
with open(data_path + 'negative_empirical.csv', 'w') as f:
    neg_empi_fin_df.to_csv(f, index=False)

In [9]:
with open(data_path + 'negative_path.csv', 'w') as f:
    neg_empi_fin_df['relative_dcm_path'].to_csv(f, index=False)

In [10]:
%%bash -s "$image_dcm_path" "{data_path}negative_path.csv"

# Pulling dicom files with AWS CLI (Python API didn't work)
dcm_dest_path="$1"
dcm_paths="$2"
ind=$((1))

tail -n +2 $dcm_paths | head -n 200 | tail -n 100 | while IFS= read -r line; do
    relative_path=$(echo "$line" | awk -v OFS='/' '{$1=$1; print}')
    dcm_name=$(echo "$relative_path" | cut -d '/' -f 3-)
               
    file="${dcm_dest_path}$relative_path"
    dir=$(dirname "${dcm_dest_path}$dcm_name")
    echo "$ind / 1000"
    # echo "$file"
    # echo "${dcm_dest_path}$dcm_name"
    # echo "$dir"
    if [ -f "${dcm_dest_path}$dcm_name" ]; then
        echo "Moving file $file"
        mkdir -p $(dirname "$file") & mv "${dcm_dest_path}$dcm_name" "${dcm_dest_path}$relative_path"
        rmdir $dir
    else        
        if [ -f "$file" ]; then
            echo "File already present"
            rmdir $dir
        else
            echo "Pulling file $file"
            aws s3 cp "s3://embed-dataset-open/images/$relative_path" "${dcm_dest_path}/$dcm_name" --profile my-dev-profile
        fi
    fi

    ind=$((ind+1))
    clear
done

1 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.849.113973.3.60.1.62229455.20200820.1142447/1.2.841.113686.2750824660.1597905726.4609.53499': No such file or directory


[H[2J2 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.844.113976.3.63.1.60684188.20191210.1133601/1.2.849.113682.2750824547.1575268587.4755.245082': No such file or directory


[H[2J3 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.844.113976.3.64.1.60145817.20190923.1091857/1.2.840.113689.2750825171.1568616407.4986.19892': No such file or directory


[H[2J4 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.842.113977.3.65.1.60125080.20190912.1145047/1.2.848.113687.2750824502.1568269775.4748.315249': No such file or directory


[H[2J5 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.846.113971.3.59.1.60657632.20191132.1150636/1.2.845.113682.2750824498.1574750062.4413.279955': No such file or directory


[H[2J6 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.845.113977.3.57.1.60988685.20200122.1111133/1.2.840.113622.2.408.1196017357155224.27885200115111547.10009': No such file or directory


[H[2J7 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.846.113971.3.58.1.62077516.20200730.1120807/1.2.848.113684.2750851775.1595920679.5030.58683': No such file or directory


[H[2J8 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.841.113976.3.65.1.60124396.20190914.1094129/1.2.848.113688.2750824504.1568269770.4749.130658': No such file or directory


[H[2J9 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.847.113973.3.57.1.61837447.20200626.1115647/1.2.847.113689.2750828067.1592460498.4144.3018': No such file or directory


[H[2J10 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.848.113971.3.57.1.60805113.20191222.1092631/1.2.847.113685.2750824981.1576649560.4319.20661': No such file or directory


[H[2J11 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.846.113978.3.65.1.60161800.20190922.1102563/1.2.840.113682.2750824976.1568787029.4553.23701': No such file or directory


[H[2J12 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.840.113978.3.59.1.60324295.20191019.1110261/1.2.847.113681.2750828067.1570687501.5080.30094': No such file or directory


[H[2J13 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.845.113970.3.60.1.59929797.20190817.1132418/1.2.845.113682.2750824546.1565764301.4311.405317': No such file or directory


[H[2J14 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.845.113970.3.65.1.62000061.20200723.1072959/1.2.848.113685.2750824546.1594882225.4901.1982': No such file or directory


[H[2J15 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.848.113975.3.65.1.60442666.20191037.1101034/1.2.843.113684.2750824981.1572244686.4100.32711': No such file or directory


[H[2J16 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.845.113973.3.59.1.60606856.20191125.1124834/1.2.846.113687.2750825167.1574144156.4670.40879': No such file or directory


[H[2J17 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.843.113970.3.58.1.60005836.20190830.1104161/1.2.840.113685.2750824656.1566801294.4352.216031': No such file or directory


[H[2J18 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.844.113970.3.65.1.61342333.20200310.1075645/1.2.843.113682.2750824977.1583301683.3918.6118': No such file or directory


[H[2J19 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.841.113979.3.59.1.61403184.20200312.1131516/1.2.842.113687.2750824662.1583994906.4589.510938': No such file or directory


[H[2J20 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.844.113976.3.65.1.60236663.20190934.1101118/1.2.844.113689.2750824482.1569566102.4557.39963': No such file or directory


[H[2J21 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.845.113972.3.66.1.60304016.20191016.1131362/1.2.843.113684.2750824499.1570516273.4774.384702': No such file or directory


[H[2J22 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.843.113974.3.65.1.60714375.20191210.1123943/1.2.845.113689.2750824547.1575533331.4109.98498': No such file or directory


[H[2J23 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.846.113976.3.65.1.60766082.20191217.1074030/1.2.841.113687.2750824490.1576218498.4666.10174': No such file or directory


[H[2J24 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.848.113973.3.64.1.61788975.20200617.1094734/1.2.843.113690.2750824547.1591771553.4260.16492': No such file or directory


[H[2J25 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.848.113972.3.62.1.59819509.20190738.1091336/1.2.849.113683.2750828066.1564380523.4125.80512': No such file or directory


[H[2J26 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.843.113974.3.57.1.61767279.20200615.1115945/1.2.844.113683.2750828069.1591427046.4704.204507': No such file or directory


[H[2J27 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.848.113977.3.63.1.60186333.20190920.1075363/1.2.846.113687.2750824979.1568960333.4153.10199': No such file or directory


[H[2J28 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.840.113979.3.58.1.60617367.20191125.1143539/1.2.847.113687.2750824984.1574230094.4417.44638': No such file or directory


[H[2J29 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.846.113970.3.58.1.60035343.20190831.1100955/1.2.846.113681.2750824976.1567058865.4876.34571': No such file or directory


[H[2J30 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.847.113979.3.59.1.60656899.20191127.1114218/1.2.847.113690.2750828053.1574748125.3001.450852': No such file or directory


[H[2J31 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.841.113971.3.61.1.59893337.20190813.1153134/1.2.846.113687.2750824499.1565263574.2612.68158': No such file or directory


[H[2J32 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.845.113975.3.62.1.61123504.20200207.1145205/1.2.847.113688.2750825165.1580711000.4362.76996': No such file or directory


[H[2J33 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.846.113975.3.63.1.59820895.20190737.1110058/1.2.846.113681.2750824977.1564380409.4209.28267': No such file or directory


[H[2J34 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.849.113974.3.63.1.60612425.20191121.1075921/1.2.840.113689.2750828046.1574316188.3129.650025': No such file or directory


[H[2J35 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.840.113976.3.66.1.61283040.20200234.1094051/1.2.840.113684.2750824502.1582625767.3957.29017': No such file or directory


[H[2J36 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.848.113973.3.62.1.59614765.20190626.1134159/1.2.849.113682.2750828062.1561530745.4492.232812': No such file or directory


[H[2J37 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.845.113973.3.57.1.59850919.20190804.1075032/1.2.842.113690.2750851775.1564730808.4268.35720': No such file or directory


[H[2J38 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.844.113974.3.61.1.60417787.20191029.1121143/1.2.848.113682.2750824545.1571898620.4811.224654': No such file or directory


[H[2J39 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.841.113970.3.61.1.61414378.20200322.1145125/1.2.846.113689.2750851773.1584082845.4129.282618': No such file or directory


[H[2J40 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.844.113973.3.58.1.61023946.20200126.1102261/1.2.840.113683.2750824542.1579598731.4868.35916': No such file or directory


[H[2J41 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.848.113971.3.66.1.59724039.20190723.1111941/1.2.844.113683.2750851752.1563174374.4221.26853': No such file or directory


[H[2J42 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.840.113688.2750824981.1570107286.3480.19704/1.2.847.113682.2750824981.1570107283.3481.19708': No such file or directory


[H[2J43 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.846.113975.3.64.1.59939284.20190821.1104803/1.2.846.113684.2750824545.1565850523.4519.202031': No such file or directory


[H[2J44 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.845.113970.3.63.1.61397999.20200320.1081327/1.2.844.113689.2750824505.1583994738.4608.52234': No such file or directory


[H[2J45 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.843.113977.3.64.1.59741415.20190725.1101356/1.2.849.113682.2750824550.1563344994.4266.107483': No such file or directory


[H[2J46 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.848.113979.3.65.1.60207791.20190928.1092761/1.2.843.113681.2750828051.1569306139.4141.302961': No such file or directory


[H[2J47 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.849.113972.3.57.1.59666060.20190706.1102955/1.2.841.113683.2130706441.1562308352.4291.83298': No such file or directory


[H[2J48 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.846.113979.3.63.1.61065701.20200135.1080543/1.2.841.113683.2750824978.1580104944.3897.8206': No such file or directory


[H[2J49 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.847.113975.3.64.1.59612467.20190633.1095615/1.2.845.113686.2750824542.1561531254.4581.25522': No such file or directory


[H[2J50 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.847.113972.3.61.1.61129127.20200207.1093658/1.2.840.113690.2750824659.1580797952.2451.309845': No such file or directory


[H[2J51 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.844.113978.3.58.1.61006604.20200118.1094355/1.2.844.113690.2750824549.1579242931.5086.53442': No such file or directory


[H[2J52 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.848.113979.3.62.1.60322827.20191015.1110334/1.2.848.113689.2750828067.1570687508.5082.26943': No such file or directory


[H[2J53 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.848.113975.3.58.1.61092171.20200139.1114126/1.2.849.113687.2750851754.1580366715.3558.45451': No such file or directory


[H[2J54 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.845.113974.3.59.1.61129919.20200209.1120911/1.2.849.113689.2750851776.1580800197.4214.61847': No such file or directory


[H[2J55 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.845.113971.3.58.1.60787975.20191225.1135444/1.2.841.113686.2750824544.1576477904.3952.221937': No such file or directory


[H[2J56 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.849.113977.3.61.1.61190468.20200212.1080808/1.2.841.113686.2750824501.1581489275.4492.82552': No such file or directory


[H[2J57 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.847.113973.3.64.1.59416539.20190529.1094058/1.2.845.113686.2750828065.1559026896.1121.155113': No such file or directory


[H[2J58 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.844.113971.3.58.1.59974036.20190828.1072963/1.2.845.113683.2750828066.1566367307.5001.61354': No such file or directory


[H[2J59 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.845.113978.3.58.1.60202470.20190924.1092215/1.2.847.113682.2750824502.1569220284.4149.128399': No such file or directory


[H[2J60 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.847.113977.3.66.1.61150195.20200208.1134160/1.2.848.113682.2750824980.1580968278.5029.34910': No such file or directory


[H[2J61 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.849.113973.3.62.1.59620498.20190635.1094222/1.2.843.113684.2750824545.1561617297.4412.17811': No such file or directory


[H[2J62 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.842.113977.3.65.1.61085805.20200134.1111220/1.2.841.113689.2750851776.1580290788.4207.34878': No such file or directory


[H[2J63 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.849.113974.3.65.1.61085701.20200138.1104231/1.2.847.113690.2750824482.1580279690.2721.38771': No such file or directory


[H[2J64 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.840.113974.3.60.1.59573134.20190622.1093924/1.2.843.113682.2750824500.1561012169.4377.108278': No such file or directory


[H[2J65 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.847.113979.3.59.1.61211173.20200218.1120717/1.2.845.113681.2750851776.1581663678.4505.64175': No such file or directory


[H[2J66 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.849.113976.3.65.1.59295512.20190514.1135611/1.2.844.113689.2750824551.1557383315.3995.44412': No such file or directory


[H[2J67 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.842.113978.3.59.1.61295419.20200228.1075008/1.2.844.113690.2750824977.1582783399.3874.7985': No such file or directory


[H[2J68 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.843.113976.3.65.1.59772146.20190731.1130816/1.2.848.113681.2750824507.1563776344.5047.162321': No such file or directory


[H[2J69 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.848.113977.3.61.1.59517471.20190615.1131659/1.2.840.113684.2750824980.1560319503.5156.43663': No such file or directory


[H[2J70 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.848.113973.3.62.1.59411905.20190530.1/1.2.848.113688.2750824664.1559025305.4667.22466': No such file or directory


[H[2J71 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.847.113975.3.58.1.59886270.20190817.1075429/1.2.842.113690.2750824981.1565244729.2267.6894': No such file or directory


[H[2J72 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.847.113979.3.62.1.60884964.20191231.1082736/1.2.841.113682.2750824491.1577771764.5101.218405': No such file or directory


[H[2J73 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.844.113976.3.65.1.60945533.20200117.1101158/1.2.844.113621.2.404.1196017357155222.13960200109104056.10017': No such file or directory


[H[2J74 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.844.113975.3.60.1.59752594.20190721.1105126/1.2.843.113684.2750825164.1563429871.860.32247': No such file or directory


[H[2J75 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.845.113978.3.64.1.60606309.20191127.1104945/1.2.840.113689.2750824550.1574145038.4488.123624': No such file or directory


[H[2J76 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.841.113979.3.60.1.59533325.20190615.1090729/1.2.847.113687.2750828067.1560495626.5078.231363': No such file or directory


[H[2J77 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.840.113977.3.65.1.59749343.20190720.1/1.2.846.113689.2750825172.1563429869.857.4958': No such file or directory


[H[2J78 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.843.113977.3.59.1.59236891.20190507.1093039/1.2.849.113690.2750824975.1556690771.5244.24894': No such file or directory


[H[2J79 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.844.113973.3.65.1.59817799.20190735.1105428/1.2.843.113689.2750824979.1564380412.4210.32489': No such file or directory


[H[2J80 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.840.113978.3.66.1.61071493.20200132.1135226/1.2.847.113685.2750824663.1580106766.4816.622584': No such file or directory


[H[2J81 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.842.113970.3.58.1.59664232.20190711.1141318/1.2.842.113682.2750824549.1562308345.4820.133405': No such file or directory


[H[2J82 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.840.113970.3.65.1.59285146.20190512.1091622/1.2.849.113687.2750824549.1557300241.3210.23762': No such file or directory


[H[2J83 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.846.113979.3.58.1.59982268.20190822.1101847/1.2.849.113688.2750824502.1566455617.4226.79988': No such file or directory


[H[2J84 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.842.113974.3.64.1.59579956.20190624.1095453/1.2.848.113688.2750824499.1561098917.4971.83920': No such file or directory


[H[2J85 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.848.113976.3.66.1.59784366.20190728.1113803/1.2.844.113688.2750824981.1563861972.5184.27607': No such file or directory


[H[2J86 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.840.113970.3.61.1.61241869.20200228.1141625/1.2.845.113681.2750824545.1582094081.4561.284821': No such file or directory


[H[2J87 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.845.113970.3.58.1.59913702.20190819.1121111/1.2.842.113689.2750828067.1565589467.5029.32194': No such file or directory


[H[2J88 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.845.113971.3.60.1.59616249.20190634.1090136/1.2.840.113690.2750824500.1561617313.4625.69257': No such file or directory


[H[2J89 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.848.113975.3.58.1.60667329.20191129.1144622/1.2.841.113684.2750824503.1574836135.4339.265842': No such file or directory


[H[2J90 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.841.113976.3.58.1.60459615.20191033.1093545/1.2.842.113683.2750824503.1572416995.4308.126443': No such file or directory


[H[2J91 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.847.113974.3.62.1.60530795.20191111.1135117/1.2.846.113687.2750824507.1573194731.2013.254591': No such file or directory


[H[2J92 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.843.113973.3.61.1.59802810.20190732.1151223/1.2.849.113685.2750825173.1564034857.4778.46699': No such file or directory


[H[2J93 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.846.113977.3.65.1.60898816.20200108.1132233/1.2.842.113688.2750851776.1577948455.4359.78533': No such file or directory


[H[2J94 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.846.113973.3.65.1.59590171.20190628.1/1.2.843.113690.2750828070.1561358670.5081.62564': No such file or directory


[H[2J95 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.845.113970.3.66.1.60615978.20191130.1103323/1.2.848.113686.2750824973.1574316549.4245.34271': No such file or directory


[H[2J96 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.849.113975.3.57.1.60601860.20191126.1093606/1.2.848.113690.2750824503.1574145045.4652.93066': No such file or directory


[H[2J97 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.840.113977.3.65.1.60735566.20191214.1100456/1.2.845.113682.2750824976.1575872007.4805.27979': No such file or directory


[H[2J98 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.848.113977.3.66.1.60984497.20200116.1154344/1.2.847.113686.2750824498.1578983976.4480.257677': No such file or directory


[H[2J99 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.843.113975.3.61.1.60965565.20200119.1081928/1.2.846.113689.2750828066.1578895705.4947.18632': No such file or directory


[H[2J100 / 1000
File already present


rmdir: failed to remove '/media/szelesteya/F824D4D024D492CC/EMBED-images/dicom-negative/1.2.849.113970.3.65.1.60588064.20191118.1071251/1.2.848.113686.2750828054.1574059427.2781.67880': No such file or directory


[H[2J

In [11]:
# Rescale the intensity of the image to get heterogene images with the bit depth of 14
def rescale_to_8bit(image_array):
    upper_percentile = np.percentile(image_array.flatten(), 98) # original_max = np.max(image_array)
    lower_percentile = np.percentile(image_array.flatten(), 2) # original_min = np.min(image_array)
    # max_on_14bit = 16383
    max = 255
    rescaled_array = (image_array - lower_percentile) / (upper_percentile - lower_percentile)
    rescaled_array[rescaled_array < 0] = 0
    rescaled_array[rescaled_array > 1] = 1
    # rescaled_array = np.round((image_array - original_min) / (original_max - original_min) * max_on_14bit).astype(int)
    return np.round(rescaled_array * 255).astype(np.uint8)

# Save DICOM pixel array as PNG
def save_dcm_image_as_png(image, png_filename, bitdepth=8):
    with open(png_filename, 'wb') as f:
        rescaled = rescale_to_8bit(image)
        writer = png.Writer(height=rescaled.shape[0], 
                            width=rescaled.shape[1], 
                            bitdepth=bitdepth, 
                            greyscale=True)
        writer.write(f, rescaled.tolist())

def generate_png_path(empi_anon, png_dir):
    # Get new file name
    new_fn = f"{empi_anon}_neg_conv.png"
    return f'{png_dir}/{new_fn}'

# Convert list of DICOMs to PNGs
def process_dcm_list(dcm_list, png_list):    
    for i, dcm_path in enumerate(dcm_list):    
        if not Path(png_list[i]).exists():
            print(f"Processing DICOM #{i}...")
            
            # Load DICOM
            dcm = pydicom.dcmread(dcm_path)
            img = dcm.pixel_array
            
            # Save PNG            
            save_dcm_image_as_png(img, png_list[i])

def extract_images(data_file_name, dcm_dir, png_dir):
    # Provide a list of DICOM paths and a target directory
    dcm_list = []
    df = pd.read_csv(data_file_name)
    
    for index, row in df.iterrows():
        path = dcm_dir  + row['relative_dcm_path']
        if Path(path).exists():
            dcm_list.append(path)
        
    # Insert png path
    df.loc[:,'png_path'] = df['empi_anon'].apply(lambda x: generate_png_path(x, png_dir))

    # Convert DICOMs
    process_dcm_list(dcm_list[0:300], df['png_path'])

    return df

In [12]:
neg_img_emp = extract_images(data_path + 'negative_empirical.csv', image_dcm_path, image_png_path)
# negative['intensity'] = calculate_intensity_of_png(negative)

with open(data_path + 'negative_empirical_png.csv', 'w') as f:
    (neg_img_emp)[['empi_anon',
                'acc_anon',
                'side',
                'age_at_study',
                'relative_dcm_path',
                'calc_find',
                'calc_distrib',
                'other_find',
                'num_find',
                'eth_desc',
                'study_date_anon',
                'png_path']].to_csv(f, index=False)

Processing DICOM #63...


In [14]:
neg_img_emp[~neg_img_emp['other_find'].notna()]


Unnamed: 0,empi_anon,acc_anon,side,asses,age_at_study,relative_dcm_path,calc_find,calc_distrib,other_find,num_find,ROI_coords,eth_desc,study_date_anon,png_path
0,29011563,1572834723540517,R,N,82.651937,cohort_1/29011563/1.2.846.113979.3.61.1.622904...,,,,1,(),African American or Black,2021-02-17,/media/szelesteya/F824D4D024D492CC/EMBED-image...
1,11485524,6356263291812846,L,N,74.038481,cohort_1/11485524/1.2.844.113973.3.64.1.622065...,,,,1,(),African American or Black,2021-02-10,/media/szelesteya/F824D4D024D492CC/EMBED-image...
2,41042249,9346948292263998,R,N,67.763198,cohort_2/41042249/1.2.845.113978.3.60.1.621182...,,,,1,(),Caucasian or White,2021-01-31,/media/szelesteya/F824D4D024D492CC/EMBED-image...
3,12927057,6114291372605113,R,N,68.045203,cohort_2/12927057/1.2.841.113975.3.61.1.622750...,,,,1,(),Caucasian or White,2021-01-24,/media/szelesteya/F824D4D024D492CC/EMBED-image...
4,60191895,9641409346677793,R,N,62.073828,cohort_1/60191895/1.2.843.113978.3.64.1.621798...,,,,1,(),African American or Black,2021-01-23,/media/szelesteya/F824D4D024D492CC/EMBED-image...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1941,10412436,4784297769562945,R,N,80.519107,cohort_2/10412436/1.2.842.113970.3.61.1.473439...,,,,1,(),Caucasian or White,2012-09-19,/media/szelesteya/F824D4D024D492CC/EMBED-image...
1942,72504682,7661551911935583,R,N,57.318083,cohort_2/72504682/1.2.847.113977.3.66.1.474765...,,,,1,(),Asian,2012-09-18,/media/szelesteya/F824D4D024D492CC/EMBED-image...
1943,79148688,5277430237922202,L,N,56.923825,cohort_2/79148688/1.2.847.113976.3.62.1.473578...,,,,1,(),Caucasian or White,2012-08-26,/media/szelesteya/F824D4D024D492CC/EMBED-image...
1944,60319885,1310615916903458,R,N,59.650780,cohort_2/60319885/1.2.849.113976.3.64.1.474447...,,,,1,(),African American or Black,2012-08-23,/media/szelesteya/F824D4D024D492CC/EMBED-image...
