In [99]:
import sys, os, shutil, PIL
from pathlib import Path
import pandas as pd
from PIL import Image
import numpy as np

In [100]:
basedir = Path(os.path.abspath("../data/cbis-ddsm/"))

In [101]:
calc_test = pd.read_csv(basedir / "calc_case_description_test_set.csv")
mass_test = pd.read_csv(basedir / "mass_case_description_test_set.csv")
meta = pd.read_csv(basedir / "manifest-ZkhPvrLo5216730872708713142" / "metadata2.csv")

In [102]:
# Align their columns
calc_test = calc_test.rename(columns={
    "breast density":"breast_density",
    })

# Concat test cases
all_test = pd.concat([calc_test, mass_test])

In [103]:
all_test['patient_id'].nunique()

349

In [147]:
all_test['image file path'].nunique()

645

In [104]:
def ambigious_labels(df, pathology_variable):
    # Remove cases with ambiguous labels, as those correspond to ROI-level labels (rather than whole image)
    counts = df.groupby(["patient_id"])[pathology_variable].nunique().reset_index(name='count') \
                                .sort_values(['count'], ascending=False)
    print(counts)
    #multi_label_list = list(counts[counts["count"] > 1]["image file path"])
    #df = df[~df["image file path"].isin(multi_label_list)]

In [105]:
ambigious_labels(calc_test, "pathology_fixed")
ambigious_labels(mass_test, "pathology_fixed")

KeyError: 'Column not found: pathology_fixed'

In [106]:
def clean_dupli(df):
    df = df.sort_values(by=['pathology'], ascending=False)
    df.drop_duplicates(subset=['image file path'], inplace=True)
    return df

In [107]:
calc_test2 = clean_dupli(calc_test)
mass_test2 = clean_dupli(mass_test)

In [144]:
def get_specs(df):
    print("Unique patient_id")
    print(df['patient_id'].nunique())
    df["pathology_fixed"] = "BENIGN"
    df.loc[(df["pathology"] == "MALIGNANT"), "pathology_fixed"] = "MALIGNANT"
    #pathology_counts = df.groupby("pathology_fixed").nunique()[["image file path"]]
    #print(pathology_counts)
    mdf = df[df["pathology_fixed"] == "MALIGNANT"]
    bdf = df[df["pathology_fixed"] == "BENIGN"]
    
    print("\n")
    print("Malignant")
    print(mdf[["patient_id"]].nunique())
    print("\n")
    print("Benign")
    print(bdf[["patient_id"]].nunique())

In [145]:
# Calc test set
get_specs(calc_test)

Unique patient_id
151


Malignant
patient_id    67
dtype: int64


Benign
patient_id    85
dtype: int64


In [146]:
# Mass test set
get_specs(mass_test)

Unique patient_id
201


Malignant
patient_id    85
dtype: int64


Benign
patient_id    121
dtype: int64


In [112]:
calc_test.groupby("patient_id").nunique()

Unnamed: 0_level_0,breast_density,left or right breast,image view,abnormality id,abnormality type,calc type,calc distribution,assessment,pathology,subtlety,image file path,cropped image file path,ROI mask file path,pathology_fixed
patient_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
P_00038,1,2,2,2,1,2,1,2,2,2,4,6,6,1
P_00041,1,1,2,1,1,1,0,1,1,1,2,2,2,1
P_00077,1,2,2,2,1,3,0,1,1,2,4,6,6,1
P_00100,1,1,2,1,1,1,1,1,1,1,2,2,2,1
P_00127,1,1,2,1,1,1,1,1,1,1,2,2,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
P_02420,1,1,2,1,1,1,1,1,1,1,2,2,2,1
P_02432,1,1,2,1,1,2,1,1,1,1,2,2,2,1
P_02464,1,1,2,1,1,1,1,1,1,1,2,2,2,1
P_02498,1,1,2,1,1,1,1,1,1,1,2,2,2,1


In [72]:
calc_test

Unnamed: 0,patient_id,breast_density,left or right breast,image view,abnormality id,abnormality type,calc type,calc distribution,assessment,pathology,subtlety,image file path,cropped image file path,ROI mask file path,pathology_fixed
325,P_02501,3,RIGHT,MLO,1,calcification,PLEOMORPHIC,CLUSTERED,0,MALIGNANT,3,Calc-Test_P_02501_RIGHT_MLO/1.3.6.1.4.1.9590.1...,Calc-Test_P_02501_RIGHT_MLO_1/1.3.6.1.4.1.9590...,Calc-Test_P_02501_RIGHT_MLO_1/1.3.6.1.4.1.9590...,MALIGNANT
132,P_00827,4,RIGHT,MLO,1,calcification,PLEOMORPHIC,CLUSTERED,4,MALIGNANT,1,Calc-Test_P_00827_RIGHT_MLO/1.3.6.1.4.1.9590.1...,Calc-Test_P_00827_RIGHT_MLO_1/1.3.6.1.4.1.9590...,Calc-Test_P_00827_RIGHT_MLO_1/1.3.6.1.4.1.9590...,MALIGNANT
271,P_01743,0,RIGHT,CC,1,calcification,PLEOMORPHIC,DIFFUSELY_SCATTERED,5,MALIGNANT,5,Calc-Test_P_01743_RIGHT_CC/1.3.6.1.4.1.9590.10...,Calc-Test_P_01743_RIGHT_CC_1/1.3.6.1.4.1.9590....,Calc-Test_P_01743_RIGHT_CC_1/1.3.6.1.4.1.9590....,MALIGNANT
135,P_00857,3,RIGHT,CC,1,calcification,AMORPHOUS,SEGMENTAL,4,MALIGNANT,2,Calc-Test_P_00857_RIGHT_CC/1.3.6.1.4.1.9590.10...,Calc-Test_P_00857_RIGHT_CC_1/1.3.6.1.4.1.9590....,Calc-Test_P_00857_RIGHT_CC_1/1.3.6.1.4.1.9590....,MALIGNANT
136,P_00857,3,RIGHT,MLO,1,calcification,AMORPHOUS,SEGMENTAL,4,MALIGNANT,2,Calc-Test_P_00857_RIGHT_MLO/1.3.6.1.4.1.9590.1...,Calc-Test_P_00857_RIGHT_MLO_1/1.3.6.1.4.1.9590...,Calc-Test_P_00857_RIGHT_MLO_1/1.3.6.1.4.1.9590...,MALIGNANT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155,P_01006,4,LEFT,MLO,1,calcification,AMORPHOUS,CLUSTERED,4,BENIGN,1,Calc-Test_P_01006_LEFT_MLO/1.3.6.1.4.1.9590.10...,Calc-Test_P_01006_LEFT_MLO_1/1.3.6.1.4.1.9590....,Calc-Test_P_01006_LEFT_MLO_1/1.3.6.1.4.1.9590....,BENIGN
156,P_01022,3,LEFT,CC,1,calcification,PUNCTATE-AMORPHOUS,CLUSTERED,4,BENIGN,3,Calc-Test_P_01022_LEFT_CC/1.3.6.1.4.1.9590.100...,Calc-Test_P_01022_LEFT_CC_1/1.3.6.1.4.1.9590.1...,Calc-Test_P_01022_LEFT_CC_1/1.3.6.1.4.1.9590.1...,BENIGN
157,P_01022,3,LEFT,MLO,1,calcification,ROUND_AND_REGULAR-PLEOMORPHIC,CLUSTERED,4,BENIGN,3,Calc-Test_P_01022_LEFT_MLO/1.3.6.1.4.1.9590.10...,Calc-Test_P_01022_LEFT_MLO_1/1.3.6.1.4.1.9590....,Calc-Test_P_01022_LEFT_MLO_1/1.3.6.1.4.1.9590....,BENIGN
162,P_01042,2,RIGHT,CC,1,calcification,PLEOMORPHIC,CLUSTERED,4,BENIGN,5,Calc-Test_P_01042_RIGHT_CC/1.3.6.1.4.1.9590.10...,Calc-Test_P_01042_RIGHT_CC_1/1.3.6.1.4.1.9590....,Calc-Test_P_01042_RIGHT_CC_1/1.3.6.1.4.1.9590....,BENIGN


In [72]:
calc_test

Unnamed: 0,patient_id,breast_density,left or right breast,image view,abnormality id,abnormality type,calc type,calc distribution,assessment,pathology,subtlety,image file path,cropped image file path,ROI mask file path,pathology_fixed
325,P_02501,3,RIGHT,MLO,1,calcification,PLEOMORPHIC,CLUSTERED,0,MALIGNANT,3,Calc-Test_P_02501_RIGHT_MLO/1.3.6.1.4.1.9590.1...,Calc-Test_P_02501_RIGHT_MLO_1/1.3.6.1.4.1.9590...,Calc-Test_P_02501_RIGHT_MLO_1/1.3.6.1.4.1.9590...,MALIGNANT
132,P_00827,4,RIGHT,MLO,1,calcification,PLEOMORPHIC,CLUSTERED,4,MALIGNANT,1,Calc-Test_P_00827_RIGHT_MLO/1.3.6.1.4.1.9590.1...,Calc-Test_P_00827_RIGHT_MLO_1/1.3.6.1.4.1.9590...,Calc-Test_P_00827_RIGHT_MLO_1/1.3.6.1.4.1.9590...,MALIGNANT
271,P_01743,0,RIGHT,CC,1,calcification,PLEOMORPHIC,DIFFUSELY_SCATTERED,5,MALIGNANT,5,Calc-Test_P_01743_RIGHT_CC/1.3.6.1.4.1.9590.10...,Calc-Test_P_01743_RIGHT_CC_1/1.3.6.1.4.1.9590....,Calc-Test_P_01743_RIGHT_CC_1/1.3.6.1.4.1.9590....,MALIGNANT
135,P_00857,3,RIGHT,CC,1,calcification,AMORPHOUS,SEGMENTAL,4,MALIGNANT,2,Calc-Test_P_00857_RIGHT_CC/1.3.6.1.4.1.9590.10...,Calc-Test_P_00857_RIGHT_CC_1/1.3.6.1.4.1.9590....,Calc-Test_P_00857_RIGHT_CC_1/1.3.6.1.4.1.9590....,MALIGNANT
136,P_00857,3,RIGHT,MLO,1,calcification,AMORPHOUS,SEGMENTAL,4,MALIGNANT,2,Calc-Test_P_00857_RIGHT_MLO/1.3.6.1.4.1.9590.1...,Calc-Test_P_00857_RIGHT_MLO_1/1.3.6.1.4.1.9590...,Calc-Test_P_00857_RIGHT_MLO_1/1.3.6.1.4.1.9590...,MALIGNANT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155,P_01006,4,LEFT,MLO,1,calcification,AMORPHOUS,CLUSTERED,4,BENIGN,1,Calc-Test_P_01006_LEFT_MLO/1.3.6.1.4.1.9590.10...,Calc-Test_P_01006_LEFT_MLO_1/1.3.6.1.4.1.9590....,Calc-Test_P_01006_LEFT_MLO_1/1.3.6.1.4.1.9590....,BENIGN
156,P_01022,3,LEFT,CC,1,calcification,PUNCTATE-AMORPHOUS,CLUSTERED,4,BENIGN,3,Calc-Test_P_01022_LEFT_CC/1.3.6.1.4.1.9590.100...,Calc-Test_P_01022_LEFT_CC_1/1.3.6.1.4.1.9590.1...,Calc-Test_P_01022_LEFT_CC_1/1.3.6.1.4.1.9590.1...,BENIGN
157,P_01022,3,LEFT,MLO,1,calcification,ROUND_AND_REGULAR-PLEOMORPHIC,CLUSTERED,4,BENIGN,3,Calc-Test_P_01022_LEFT_MLO/1.3.6.1.4.1.9590.10...,Calc-Test_P_01022_LEFT_MLO_1/1.3.6.1.4.1.9590....,Calc-Test_P_01022_LEFT_MLO_1/1.3.6.1.4.1.9590....,BENIGN
162,P_01042,2,RIGHT,CC,1,calcification,PLEOMORPHIC,CLUSTERED,4,BENIGN,5,Calc-Test_P_01042_RIGHT_CC/1.3.6.1.4.1.9590.10...,Calc-Test_P_01042_RIGHT_CC_1/1.3.6.1.4.1.9590....,Calc-Test_P_01042_RIGHT_CC_1/1.3.6.1.4.1.9590....,BENIGN


In [72]:
calc_test

Unnamed: 0,patient_id,breast_density,left or right breast,image view,abnormality id,abnormality type,calc type,calc distribution,assessment,pathology,subtlety,image file path,cropped image file path,ROI mask file path,pathology_fixed
325,P_02501,3,RIGHT,MLO,1,calcification,PLEOMORPHIC,CLUSTERED,0,MALIGNANT,3,Calc-Test_P_02501_RIGHT_MLO/1.3.6.1.4.1.9590.1...,Calc-Test_P_02501_RIGHT_MLO_1/1.3.6.1.4.1.9590...,Calc-Test_P_02501_RIGHT_MLO_1/1.3.6.1.4.1.9590...,MALIGNANT
132,P_00827,4,RIGHT,MLO,1,calcification,PLEOMORPHIC,CLUSTERED,4,MALIGNANT,1,Calc-Test_P_00827_RIGHT_MLO/1.3.6.1.4.1.9590.1...,Calc-Test_P_00827_RIGHT_MLO_1/1.3.6.1.4.1.9590...,Calc-Test_P_00827_RIGHT_MLO_1/1.3.6.1.4.1.9590...,MALIGNANT
271,P_01743,0,RIGHT,CC,1,calcification,PLEOMORPHIC,DIFFUSELY_SCATTERED,5,MALIGNANT,5,Calc-Test_P_01743_RIGHT_CC/1.3.6.1.4.1.9590.10...,Calc-Test_P_01743_RIGHT_CC_1/1.3.6.1.4.1.9590....,Calc-Test_P_01743_RIGHT_CC_1/1.3.6.1.4.1.9590....,MALIGNANT
135,P_00857,3,RIGHT,CC,1,calcification,AMORPHOUS,SEGMENTAL,4,MALIGNANT,2,Calc-Test_P_00857_RIGHT_CC/1.3.6.1.4.1.9590.10...,Calc-Test_P_00857_RIGHT_CC_1/1.3.6.1.4.1.9590....,Calc-Test_P_00857_RIGHT_CC_1/1.3.6.1.4.1.9590....,MALIGNANT
136,P_00857,3,RIGHT,MLO,1,calcification,AMORPHOUS,SEGMENTAL,4,MALIGNANT,2,Calc-Test_P_00857_RIGHT_MLO/1.3.6.1.4.1.9590.1...,Calc-Test_P_00857_RIGHT_MLO_1/1.3.6.1.4.1.9590...,Calc-Test_P_00857_RIGHT_MLO_1/1.3.6.1.4.1.9590...,MALIGNANT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155,P_01006,4,LEFT,MLO,1,calcification,AMORPHOUS,CLUSTERED,4,BENIGN,1,Calc-Test_P_01006_LEFT_MLO/1.3.6.1.4.1.9590.10...,Calc-Test_P_01006_LEFT_MLO_1/1.3.6.1.4.1.9590....,Calc-Test_P_01006_LEFT_MLO_1/1.3.6.1.4.1.9590....,BENIGN
156,P_01022,3,LEFT,CC,1,calcification,PUNCTATE-AMORPHOUS,CLUSTERED,4,BENIGN,3,Calc-Test_P_01022_LEFT_CC/1.3.6.1.4.1.9590.100...,Calc-Test_P_01022_LEFT_CC_1/1.3.6.1.4.1.9590.1...,Calc-Test_P_01022_LEFT_CC_1/1.3.6.1.4.1.9590.1...,BENIGN
157,P_01022,3,LEFT,MLO,1,calcification,ROUND_AND_REGULAR-PLEOMORPHIC,CLUSTERED,4,BENIGN,3,Calc-Test_P_01022_LEFT_MLO/1.3.6.1.4.1.9590.10...,Calc-Test_P_01022_LEFT_MLO_1/1.3.6.1.4.1.9590....,Calc-Test_P_01022_LEFT_MLO_1/1.3.6.1.4.1.9590....,BENIGN
162,P_01042,2,RIGHT,CC,1,calcification,PLEOMORPHIC,CLUSTERED,4,BENIGN,5,Calc-Test_P_01042_RIGHT_CC/1.3.6.1.4.1.9590.10...,Calc-Test_P_01042_RIGHT_CC_1/1.3.6.1.4.1.9590....,Calc-Test_P_01042_RIGHT_CC_1/1.3.6.1.4.1.9590....,BENIGN


In [42]:
meta

Unnamed: 0,Series UID,Collection,3rd Party Analysis,Data Description URI,Subject ID,Study UID,Study Description,Study Date,Series Description,Manufacturer,Modality,SOP Class Name,SOP Class UID,Number of Images,Unnamed: 14,File Size,File Location,Download Timestamp
0,1.3.6.1.4.1.9590.100.1.2.419081637812053404913...,CBIS-DDSM,,https://doi.org/10.7937/K9/TCIA.2016.7O02S9CY,Calc-Test_P_00038_LEFT_CC_1,1.3.6.1.4.1.9590.100.1.2.161465562211359959230...,,08-29-2017,ROI mask images,,MG,Secondary Capture Image Storage,1.2.840.10008.5.1.4.1.1.7,2,14,06 MB,.\CBIS-DDSM\Calc-Test_P_00038_LEFT_CC_1\08-29-...,2022-02-28T21:13:14.487
1,1.3.6.1.4.1.9590.100.1.2.188613955710170417803...,CBIS-DDSM,,https://doi.org/10.7937/K9/TCIA.2016.7O02S9CY,Calc-Test_P_00038_LEFT_MLO_1,1.3.6.1.4.1.9590.100.1.2.291121996131431385353...,,08-29-2017,ROI mask images,,MG,Secondary Capture Image Storage,1.2.840.10008.5.1.4.1.1.7,2,14,62 MB,.\CBIS-DDSM\Calc-Test_P_00038_LEFT_MLO_1\08-29...,2022-02-28T21:13:28.105
2,1.3.6.1.4.1.9590.100.1.2.374115997511889073021...,CBIS-DDSM,,https://doi.org/10.7937/K9/TCIA.2016.7O02S9CY,Calc-Test_P_00038_LEFT_CC,1.3.6.1.4.1.9590.100.1.2.859354343102033567126...,,08-29-2017,full mammogram images,,MG,Secondary Capture Image Storage,1.2.840.10008.5.1.4.1.1.7,1,27,84 MB,.\CBIS-DDSM\Calc-Test_P_00038_LEFT_CC\08-29-20...,2022-02-28T21:13:35.311
3,1.3.6.1.4.1.9590.100.1.2.174390361112646747718...,CBIS-DDSM,,https://doi.org/10.7937/K9/TCIA.2016.7O02S9CY,Calc-Test_P_00038_LEFT_MLO,1.3.6.1.4.1.9590.100.1.2.384159464510350889125...,,08-29-2017,full mammogram images,,MG,Secondary Capture Image Storage,1.2.840.10008.5.1.4.1.1.7,1,28,97 MB,.\CBIS-DDSM\Calc-Test_P_00038_LEFT_MLO\08-29-2...,2022-02-28T21:13:36.483
4,1.3.6.1.4.1.9590.100.1.2.244876997513875090239...,CBIS-DDSM,,https://doi.org/10.7937/K9/TCIA.2016.7O02S9CY,Calc-Test_P_00038_RIGHT_CC_1,1.3.6.1.4.1.9590.100.1.2.200764632211227648028...,,08-29-2017,ROI mask images,,MG,Secondary Capture Image Storage,1.2.840.10008.5.1.4.1.1.7,2,13,41 MB,.\CBIS-DDSM\Calc-Test_P_00038_RIGHT_CC_1\08-29...,2022-02-28T21:13:46.913
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6770,1.3.6.1.4.1.9590.100.1.2.321062807811123845106...,CBIS-DDSM,,https://doi.org/10.7937/K9/TCIA.2016.7O02S9CY,Mass-Training_P_02092_LEFT_MLO_1,1.3.6.1.4.1.9590.100.1.2.195655760513031195523...,,07-20-2016,cropped images,,MG,Secondary Capture Image Storage,1.2.840.10008.5.1.4.1.1.7,1,379,24 KB,.\CBIS-DDSM\Mass-Training_P_02092_LEFT_MLO_1\0...,2022-03-01T21:24:32.04
6771,1.3.6.1.4.1.9590.100.1.2.203989029910964209440...,CBIS-DDSM,,https://doi.org/10.7937/K9/TCIA.2016.7O02S9CY,Mass-Training_P_02092_LEFT_MLO_1,1.3.6.1.4.1.9590.100.1.2.222512969612930058132...,,07-21-2016,ROI mask images,,MG,Secondary Capture Image Storage,1.2.840.10008.5.1.4.1.1.7,1,14,14 MB,.\CBIS-DDSM\Mass-Training_P_02092_LEFT_MLO_1\0...,2022-03-01T21:24:42.228
6772,1.3.6.1.4.1.9590.100.1.2.290251769212905477734...,CBIS-DDSM,,https://doi.org/10.7937/K9/TCIA.2016.7O02S9CY,Mass-Training_P_02092_LEFT_MLO,1.3.6.1.4.1.9590.100.1.2.322851018411213611731...,,07-20-2016,full mammogram images,,MG,Secondary Capture Image Storage,1.2.840.10008.5.1.4.1.1.7,1,37,35 MB,.\CBIS-DDSM\Mass-Training_P_02092_LEFT_MLO\07-...,2022-03-01T21:25:04.66
6773,1.3.6.1.4.1.9590.100.1.2.412558050811722685411...,CBIS-DDSM,,https://doi.org/10.7937/K9/TCIA.2016.7O02S9CY,Mass-Training_P_02079_RIGHT_MLO,1.3.6.1.4.1.9590.100.1.2.791687062125620465047...,,07-20-2016,full mammogram images,,MG,Secondary Capture Image Storage,1.2.840.10008.5.1.4.1.1.7,1,36,04 MB,.\CBIS-DDSM\Mass-Training_P_02079_RIGHT_MLO\07...,2022-03-01T21:25:15.016


In [96]:
meta

Unnamed: 0,Series UID,Collection,3rd Party Analysis,Data Description URI,Subject ID,Study UID,Study Description,Study Date,Series Description,Manufacturer,Modality,SOP Class Name,SOP Class UID,Number of Images,Unnamed: 14,File Size,File Location,Download Timestamp
0,1.3.6.1.4.1.9590.100.1.2.419081637812053404913...,CBIS-DDSM,,https://doi.org/10.7937/K9/TCIA.2016.7O02S9CY,Calc-Test_P_00038_LEFT_CC_1,1.3.6.1.4.1.9590.100.1.2.161465562211359959230...,,08-29-2017,ROI mask images,,MG,Secondary Capture Image Storage,1.2.840.10008.5.1.4.1.1.7,2,14,06 MB,.\CBIS-DDSM\Calc-Test_P_00038_LEFT_CC_1\08-29-...,2022-02-28T21:13:14.487
1,1.3.6.1.4.1.9590.100.1.2.188613955710170417803...,CBIS-DDSM,,https://doi.org/10.7937/K9/TCIA.2016.7O02S9CY,Calc-Test_P_00038_LEFT_MLO_1,1.3.6.1.4.1.9590.100.1.2.291121996131431385353...,,08-29-2017,ROI mask images,,MG,Secondary Capture Image Storage,1.2.840.10008.5.1.4.1.1.7,2,14,62 MB,.\CBIS-DDSM\Calc-Test_P_00038_LEFT_MLO_1\08-29...,2022-02-28T21:13:28.105
2,1.3.6.1.4.1.9590.100.1.2.374115997511889073021...,CBIS-DDSM,,https://doi.org/10.7937/K9/TCIA.2016.7O02S9CY,Calc-Test_P_00038_LEFT_CC,1.3.6.1.4.1.9590.100.1.2.859354343102033567126...,,08-29-2017,full mammogram images,,MG,Secondary Capture Image Storage,1.2.840.10008.5.1.4.1.1.7,1,27,84 MB,.\CBIS-DDSM\Calc-Test_P_00038_LEFT_CC\08-29-20...,2022-02-28T21:13:35.311
3,1.3.6.1.4.1.9590.100.1.2.174390361112646747718...,CBIS-DDSM,,https://doi.org/10.7937/K9/TCIA.2016.7O02S9CY,Calc-Test_P_00038_LEFT_MLO,1.3.6.1.4.1.9590.100.1.2.384159464510350889125...,,08-29-2017,full mammogram images,,MG,Secondary Capture Image Storage,1.2.840.10008.5.1.4.1.1.7,1,28,97 MB,.\CBIS-DDSM\Calc-Test_P_00038_LEFT_MLO\08-29-2...,2022-02-28T21:13:36.483
4,1.3.6.1.4.1.9590.100.1.2.244876997513875090239...,CBIS-DDSM,,https://doi.org/10.7937/K9/TCIA.2016.7O02S9CY,Calc-Test_P_00038_RIGHT_CC_1,1.3.6.1.4.1.9590.100.1.2.200764632211227648028...,,08-29-2017,ROI mask images,,MG,Secondary Capture Image Storage,1.2.840.10008.5.1.4.1.1.7,2,13,41 MB,.\CBIS-DDSM\Calc-Test_P_00038_RIGHT_CC_1\08-29...,2022-02-28T21:13:46.913
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6770,1.3.6.1.4.1.9590.100.1.2.321062807811123845106...,CBIS-DDSM,,https://doi.org/10.7937/K9/TCIA.2016.7O02S9CY,Mass-Training_P_02092_LEFT_MLO_1,1.3.6.1.4.1.9590.100.1.2.195655760513031195523...,,07-20-2016,cropped images,,MG,Secondary Capture Image Storage,1.2.840.10008.5.1.4.1.1.7,1,379,24 KB,.\CBIS-DDSM\Mass-Training_P_02092_LEFT_MLO_1\0...,2022-03-01T21:24:32.04
6771,1.3.6.1.4.1.9590.100.1.2.203989029910964209440...,CBIS-DDSM,,https://doi.org/10.7937/K9/TCIA.2016.7O02S9CY,Mass-Training_P_02092_LEFT_MLO_1,1.3.6.1.4.1.9590.100.1.2.222512969612930058132...,,07-21-2016,ROI mask images,,MG,Secondary Capture Image Storage,1.2.840.10008.5.1.4.1.1.7,1,14,14 MB,.\CBIS-DDSM\Mass-Training_P_02092_LEFT_MLO_1\0...,2022-03-01T21:24:42.228
6772,1.3.6.1.4.1.9590.100.1.2.290251769212905477734...,CBIS-DDSM,,https://doi.org/10.7937/K9/TCIA.2016.7O02S9CY,Mass-Training_P_02092_LEFT_MLO,1.3.6.1.4.1.9590.100.1.2.322851018411213611731...,,07-20-2016,full mammogram images,,MG,Secondary Capture Image Storage,1.2.840.10008.5.1.4.1.1.7,1,37,35 MB,.\CBIS-DDSM\Mass-Training_P_02092_LEFT_MLO\07-...,2022-03-01T21:25:04.66
6773,1.3.6.1.4.1.9590.100.1.2.412558050811722685411...,CBIS-DDSM,,https://doi.org/10.7937/K9/TCIA.2016.7O02S9CY,Mass-Training_P_02079_RIGHT_MLO,1.3.6.1.4.1.9590.100.1.2.791687062125620465047...,,07-20-2016,full mammogram images,,MG,Secondary Capture Image Storage,1.2.840.10008.5.1.4.1.1.7,1,36,04 MB,.\CBIS-DDSM\Mass-Training_P_02079_RIGHT_MLO\07...,2022-03-01T21:25:15.016


In [148]:
all_test.drop_duplicates([""])

Unnamed: 0,patient_id,breast_density,left or right breast,image view,abnormality id,abnormality type,calc type,calc distribution,assessment,pathology,subtlety,image file path,cropped image file path,ROI mask file path,mass shape,mass margins
0,P_00038,2,LEFT,CC,1,calcification,PUNCTATE-PLEOMORPHIC,CLUSTERED,4,BENIGN,2,Calc-Test_P_00038_LEFT_CC/1.3.6.1.4.1.9590.100...,Calc-Test_P_00038_LEFT_CC_1/1.3.6.1.4.1.9590.1...,Calc-Test_P_00038_LEFT_CC_1/1.3.6.1.4.1.9590.1...,,
1,P_00038,2,LEFT,MLO,1,calcification,PUNCTATE-PLEOMORPHIC,CLUSTERED,4,BENIGN,2,Calc-Test_P_00038_LEFT_MLO/1.3.6.1.4.1.9590.10...,Calc-Test_P_00038_LEFT_MLO_1/1.3.6.1.4.1.9590....,Calc-Test_P_00038_LEFT_MLO_1/1.3.6.1.4.1.9590....,,
2,P_00038,2,RIGHT,CC,1,calcification,VASCULAR,,2,BENIGN_WITHOUT_CALLBACK,5,Calc-Test_P_00038_RIGHT_CC/1.3.6.1.4.1.9590.10...,Calc-Test_P_00038_RIGHT_CC_1/1.3.6.1.4.1.9590....,Calc-Test_P_00038_RIGHT_CC_1/1.3.6.1.4.1.9590....,,
3,P_00038,2,RIGHT,CC,2,calcification,VASCULAR,,2,BENIGN_WITHOUT_CALLBACK,5,Calc-Test_P_00038_RIGHT_CC/1.3.6.1.4.1.9590.10...,Calc-Test_P_00038_RIGHT_CC_2/1.3.6.1.4.1.9590....,Calc-Test_P_00038_RIGHT_CC_2/1.3.6.1.4.1.9590....,,
4,P_00038,2,RIGHT,MLO,1,calcification,VASCULAR,,2,BENIGN_WITHOUT_CALLBACK,5,Calc-Test_P_00038_RIGHT_MLO/1.3.6.1.4.1.9590.1...,Calc-Test_P_00038_RIGHT_MLO_1/1.3.6.1.4.1.9590...,Calc-Test_P_00038_RIGHT_MLO_1/1.3.6.1.4.1.9590...,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
373,P_01825,2,RIGHT,MLO,1,mass,,,3,BENIGN_WITHOUT_CALLBACK,3,Mass-Test_P_01825_RIGHT_MLO/1.3.6.1.4.1.9590.1...,Mass-Test_P_01825_RIGHT_MLO_1/1.3.6.1.4.1.9590...,Mass-Test_P_01825_RIGHT_MLO_1/1.3.6.1.4.1.9590...,LOBULATED,MICROLOBULATED
374,P_01833,2,RIGHT,MLO,1,mass,,,5,MALIGNANT,5,Mass-Test_P_01833_RIGHT_MLO/1.3.6.1.4.1.9590.1...,Mass-Test_P_01833_RIGHT_MLO_1/1.3.6.1.4.1.9590...,Mass-Test_P_01833_RIGHT_MLO_1/1.3.6.1.4.1.9590...,IRREGULAR,ILL_DEFINED
375,P_01865,2,LEFT,MLO,1,mass,,,4,MALIGNANT,2,Mass-Test_P_01865_LEFT_MLO/1.3.6.1.4.1.9590.10...,Mass-Test_P_01865_LEFT_MLO_1/1.3.6.1.4.1.9590....,Mass-Test_P_01865_LEFT_MLO_1/1.3.6.1.4.1.9590....,IRREGULAR,ILL_DEFINED
376,P_01912,3,RIGHT,CC,1,mass,,,5,MALIGNANT,4,Mass-Test_P_01912_RIGHT_CC/1.3.6.1.4.1.9590.10...,Mass-Test_P_01912_RIGHT_CC_1/1.3.6.1.4.1.9590....,Mass-Test_P_01912_RIGHT_CC_1/1.3.6.1.4.1.9590....,IRREGULAR,SPICULATED
