In [1]:
import pandas as pd
import numpy as np
import pydicom
import glob

In [2]:
## First, collect the paths of all DICOM files in the working directory into a list.
## glob returns matches in arbitrary (filesystem-dependent) order, so sort them to
## make "the first DICOM" and the row order of the tables below reproducible.
mydicoms = sorted(glob.glob("*.dcm"))

### Let's look at the contents of the first DICOM:

In [3]:
# Parse the first file in the list and show the resulting dataset as the cell output.
first_dicom_path = mydicoms[0]
dcm1 = pydicom.dcmread(first_dicom_path)
dcm1

Dataset.file_meta -------------------------------
(0002,0000) File Meta Information Group Length  UL: 204
(0002,0001) File Meta Information Version       OB: b'\x00\x01'
(0002,0002) Media Storage SOP Class UID         UI: Secondary Capture Image Storage
(0002,0003) Media Storage SOP Instance UID      UI: 1.3.6.1.4.1.11129.5.5.139539879914217162512411239901306132962191
(0002,0010) Transfer Syntax UID                 UI: Implicit VR Little Endian
(0002,0012) Implementation Class UID            UI: 1.2.826.0.1.3680043.8.498.1
(0002,0013) Implementation Version Name         SH: 'PYDICOM 1.2.0'
-------------------------------------------------
(0008,0016) SOP Class UID                       UI: Secondary Capture Image Storage
(0008,0018) SOP Instance UID                    UI: 1.3.6.1.4.1.11129.5.5.139539879914217162512411239901306132962191
(0008,0060) Modality                            CS: 'DX'
(0008,1030) Study Description                   LO: 'Atelectasis'
(0010,0020) Patient ID       

In [4]:
## Do some exploratory work beforehand on how to extract these attributes using pydicom...
# dcm1.PatientID
# dcm1.PatientAge
# dcm1.PatientSex
# dcm1.Modality
# dcm1.StudyDescription
# dcm1.Rows
# dcm1.Columns

# dcm1['PatientID'].value






## Now, let's create the dataframe that we want, and populate it in a loop with all of our DICOMs:

To complete this exercise, create a single dataframe that has the following columns:
- Patient ID
- Patient Age (as an integer)
- Patient Sex (M/F)
- Imaging Modality
- Type of finding in the image
- Number of rows in the image
- Number of columns in the image

Save this dataframe as a .CSV file.

In [5]:
# Build a table with one row of header metadata per DICOM file and save it as CSV.
Columns = ['PatientID', 'PatientAge', 'PatientSex', 'Modality', 'StudyDescription', 'Rows', 'Columns']

# Collect one record (dict) per file and create the dataframe once at the end.
# Growing a dataframe with pd.concat inside the loop re-copies all earlier rows
# on every iteration and leaves every column with object dtype.
records = []
for dicom_path in mydicoms:
    dcm = pydicom.dcmread(dicom_path)
    records.append({
        'PatientID': dcm.PatientID,
        # The exercise asks for the age as an integer. int() assumes the header
        # stores a plain numeric string (e.g. '69') -- TODO confirm before reusing
        # this on files that use the DICOM age-string form such as '069Y'.
        'PatientAge': int(dcm.PatientAge),
        'PatientSex': dcm.PatientSex,
        'Modality': dcm.Modality,
        'StudyDescription': dcm.StudyDescription,
        'Rows': dcm.Rows,
        'Columns': dcm.Columns,
    })

# Passing columns= fixes the column order and keeps the header even if no files were found.
df = pd.DataFrame(records, columns=Columns)

# Save the dataframe to a CSV file
df.to_csv('dicom_metadata.csv', index=False)


In [6]:
# Show the assembled metadata table as this cell's output.
df

Unnamed: 0,PatientID,PatientAge,PatientSex,Modality,StudyDescription,Rows,Columns
0,13118,69,M,DX,Atelectasis,1024,1024
1,1688,59,F,DX,Infiltration|Nodule,1024,1024
2,5066,52,M,DX,Cardiomegaly|Effusion|Infiltration,1024,1024
3,13659,62,F,DX,Consolidation|Mass|Pneumonia|Pneumothorax,1024,1024
4,10172,59,F,DX,Atelectasis|Effusion,1024,1024
5,29579,24,F,DX,Effusion|Nodule|Pleural_Thickening|Mass,1024,1024
6,23075,31,M,DX,Mass,1024,1024


In [None]:
# A different way to do it: gather one row (a plain list of values) per DICOM
# file, then turn the complete list of rows into a dataframe in a single call.
output_columns = ['PatientID', 'PatientAge', 'PatientSex', 'Modality', 'Findings', 'Rows', 'Columns']

all_data = []
for dicom_path in mydicoms:
    dcm = pydicom.dcmread(dicom_path)
    # Value order must match output_columns; StudyDescription is stored under 'Findings'.
    all_data.append([
        dcm.PatientID,
        int(dcm.PatientAge),
        dcm.PatientSex,
        dcm.Modality,
        dcm.StudyDescription,
        dcm.Rows,
        dcm.Columns,
    ])

mydata = pd.DataFrame(all_data, columns=output_columns)

mydata

Unnamed: 0,PatientID,PatientAge,PatientSex,Modality,Findings,Rows,Columns
0,13118,69,M,DX,Atelectasis,1024,1024
1,1688,59,F,DX,Infiltration|Nodule,1024,1024
2,5066,52,M,DX,Cardiomegaly|Effusion|Infiltration,1024,1024
3,13659,62,F,DX,Consolidation|Mass|Pneumonia|Pneumothorax,1024,1024
4,10172,59,F,DX,Atelectasis|Effusion,1024,1024
5,29579,24,F,DX,Effusion|Nodule|Pleural_Thickening|Mass,1024,1024
6,23075,31,M,DX,Mass,1024,1024
