# Store metadata of LIDC-IDRI dataset

## Imports and configuration

In [1]:
import os
import random
from statistics import median_high
from pathlib import Path

import pylidc as pl
import pandas as pd
import numpy as np
from tqdm import tqdm
import pydicom as dicom

In [45]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
LIDC_DIR = Path("/Volumes/LaCie/data/lung-cancer-detection/lidc-idri/")
DICOM_DIR = LIDC_DIR / "LIDC-IDRI"
list(LIDC_DIR.iterdir())

[PosixPath('/Volumes/LaCie/data/lung-cancer-detection/lidc-idri/LIDC-IDRI'),
 PosixPath('/Volumes/LaCie/data/lung-cancer-detection/lidc-idri/._LIDC-IDRI'),
 PosixPath('/Volumes/LaCie/data/lung-cancer-detection/lidc-idri/LIDC-XML-only.zip'),
 PosixPath('/Volumes/LaCie/data/lung-cancer-detection/lidc-idri/._LIDC-XML-only.zip'),
 PosixPath('/Volumes/LaCie/data/lung-cancer-detection/lidc-idri/LIDC-IDRI_MetaData.csv'),
 PosixPath('/Volumes/LaCie/data/lung-cancer-detection/lidc-idri/._LIDC-IDRI_MetaData.csv'),
 PosixPath('/Volumes/LaCie/data/lung-cancer-detection/lidc-idri/tcia-diagnosis-data-2012-04-20.xls'),
 PosixPath('/Volumes/LaCie/data/lung-cancer-detection/lidc-idri/._tcia-diagnosis-data-2012-04-20.xls'),
 PosixPath('/Volumes/LaCie/data/lung-cancer-detection/lidc-idri/list3.2.csv'),
 PosixPath('/Volumes/LaCie/data/lung-cancer-detection/lidc-idri/._list3.2.csv'),
 PosixPath('/Volumes/LaCie/data/lung-cancer-detection/lidc-idri/processed')]

**Notes:**

- `LIDC-IDRI_MetaData.csv` does only contain very basic information about series => not particularly useful!
- `list3.2.csv` contains detailed information about each nodule => not particularly useful, as `pylidc` already filters this information for us!
- `tcia-diagnosis-data-2012-04-20.xls` contains diagnostic data associated with the case for a select number of 157 cases => might be useful for selecting cases

## Load DICOM metadata

**Note:** This is the cleaned metadata from the previous notebook!

In [3]:
img_meta = pd.read_csv(LIDC_DIR/"LIDC-IDRI/lidc_dicom_meta_clean.csv")
print(img_meta.shape)
print(img_meta.columns)
img_meta.head()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


(243992, 37)
Index(['BitsAllocated', 'PxMean', 'PxMin', 'PxMax', 'BitsStored',
       'BodyPartExamined', 'Columns', 'FrameOfReferenceUID',
       'ImageOrientationPatient', 'ImagePositionPatient', 'ImageType',
       'Manufacturer', 'ManufacturerModelName', 'Modality', 'PatientID',
       'PatientPosition', 'PatientSex', 'PixelPaddingValue',
       'PixelRepresentation', 'PixelSpacing', 'RescaleIntercept',
       'RescaleSlope', 'RotationDirection', 'Rows', 'SOPClassUID',
       'SOPInstanceUID', 'SamplesPerPixel', 'SeriesInstanceUID',
       'SeriesNumber', 'SliceLocation', 'SliceThickness', 'StudyID',
       'StudyInstanceUID', 'WindowCenter', 'WindowWidth', 'zPos', 'path'],
      dtype='object')


Unnamed: 0,BitsAllocated,PxMean,PxMin,PxMax,BitsStored,BodyPartExamined,Columns,FrameOfReferenceUID,ImageOrientationPatient,ImagePositionPatient,...,SeriesInstanceUID,SeriesNumber,SliceLocation,SliceThickness,StudyID,StudyInstanceUID,WindowCenter,WindowWidth,zPos,path
0,16,-91.907864,-2000,2607,16,CHEST,512,1.3.6.1.4.1.14519.5.2.1.6279.6001.317612173882...,"[1.000000, 0.000000, 0.000000, 0.000000, 1.000...","[-175.500000, -174.500000, -9.500000]",...,1.3.6.1.4.1.14519.5.2.1.6279.6001.619372068417...,3000522.0,-9.5,1.25,,1.3.6.1.4.1.14519.5.2.1.6279.6001.490157381160...,40,350,-9.5,/Volumes/LaCie/data/lung-cancer-detection/lidc...
1,16,-86.287533,-2000,2580,16,CHEST,512,1.3.6.1.4.1.14519.5.2.1.6279.6001.317612173882...,"[1.000000, 0.000000, 0.000000, 0.000000, 1.000...","[-175.500000, -174.500000, -10.750000]",...,1.3.6.1.4.1.14519.5.2.1.6279.6001.619372068417...,3000522.0,-10.75,1.25,,1.3.6.1.4.1.14519.5.2.1.6279.6001.490157381160...,40,350,-10.75,/Volumes/LaCie/data/lung-cancer-detection/lidc...
2,16,-80.395668,-2000,2622,16,CHEST,512,1.3.6.1.4.1.14519.5.2.1.6279.6001.317612173882...,"[1.000000, 0.000000, 0.000000, 0.000000, 1.000...","[-175.500000, -174.500000, -12.000000]",...,1.3.6.1.4.1.14519.5.2.1.6279.6001.619372068417...,3000522.0,-12.0,1.25,,1.3.6.1.4.1.14519.5.2.1.6279.6001.490157381160...,40,350,-12.0,/Volumes/LaCie/data/lung-cancer-detection/lidc...
3,16,-74.869011,-2000,2527,16,CHEST,512,1.3.6.1.4.1.14519.5.2.1.6279.6001.317612173882...,"[1.000000, 0.000000, 0.000000, 0.000000, 1.000...","[-175.500000, -174.500000, -13.250000]",...,1.3.6.1.4.1.14519.5.2.1.6279.6001.619372068417...,3000522.0,-13.25,1.25,,1.3.6.1.4.1.14519.5.2.1.6279.6001.490157381160...,40,350,-13.25,/Volumes/LaCie/data/lung-cancer-detection/lidc...
4,16,-69.924225,-2000,2589,16,CHEST,512,1.3.6.1.4.1.14519.5.2.1.6279.6001.317612173882...,"[1.000000, 0.000000, 0.000000, 0.000000, 1.000...","[-175.500000, -174.500000, -14.500000]",...,1.3.6.1.4.1.14519.5.2.1.6279.6001.619372068417...,3000522.0,-14.5,1.25,,1.3.6.1.4.1.14519.5.2.1.6279.6001.490157381160...,40,350,-14.5,/Volumes/LaCie/data/lung-cancer-detection/lidc...


## Load patient metadata

### Load raw diagnosis data

In [4]:
raw = pd.read_excel(LIDC_DIR/"tcia-diagnosis-data-2012-04-20.xls")
print(raw.shape)
raw.head()

(157, 14)


Unnamed: 0,TCIA Patient ID,"Diagnosis at the Patient Level\n0=Unknown\n1=benign or non-malignant disease\n2= malignant, primary lung cancer\n3 = malignant metastatic\n",Diagnosis Method\n0 = unknown\n1 = review of radiological images to show 2 years of stable nodule\n2 = biopsy\n3 = surgical resection\n4 = progression or response,Primary tumor site for metastatic disease,"Nodule 1\nDiagnosis at the Nodule Level \n0=Unknown\n1=benign or non-malignant disease\n2= malignant, primary lung cancer\n3 = malignant metastatic)\n",Nodule 1\nDiagnosis Method at the Nodule Level\n0 = unknown\n1 = review of radiological images to show 2 years of stable nodule\n2 = biopsy\n3 = surgical resection\n4 = progression or response\n,"Nodule 2\nDiagnosis at the Nodule Level \n0=Unknown\n1=benign or non-malignant disease\n2= malignant, primary lung cancer\n3 = malignant metastatic)\n",Nodule 2\nDiagnosis Method at the Nodule Level\n0 = unknown\n1 = review of radiological images to show 2 years of stable nodule\n2 = biopsy\n3 = surgical resection\n4 = progression or response\n,"Nodule 3\nDiagnosis at the Nodule Level \n0=Unknown\n1=benign or non-malignant disease\n2= malignant, primary lung cancer\n3 = malignant metastatic)\n",Nodule 3\nDiagnosis Method at the Nodule Level\n0 = unknown\n1 = review of radiological images to show 2 years of stable nodule\n2 = biopsy\n3 = surgical resection\n4 = progression or response\n,"Nodule 4\nDiagnosis at the Nodule Level \n0=Unknown\n1=benign or non-malignant disease\n2= malignant, primary lung cancer\n3 = malignant metastatic)\n",Nodule 4\nDiagnosis Method at the Nodule Level\n0 = unknown\n1 = review of radiological images to show 2 years of stable nodule\n2 = biopsy\n3 = surgical resection\n4 = progression or response\n,"Nodule 5\nDiagnosis at the Nodule Level \n0=Unknown\n1=benign or non-malignant disease\n2= malignant, primary lung cancer\n3 = malignant metastatic)\n",Nodule 5\nDiagnosis Method at the Nodule Level\n0 = unknown\n1 = review of radiological images to show 2 years of stable nodule\n2 = biopsy\n3 = surgical resection\n4 = progression or response\n
0,LIDC-IDRI-0068,3,4,Head & Neck Cancer,3.0,4.0,,,,,,,,
1,LIDC-IDRI-0071,3,1,Head & Neck,1.0,1.0,,,,,,,,
2,LIDC-IDRI-0072,2,4,Lung Cancer,1.0,4.0,,,,,,,,
3,LIDC-IDRI-0088,3,0,Uterine Cancer,0.0,0.0,,,,,,,,
4,LIDC-IDRI-0090,2,3,NSCLC,2.0,3.0,,,,,,,,


In [5]:
raw.columns

Index(['TCIA Patient ID',
       'Diagnosis at the Patient Level\n0=Unknown\n1=benign or non-malignant disease\n2= malignant, primary lung cancer\n3 = malignant metastatic\n',
       'Diagnosis Method\n0 = unknown\n1 = review of radiological images to show 2 years of stable nodule\n2 = biopsy\n3 = surgical resection\n4 = progression or response',
       'Primary tumor site for metastatic disease',
       'Nodule 1\nDiagnosis at the Nodule Level \n0=Unknown\n1=benign or non-malignant disease\n2= malignant, primary lung cancer\n3 = malignant metastatic)\n',
       'Nodule 1\nDiagnosis Method at the Nodule Level\n0 = unknown\n1 = review of radiological images to show 2 years of stable nodule\n2 = biopsy\n3 = surgical resection\n4 = progression or response\n',
       'Nodule 2\nDiagnosis at the Nodule Level \n0=Unknown\n1=benign or non-malignant disease\n2= malignant, primary lung cancer\n3 = malignant metastatic)\n',
       'Nodule 2\nDiagnosis Method at the Nodule Level\n0 = unknown\n1 =

### Store most relevant data in new dataframe

In [6]:
pat_meta = pd.DataFrame(columns=["PatientID", "Diagnosis", "DiagnosisMethod", "PrimaryTumorSiteMetastaticDisease"])
pat_meta.shape

(0, 4)

In [7]:
pat_meta["PatientID"] = raw["TCIA Patient ID"].copy()
pat_meta["Diagnosis"] = raw["Diagnosis at the Patient Level\n0=Unknown\n1=benign or non-malignant disease\n2= malignant, primary lung cancer\n3 = malignant metastatic\n"].copy()
pat_meta["DiagnosisMethod"] = raw["Diagnosis Method\n0 = unknown\n1 = review of radiological images to show 2 years of stable nodule\n2 = biopsy\n3 = surgical resection\n4 = progression or response"].copy()
pat_meta["PrimaryTumorSiteMetastaticDisease"] = raw["Primary tumor site for metastatic disease"].copy()

In [8]:
pat_meta.head()

Unnamed: 0,PatientID,Diagnosis,DiagnosisMethod,PrimaryTumorSiteMetastaticDisease
0,LIDC-IDRI-0068,3,4,Head & Neck Cancer
1,LIDC-IDRI-0071,3,1,Head & Neck
2,LIDC-IDRI-0072,2,4,Lung Cancer
3,LIDC-IDRI-0088,3,0,Uterine Cancer
4,LIDC-IDRI-0090,2,3,NSCLC


In [9]:
print(raw.columns[1],"\n", raw.columns[2])

Diagnosis at the Patient Level
0=Unknown
1=benign or non-malignant disease
2= malignant, primary lung cancer
3 = malignant metastatic
 
 Diagnosis Method
0 = unknown
1 = review of radiological images to show 2 years of stable nodule
2 = biopsy
3 = surgical resection
4 = progression or response


In [10]:
pat_meta.to_csv(LIDC_DIR/"LIDC-IDRI/lidc_patient_meta.csv", index=False)

## Load scan metadata

### Get list of patient IDs

In [11]:
patients = [f for f in os.listdir(DICOM_DIR) if not f.startswith('.') and not f.endswith('.csv')]
patients.sort()
print(len(patients))

1010


### Get all scans for each patient

In [12]:
scan = pl.query(pl.Scan).filter(pl.Scan.patient_id == patients[0]).all()[0]
scan

Scan(id=12,patient_id=LIDC-IDRI-0001)

In [13]:
scans = []
for pid in tqdm(patients):
    pscans = pl.query(pl.Scan).filter(pl.Scan.patient_id == pid).all()
    scans += pscans
len(scans)

100%|██████████| 1010/1010 [00:01<00:00, 515.31it/s]


1018

### Extract metadata from scans

In [14]:
def get_scan_meta(scan):
    meta = {
        "StudyID": scan.study_instance_uid,
        "SeriesID": scan.series_instance_uid,
        "PatientID": scan.patient_id,
        "SliceThickness": scan.slice_thickness,
        "SliceSpacing": scan.slice_spacing,
        "PixelSpacing": scan.pixel_spacing,
        "ContrastUsed": scan.contrast_used,
    }
    return meta

In [15]:
get_scan_meta(scan)

{'StudyID': '1.3.6.1.4.1.14519.5.2.1.6279.6001.298806137288633453246975630178',
 'SeriesID': '1.3.6.1.4.1.14519.5.2.1.6279.6001.179049373636438705059720603192',
 'PatientID': 'LIDC-IDRI-0001',
 'SliceThickness': 2.5,
 'SliceSpacing': 2.5,
 'PixelSpacing': 0.703125,
 'ContrastUsed': True}

### Extract additional image metadata

In [16]:
# Question: can we assume that all images of a series have the same metadata?
for s in tqdm(scans):
    path = Path(s.get_path_to_dicom_files())
    fnames = sorted([fname for fname in os.listdir(path) if fname.endswith('.dcm')])
    dcm1 = dicom.dcmread(path/fnames[0])
    dcm2 = dicom.dcmread(path/random.choice(fnames))
    if not dcm1.PixelSpacing == dcm2.PixelSpacing:
        print("Pixel Spacing", s.patient_id, dcm1.PixelSpacing, dcm2.PixelSpacing)
    if not dcm1.ImagePositionPatient[:2] == dcm2.ImagePositionPatient[:2]:
        print("Image Position", s.patient_id, dcm1.ImagePositionPatient, dcm2.ImagePositionPatient)
    if not dcm1.ImageOrientationPatient == dcm2.ImageOrientationPatient:
        print("Image Orientation", s.patient_id, dcm1.ImageOrientationPatient, dcm2.ImageOrientationPatient)

100%|██████████| 1018/1018 [04:41<00:00,  3.62it/s]


Thus, it's safe to assume that the first image of each series is representative of all images.

In [17]:
def get_img_meta(scan):
    path = Path(scan.get_path_to_dicom_files())
    fnames = sorted([fname for fname in os.listdir(path) if fname.endswith('.dcm')])
    dcm = dicom.dcmread(path/fnames[0])
    meta = {
        "ImagePositionPatient": getattr(dcm, "ImagePositionPatient", np.nan),
        "ImageOrientationPatient": getattr(dcm, "ImageOrientationPatient", np.nan),
        "Rows": getattr(dcm, "Rows", np.nan),
        "Columns": getattr(dcm, "Columns", np.nan),
        "RescaleIntercept": getattr(dcm, "RescaleIntercept", np.nan),
        "RescaleSlope": getattr(dcm, "RescaleSlope", np.nan),
        "WindowCenter": getattr(dcm, "WindowCenter", np.nan),
        "WindowWidth": getattr(dcm, "WindowWidth", np.nan),
        "BitsAllocated": getattr(dcm, "BitsAllocated", np.nan),
        "PixelRepresentation": getattr(dcm, "PixelRepresentation", np.nan),
        "Manufacturer": getattr(dcm, "Manufacturer", ""),
        "ManufacturerModelName": getattr(dcm, "ManufacturerModelName", ""),
    }
    return meta

In [18]:
get_img_meta(scan)

{'ImagePositionPatient': [-166.000000, -171.699997, -10.000000],
 'ImageOrientationPatient': [1.000000, 0.000000, 0.000000, 0.000000, 1.000000, 0.000000],
 'Rows': 512,
 'Columns': 512,
 'RescaleIntercept': "-1024.0",
 'RescaleSlope': "1.0",
 'WindowCenter': "-600.0",
 'WindowWidth': "1600.0",
 'BitsAllocated': 16,
 'PixelRepresentation': 1,
 'Manufacturer': 'GE MEDICAL SYSTEMS',
 'ManufacturerModelName': 'LightSpeed Plus'}

### Extract basic nodule metadata

In [19]:
def get_nodule_meta(scan):
    ann_clusters = scan.cluster_annotations(verbose=False)
    maligs = [median_high([ann.malignancy for ann in cluster]) for cluster in ann_clusters]
    meta = {
        "NumAnnotations": len(scan.annotations),
        "NumNodules": len(ann_clusters),
        "MaxMalignancy": np.max(maligs) if len(maligs) > 0 else np.nan,
    }
    return meta

In [20]:
get_nodule_meta(scan)

{'NumAnnotations': 4, 'NumNodules': 1, 'MaxMalignancy': 5}

### Putting it all together

In [21]:
data = []
for scan in tqdm(scans):
    meta = {}
    scan_meta = get_scan_meta(scan)
    meta.update(scan_meta)
    img_meta = get_img_meta(scan)
    meta.update(img_meta)
    nodule_meta = get_nodule_meta(scan)
    meta.update(nodule_meta)
    data.append(meta)
len(data)

100%|██████████| 1018/1018 [10:18<00:00,  1.65it/s]


1018

In [22]:
df = pd.DataFrame(data=data)
df.shape

(1018, 22)

In [23]:
df.head()

Unnamed: 0,StudyID,SeriesID,PatientID,SliceThickness,SliceSpacing,PixelSpacing,ContrastUsed,ImagePositionPatient,ImageOrientationPatient,Rows,...,RescaleSlope,WindowCenter,WindowWidth,BitsAllocated,PixelRepresentation,Manufacturer,ManufacturerModelName,NumAnnotations,NumNodules,MaxMalignancy
0,1.3.6.1.4.1.14519.5.2.1.6279.6001.298806137288...,1.3.6.1.4.1.14519.5.2.1.6279.6001.179049373636...,LIDC-IDRI-0001,2.5,2.5,0.703125,True,"[-166.000000, -171.699997, -10.000000]","[1.000000, 0.000000, 0.000000, 0.000000, 1.000...",512,...,1.0,-600.0,1600.0,16,1,GE MEDICAL SYSTEMS,LightSpeed Plus,4,1,5.0
1,1.3.6.1.4.1.14519.5.2.1.6279.6001.490157381160...,1.3.6.1.4.1.14519.5.2.1.6279.6001.619372068417...,LIDC-IDRI-0002,1.25,1.25,0.681641,False,"[-175.500000, -174.500000, -9.500000]","[1.000000, 0.000000, 0.000000, 0.000000, 1.000...",512,...,1.0,40.0,350.0,16,1,GE MEDICAL SYSTEMS,LightSpeed16,2,1,5.0
2,1.3.6.1.4.1.14519.5.2.1.6279.6001.101370605276...,1.3.6.1.4.1.14519.5.2.1.6279.6001.170706757615...,LIDC-IDRI-0003,2.5,2.5,0.820312,True,"[-228.800003, -210.000000, -31.500000]","[1.000000, 0.000000, 0.000000, 0.000000, 1.000...",512,...,1.0,-600.0,1600.0,16,1,GE MEDICAL SYSTEMS,LightSpeed16,13,4,5.0
3,1.3.6.1.4.1.14519.5.2.1.6279.6001.191425307197...,1.3.6.1.4.1.14519.5.2.1.6279.6001.323541312620...,LIDC-IDRI-0004,1.25,1.25,0.822266,True,"[-234.800003, -170.500000, -25.000000]","[1.000000, 0.000000, 0.000000, 0.000000, 1.000...",512,...,1.0,40.0,400.0,16,1,GE MEDICAL SYSTEMS,LightSpeed16,4,1,1.0
4,1.3.6.1.4.1.14519.5.2.1.6279.6001.190188259083...,1.3.6.1.4.1.14519.5.2.1.6279.6001.129007566048...,LIDC-IDRI-0005,2.5,2.5,0.664062,True,"[-163.300003, -170.000000, -10.045000]","[1.000000, 0.000000, 0.000000, 0.000000, 1.000...",512,...,1.0,-600.0,1600.0,16,1,GE MEDICAL SYSTEMS,LightSpeed Plus,9,3,3.0


In [24]:
df.to_csv(LIDC_DIR/"LIDC-IDRI/lidc_scan_meta.csv", index=False)

## Load nodule metadata

In [42]:
def detailed_nodule_meta(scan):
    ann_clusters = scan.cluster_annotations(verbose=False)
    meta = []
    for i, cluster in enumerate(ann_clusters):
        nod_meta = {
            'PatientID': scan.patient_id,
            'StudyID': scan.study_instance_uid,
            'SeriesID': scan.series_instance_uid,
            'NoduleID': i,
            'NumAnnotations': len(cluster),
            'Diameter': np.mean([ann.diameter for ann in cluster]),
            'SurfaceArea': np.mean([ann.surface_area for ann in cluster]),
            'Volume': np.mean([ann.volume for ann in cluster]),
            'Malignancy': median_high([ann.malignancy for ann in cluster]),
            'Texture': median_high([ann.texture for ann in cluster]),
            'Spiculation': median_high([ann.spiculation for ann in cluster]),
            'Lobulation': median_high([ann.lobulation for ann in cluster]),
            'Margin': median_high([ann.margin for ann in cluster]),
            'Sphericity': median_high([ann.sphericity for ann in cluster]),
            'Calcification': median_high([ann.calcification for ann in cluster]),
            'InternalStructure': median_high([ann.internalStructure for ann in cluster]),
            'Subtlety': median_high([ann.subtlety for ann in cluster]),
        }
        meta.append(nod_meta)
    return meta

In [43]:
detailed_nodule_meta(scan)

[{'PatientID': 'LIDC-IDRI-1012',
  'StudyID': '1.3.6.1.4.1.14519.5.2.1.6279.6001.676549258486738448212921834668',
  'SeriesID': '1.3.6.1.4.1.14519.5.2.1.6279.6001.153646219551578201092527860224',
  'NoduleID': 0,
  'NumAnnotations': 4,
  'Diameter': 9.267563364103658,
  'SurfaceArea': 187.58368677254273,
  'Volume': 171.16143781863138,
  'Malignancy': 2,
  'Texture': 5,
  'Spiculation': 1,
  'Lobulation': 1,
  'Margin': 4,
  'Sphericity': 4,
  'Calcification': 6,
  'InternalStructure': 1,
  'Subtlety': 4}]

In [52]:
data = []
for scan in tqdm(scans):
    meta = detailed_nodule_meta(scan)
    data += meta

100%|██████████| 1018/1018 [05:02<00:00,  3.36it/s]


In [53]:
df = pd.DataFrame(data=data)
df.shape

(2651, 17)

In [54]:
df.head()

Unnamed: 0,PatientID,StudyID,SeriesID,NoduleID,NumAnnotations,Diameter,SurfaceArea,Volume,Malignancy,Texture,Spiculation,Lobulation,Margin,Sphericity,Calcification,InternalStructure,Subtlety
0,LIDC-IDRI-0001,1.3.6.1.4.1.14519.5.2.1.6279.6001.298806137288...,1.3.6.1.4.1.14519.5.2.1.6279.6001.179049373636...,0,4,32.755812,2491.466573,6989.673615,5,5,5,3,4,4,6,1,5
1,LIDC-IDRI-0002,1.3.6.1.4.1.14519.5.2.1.6279.6001.490157381160...,1.3.6.1.4.1.14519.5.2.1.6279.6001.619372068417...,0,2,30.781671,2807.198994,7244.667508,5,2,1,1,2,5,6,1,2
2,LIDC-IDRI-0003,1.3.6.1.4.1.14519.5.2.1.6279.6001.101370605276...,1.3.6.1.4.1.14519.5.2.1.6279.6001.170706757615...,0,1,31.664468,1996.252117,4731.410934,2,1,1,1,2,5,6,1,1
3,LIDC-IDRI-0003,1.3.6.1.4.1.14519.5.2.1.6279.6001.101370605276...,1.3.6.1.4.1.14519.5.2.1.6279.6001.170706757615...,1,4,31.001964,2225.67735,6519.463698,5,4,3,2,3,4,6,1,5
4,LIDC-IDRI-0003,1.3.6.1.4.1.14519.5.2.1.6279.6001.101370605276...,1.3.6.1.4.1.14519.5.2.1.6279.6001.170706757615...,2,4,13.309155,321.183599,472.089669,4,5,2,2,4,4,6,1,4


In [55]:
df.to_csv(DICOM_DIR/"lidc_nodule_meta.csv", index=False)