<a href="https://colab.research.google.com/github/deepakri201/SR_for_Lung_PET_CT_Dx/blob/main/demo/Lung_PET_CT_Dx_FM_demo_part1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Lung-PET-CT-Dx_FM_demo_part1

In this notebook, we take the metadata extracted from the DICOM SR files (stored in BigQuery (BQ) tables), combine it with image metadata from IDC, and save the result to a csv file.

Deepa Krishnaswamy

Brigham and Women's Hospital

September 2025

Notes:
- Colab Pro
- Tables that hold the DICOM SR metadata: idc-external-018.sr_lung_pet_ct_dx.bbox_measurements
- Insert requirements for running with own project
- Need two datasets - lung_pet_ct_dx_fm and lung_pet_ct_dx_fm_demo

In [1]:
# SET THESE OPTIONS

# create_bq_tables = 1: download the SQL queries and (re)create the BQ tables
#                       that hold the measurements from the SRs
# create_bq_tables = 0: skip the BQ table creation and load the measurements
#                       from the csv file attached to a github release
create_bq_tables = 0

### TO DO ###
# Name of the BQ dataset the tables are written to.
# Create this BQ dataset in your project if create_bq_tables = 1
dataset_name = "lung_pet_ct_dx_fm_demo"


# Parameterization

In [2]:
#@title Enter your Project ID here
# initialize this variable with your Google Cloud Project ID!
# project_name is used below for the BigQuery client and for the destination table ids
project_name = "idc-external-018" #@param {type:"string"}

import os
# Expose the project ID as an environment variable as well
os.environ["GCP_PROJECT_ID"] = project_name

# Make this project the default for the gcloud CLI
!gcloud config set project $project_name

# Authenticate the Colab user so the Google Cloud clients can act on their behalf
from google.colab import auth
auth.authenticate_user()

Updated property [core/project].


# Environment Setup

In [3]:
!pip install idc-index

Collecting idc-index
  Downloading idc_index-0.9.2-py3-none-any.whl.metadata (7.9 kB)
Collecting duckdb<=1.2.1,>=0.10.0 (from idc-index)
  Downloading duckdb-1.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (966 bytes)
Collecting idc-index-data==21.0.0 (from idc-index)
  Downloading idc_index_data-21.0.0-py3-none-any.whl.metadata (5.5 kB)
Collecting s5cmd (from idc-index)
  Downloading s5cmd-0.2.0-py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.7 kB)
Collecting sphinx-click (from idc-index)
  Downloading sphinx_click-6.0.0-py3-none-any.whl.metadata (3.4 kB)
Downloading idc_index-0.9.2-py3-none-any.whl (27 kB)
Downloading idc_index_data-21.0.0-py3-none-any.whl (80.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m80.6/80.6 MB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading duckdb-1.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (20.2 MB)
[2K   [90m━━━━━━━━

In [4]:
import os
import sys
import time

import numpy as np
import pandas as pd
import nibabel as nib
import numpy as np
import matplotlib.pyplot as plt

import json
from pathlib import Path

In [5]:
from google.cloud import bigquery
from google.cloud import storage

In [6]:
# Create the idc-index client
# NOTE(review): idc_client is not referenced by any later cell in this notebook
from idc_index import IDCClient

idc_client = IDCClient.client()

In [7]:
# if create_bq_tables - download the BQ queries
# else get the bbox_measurements csv file from github

if create_bq_tables:
  !wget -O /content/measurement_groups.sql https://raw.githubusercontent.com/deepakri201/SR_for_Lung_PET_CT_Dx/main/sql/measurement_groups.sql
  !wget -O /content/bbox_measurements.sql https://raw.githubusercontent.com/deepakri201/SR_for_Lung_PET_CT_Dx/main/sql/bbox_measurements.sql
else:
  !wget https://github.com/deepakri201/SR_for_Lung_PET_CT_Dx/releases/download/v1.0.1/bbox_measurements.csv


--2025-09-05 16:19:05--  https://github.com/deepakri201/SR_for_Lung_PET_CT_Dx/releases/download/v1.0.0/bbox_measurements.csv
Resolving github.com (github.com)... 140.82.113.4
Connecting to github.com (github.com)|140.82.113.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://release-assets.githubusercontent.com/github-production-release-asset/1050768858/d8647cdd-885d-4d7f-8f90-6aa7b38d530a?sp=r&sv=2018-11-09&sr=b&spr=https&se=2025-09-05T17%3A12%3A24Z&rscd=attachment%3B+filename%3Dbbox_measurements.csv&rsct=application%2Foctet-stream&skoid=96c2d410-5711-43a1-aedd-ab1947aa7ab0&sktid=398a6654-997b-47e9-b12b-9515b896b4de&skt=2025-09-05T16%3A11%3A45Z&ske=2025-09-05T17%3A12%3A24Z&sks=b&skv=2018-11-09&sig=lmg%2Fd8Ecr5E1%2BX0mdJh8uZI2fVfBEeBKQNvWnzwsjKA%3D&jwt=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmVsZWFzZS1hc3NldHMuZ2l0aHVidXNlcmNvbnRlbnQuY29tIiwia2V5Ijoia2V5MSIsImV4cCI6MTc1NzA4OTQ0NSwibmJmIjoxNzU3MDg5MTQ1LCJwYXRoIjoicmV

# Get the bounding box information

## df_sr - get the metadata from the SRs

In [61]:
# If create_bq_tables is set, run the two downloaded queries, save each result
# to a table in `{project_name}.{dataset_name}` and read bbox_measurements back
# Requires access to the original DICOM SR files and DICOM store
# Else load the measurements from a github release attachment
# MAKE SURE the dataset named by dataset_name already exists in your project!

if create_bq_tables:

  # Table name -> SQL file downloaded earlier; the tables are created in this order
  sql_files = {"measurement_groups": "/content/measurement_groups.sql",
               "bbox_measurements": "/content/bbox_measurements.sql"}

  # Read every query up front so a missing file fails before any table is written
  queries = {}
  for table_name, sql_filename in sql_files.items():
    with open(sql_filename, 'r') as file:
      queries[table_name] = file.read()

  client_bq = bigquery.Client(project=project_name)

  # Run each query and store its result in
  # `{project_name}.{dataset_name}.{table_name}`, replacing any existing table
  for table_name, query_text in queries.items():
    destination_table_id = ".".join([project_name, dataset_name, table_name])
    job_config = bigquery.QueryJobConfig(destination=destination_table_id)
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
    query_job = client_bq.query(query_text, job_config=job_config)
    query_job.result()  # block until the table has been written
    print(f"Query results saved to table: {destination_table_id}")

  # Then query the bbox_measurements table
  query = f"""
      SELECT
        *
      FROM
        `{project_name}.{dataset_name}.bbox_measurements`
        """
  df_sr = client_bq.query(query).to_dataframe()

else:

  df_sr = pd.read_csv("/content/bbox_measurements.csv")


In [62]:
# Rename the column so we know it's the SeriesInstanceUID of the SR

# df_sr = df_sr.rename(columns={'SeriesInstanceUID':"SR_SeriesInstanceUID"})

In [63]:
# Add columns for the width, height, center_x, and center_y of each bounding box,
# as these are needed for display. Each box is stored as four corner points
# (x0, y0) ... (x3, y3).

# One row per box, one column per corner. Reducing the numpy arrays along axis 1
# keeps the np.min / np.max behaviour of a per-row computation (a NaN corner
# gives NaN results) without looping over the rows.
corners_x = df_sr[['x0', 'x1', 'x2', 'x3']].to_numpy(dtype=float)
corners_y = df_sr[['y0', 'y1', 'y2', 'y3']].to_numpy(dtype=float)

min_x = corners_x.min(axis=1)
# All four corners take part in the maximum: x1 was previously left out
# because x2 was listed twice
max_x = corners_x.max(axis=1)
min_y = corners_y.min(axis=1)
max_y = corners_y.max(axis=1)

width = max_x - min_x
height = max_y - min_y

# Add columns
df_sr['width'] = width
df_sr['height'] = height
df_sr['center_x'] = min_x + width/2
df_sr['center_y'] = min_y + height/2

df_sr.head()

Unnamed: 0.1,Unnamed: 0,PatientID,StudyInstanceUID,SeriesInstanceUID,SOPInstanceUID,ReferencedSeriesInstanceUID,trackingIdentifier,trackingUniqueIdentifier,finding,findingSite,...,x1,y1,x2,y2,x3,y3,width,height,center_x,center_y
0,0,Lung_Dx-A0001,1.3.6.1.4.1.14519.5.2.1.6655.2359.165554066086...,1.2.826.0.1.3680043.8.498.55301614185745357336...,1.2.826.0.1.3680043.8.498.24267054384212912983...,1.3.6.1.4.1.14519.5.2.1.6655.2359.257508444832...,1,1.2.826.0.1.3680043.8.498.84609255769050482843...,"{\n ""finding"": {\n ""CodeValue"": ""118733200...","{\n ""findingSite"": {\n ""CodeValue"": ""39607...",...,335.0,298.0,335.0,374.0,298.0,374.0,37.0,76.0,316.5,336.0
1,1,Lung_Dx-A0001,1.3.6.1.4.1.14519.5.2.1.6655.2359.165554066086...,1.2.826.0.1.3680043.8.498.55301614185745357336...,1.2.826.0.1.3680043.8.498.24267054384212912983...,1.3.6.1.4.1.14519.5.2.1.6655.2359.257508444832...,2,1.2.826.0.1.3680043.8.498.98307061047746048061...,"{\n ""finding"": {\n ""CodeValue"": ""118733200...","{\n ""findingSite"": {\n ""CodeValue"": ""39607...",...,338.0,305.0,338.0,378.0,290.0,378.0,48.0,73.0,314.0,341.5
2,2,Lung_Dx-A0001,1.3.6.1.4.1.14519.5.2.1.6655.2359.165554066086...,1.2.826.0.1.3680043.8.498.55301614185745357336...,1.2.826.0.1.3680043.8.498.24267054384212912983...,1.3.6.1.4.1.14519.5.2.1.6655.2359.257508444832...,3,1.2.826.0.1.3680043.8.498.81385278889407636940...,"{\n ""finding"": {\n ""CodeValue"": ""118733200...","{\n ""findingSite"": {\n ""CodeValue"": ""39607...",...,342.0,300.0,342.0,379.0,287.0,379.0,55.0,79.0,314.5,339.5
3,3,Lung_Dx-A0001,1.3.6.1.4.1.14519.5.2.1.6655.2359.165554066086...,1.2.826.0.1.3680043.8.498.55301614185745357336...,1.2.826.0.1.3680043.8.498.24267054384212912983...,1.3.6.1.4.1.14519.5.2.1.6655.2359.257508444832...,4,1.2.826.0.1.3680043.8.498.46178354798970203848...,"{\n ""finding"": {\n ""CodeValue"": ""118733200...","{\n ""findingSite"": {\n ""CodeValue"": ""39607...",...,355.0,307.0,355.0,388.0,282.0,388.0,73.0,81.0,318.5,347.5
4,4,Lung_Dx-A0001,1.3.6.1.4.1.14519.5.2.1.6655.2359.165554066086...,1.2.826.0.1.3680043.8.498.55301614185745357336...,1.2.826.0.1.3680043.8.498.24267054384212912983...,1.3.6.1.4.1.14519.5.2.1.6655.2359.257508444832...,5,1.2.826.0.1.3680043.8.498.20209883418525886800...,"{\n ""finding"": {\n ""CodeValue"": ""118733200...","{\n ""findingSite"": {\n ""CodeValue"": ""39607...",...,360.0,308.0,360.0,394.0,278.0,394.0,82.0,86.0,319.0,351.0


## df_idc - get the metadata from IDC

In [64]:
# We need the following fields in order to convert from pixel coordinates to mm for the bounding box:
# the dimensions (Rows, Columns), the PixelSpacing and the ImagePositionPatient (IPP),
# especially IPP[2] for the z value

# Each referenced image instance is listed once and missing values are dropped:
# the IN UNNEST(...) filter returns the same rows, but the array parameter sent
# with the query is smaller and holds only strings
referenced_sop_instance_uid_list = df_sr['ReferencedSOPInstanceUID'].dropna().unique().tolist()

client_bq = bigquery.Client(project=project_name)

query = f"""
    SELECT
      PatientID,
      StudyInstanceUID,
      SeriesInstanceUID,
      SOPInstanceUID,
      `Rows` as num_rows,
      `Columns` as num_columns,
      PixelSpacing,
      ImagePositionPatient
    FROM
      `bigquery-public-data.idc_current.dicom_all`
    WHERE
      SOPInstanceUID IN UNNEST(@referenced_sop_instance_uid_list)
    ORDER BY
      PatientID,
      StudyInstanceUID,
      SeriesInstanceUID,
      ImagePositionPatient[SAFE_OFFSET(2)]
      """

# The UIDs are passed as a query parameter instead of being pasted into the SQL text
uid_parameter = bigquery.ArrayQueryParameter("referenced_sop_instance_uid_list",
                                             "STRING",
                                             referenced_sop_instance_uid_list)
job_config = bigquery.QueryJobConfig(query_parameters=[uid_parameter])
df_idc = client_bq.query(query, job_config=job_config).to_dataframe()

In [65]:
# Preview the IDC metadata returned by the query
df_idc.head()

Unnamed: 0,PatientID,StudyInstanceUID,SeriesInstanceUID,SOPInstanceUID,num_rows,num_columns,PixelSpacing,ImagePositionPatient
0,Lung_Dx-A0001,1.3.6.1.4.1.14519.5.2.1.6655.2359.165554066086...,1.3.6.1.4.1.14519.5.2.1.6655.2359.213534032021...,1.3.6.1.4.1.14519.5.2.1.6655.2359.741206086241...,512,512,"[0.841796875, 0.841796875]","[-228.036842, -93.0631599, -189]"
1,Lung_Dx-A0001,1.3.6.1.4.1.14519.5.2.1.6655.2359.165554066086...,1.3.6.1.4.1.14519.5.2.1.6655.2359.213534032021...,1.3.6.1.4.1.14519.5.2.1.6655.2359.289186849349...,512,512,"[0.841796875, 0.841796875]","[-228.036842, -93.0631599, -194]"
2,Lung_Dx-A0001,1.3.6.1.4.1.14519.5.2.1.6655.2359.165554066086...,1.3.6.1.4.1.14519.5.2.1.6655.2359.213534032021...,1.3.6.1.4.1.14519.5.2.1.6655.2359.191468104323...,512,512,"[0.841796875, 0.841796875]","[-228.036842, -93.0631599, -199]"
3,Lung_Dx-A0001,1.3.6.1.4.1.14519.5.2.1.6655.2359.165554066086...,1.3.6.1.4.1.14519.5.2.1.6655.2359.213534032021...,1.3.6.1.4.1.14519.5.2.1.6655.2359.253763111879...,512,512,"[0.841796875, 0.841796875]","[-228.036842, -93.0631599, -204]"
4,Lung_Dx-A0001,1.3.6.1.4.1.14519.5.2.1.6655.2359.165554066086...,1.3.6.1.4.1.14519.5.2.1.6655.2359.213534032021...,1.3.6.1.4.1.14519.5.2.1.6655.2359.102500633407...,512,512,"[0.841796875, 0.841796875]","[-228.036842, -93.0631599, -209]"


In [66]:
# Only keep the rows with usable PixelSpacing and ImagePositionPatient columns
# Reformat these array columns into one float32 column per entry

print("Number of rows in original df_idc: " + str(len(df_idc)))

# ImagePositionPatient is indexed up to [2] and PixelSpacing up to [1] below,
# so require that many entries instead of just a non-empty array
df_idc = df_idc[df_idc['ImagePositionPatient'].map(len) >= 3]
# .copy() makes the filtered frame independent of the query result, so the
# column assignments below do not act on a slice (SettingWithCopyWarning)
df_idc = df_idc[df_idc['PixelSpacing'].map(len) >= 2].copy()

pixel_spacing_values = df_idc['PixelSpacing'].values
ipp_values = df_idc['ImagePositionPatient'].values

df_idc['pixel_spacing_x'] = [np.float32(f[0]) for f in pixel_spacing_values]
df_idc['pixel_spacing_y'] = [np.float32(f[1]) for f in pixel_spacing_values]
df_idc['ipp0'] = [np.float32(f[0]) for f in ipp_values]
df_idc['ipp1'] = [np.float32(f[1]) for f in ipp_values]
df_idc['ipp2'] = [np.float32(f[2]) for f in ipp_values]

# Keep the identifiers, the image dimensions and the new scalar columns
df_idc = df_idc[['PatientID', 'StudyInstanceUID', 'SeriesInstanceUID', 'SOPInstanceUID',
                 'num_rows', 'num_columns',
                 'pixel_spacing_x', 'pixel_spacing_y',
                 'ipp0', 'ipp1', 'ipp2']]

print("Number of rows in new df_idc: " + str(len(df_idc)))

Number of rows in original df_idc: 30680
Number of rows in new df_idc: 20323


## Join the tables to hold the SR info and IDC metadata info

In [67]:
# Modify the findings column in the dataframe

# Parse the JSON strings into dicts
df_sr["finding_dict"] = df_sr["finding"].apply(json.loads)
# Normalize into flat columns (one column per nested key, e.g. finding.CodeMeaning)
finding_df = pd.json_normalize(df_sr["finding_dict"])
# Give the flat columns the same index as df_sr so the concat below pairs
# each row with its own parsed values, whatever index df_sr carries
finding_df.index = df_sr.index
# Join back to original df. Columns with the same names left over from an
# earlier run of this cell are dropped first, so re-running the cell does not
# create duplicate columns
df_sr = pd.concat([df_sr.drop(columns=finding_df.columns, errors="ignore"),
                   finding_df], axis=1)

In [68]:
# Repeat for the findingSite column

# Parse the JSON strings into dicts
df_sr["findingSite_dict"] = df_sr["findingSite"].apply(json.loads)
# Normalize into flat columns (one column per nested key, e.g. findingSite.CodeMeaning)
findingSite_df = pd.json_normalize(df_sr["findingSite_dict"])
# Give the flat columns the same index as df_sr so the concat below pairs
# each row with its own parsed values, whatever index df_sr carries
findingSite_df.index = df_sr.index
# Join back to original df. Columns with the same names left over from an
# earlier run of this cell are dropped first, so re-running the cell does not
# create duplicate columns
df_sr = pd.concat([df_sr.drop(columns=findingSite_df.columns, errors="ignore"),
                   findingSite_df], axis=1)

In [69]:
# Add a column for the classification - we will focus on classifying the
# non-small cell carcinoma types, numbered in the order listed here.
# Findings with any other CodeMeaning get no label (NaN) from .map

nsclc_types = ["Adenocarcinoma",
               "Large cell carcinoma",
               "Squamous cell carcinoma"]
mapping_dict = {type_name: np.int32(label) for label, type_name in enumerate(nsclc_types)}
df_sr['classification_labels'] = df_sr['finding.CodeMeaning'].map(mapping_dict)

In [70]:
# Then join with df_idc: each SR row is matched to the IDC row of the image
# instance it references. Columns present in both frames keep their name for
# the SR side and get a "_right" suffix for the IDC side.
df_sr_join = pd.merge(df_sr, df_idc,
                      how='inner',
                      left_on='ReferencedSOPInstanceUID',
                      right_on='SOPInstanceUID',
                      suffixes=('', '_right'))

# Drop these duplicate columns that came from the right dataframe
right_duplicates = ['PatientID_right', 'SOPInstanceUID_right']
df_sr_join = df_sr_join.drop(right_duplicates, axis=1)


In [71]:
# Build the final table from a copy of the joined frame: rename, select and sort

# Old column name -> new column name
column_renames = {
  'trackingIdentifier': 'TrackingIdentifier',
  'trackingUniqueIdentifier': 'TrackingUID',
  'finding.CodingSchemeDesignator': 'FindingType.CodingSchemeDesignator',
  'finding.CodeValue': 'FindingType.CodeValue',
  'finding.CodeMeaning': 'FindingType.CodeMeaning',
  'findingSite.CodingSchemeDesignator': 'FindingSite.CodingSchemeDesignator',
  'findingSite.CodeValue': 'FindingSite.CodeValue',
  'findingSite.CodeMeaning': 'FindingSite.CodeMeaning',
}

# Columns to keep, in output order
output_columns = [
  'PatientID', 'StudyInstanceUID', 'SeriesInstanceUID', 'SOPInstanceUID', 'ReferencedSeriesInstanceUID',
  'TrackingIdentifier', 'TrackingUID', 'ReferencedSOPInstanceUID',
  'FindingType.CodingSchemeDesignator', 'FindingType.CodeValue', 'FindingType.CodeMeaning',
  'FindingSite.CodingSchemeDesignator', 'FindingSite.CodeValue', 'FindingSite.CodeMeaning',
  'pixel_spacing_x', 'pixel_spacing_y',
  'width', 'height', 'center_x', 'center_y', 'ipp0', 'ipp1', 'ipp2',
  "classification_labels",
]

# Row order of the output
sort_keys = ['PatientID', 'StudyInstanceUID', 'ReferencedSeriesInstanceUID', 'TrackingIdentifier']

df_sr_and_metadata = (
  df_sr_join
  .copy(deep=True)
  .rename(columns=column_renames)
  .loc[:, output_columns]
  .sort_values(by=sort_keys)
)
df_sr_and_metadata.head()



Unnamed: 0,PatientID,StudyInstanceUID,SeriesInstanceUID,SOPInstanceUID,ReferencedSeriesInstanceUID,TrackingIdentifier,TrackingUID,ReferencedSOPInstanceUID,FindingType.CodingSchemeDesignator,FindingType.CodeValue,...,pixel_spacing_x,pixel_spacing_y,width,height,center_x,center_y,ipp0,ipp1,ipp2,classification_labels
11,Lung_Dx-A0001,1.3.6.1.4.1.14519.5.2.1.6655.2359.165554066086...,1.2.826.0.1.3680043.8.498.65148884593862837207...,1.2.826.0.1.3680043.8.498.35417581706100113433...,1.3.6.1.4.1.14519.5.2.1.6655.2359.213534032021...,1,1.2.826.0.1.3680043.8.498.76246688780063832002...,1.3.6.1.4.1.14519.5.2.1.6655.2359.103293611003...,SCT,1187332001,...,0.841797,0.841797,33.0,68.0,320.5,338.0,-228.036835,-93.063156,-229.0,0.0
12,Lung_Dx-A0001,1.3.6.1.4.1.14519.5.2.1.6655.2359.165554066086...,1.2.826.0.1.3680043.8.498.65148884593862837207...,1.2.826.0.1.3680043.8.498.35417581706100113433...,1.3.6.1.4.1.14519.5.2.1.6655.2359.213534032021...,2,1.2.826.0.1.3680043.8.498.78888202751369901753...,1.3.6.1.4.1.14519.5.2.1.6655.2359.592450921957...,SCT,1187332001,...,0.841797,0.841797,46.0,78.0,315.0,340.0,-228.036835,-93.063156,-224.0,0.0
13,Lung_Dx-A0001,1.3.6.1.4.1.14519.5.2.1.6655.2359.165554066086...,1.2.826.0.1.3680043.8.498.65148884593862837207...,1.2.826.0.1.3680043.8.498.35417581706100113433...,1.3.6.1.4.1.14519.5.2.1.6655.2359.213534032021...,3,1.2.826.0.1.3680043.8.498.23966559968757206603...,1.3.6.1.4.1.14519.5.2.1.6655.2359.326712390675...,SCT,1187332001,...,0.841797,0.841797,62.0,72.0,318.0,349.0,-228.036835,-93.063156,-219.0,0.0
14,Lung_Dx-A0001,1.3.6.1.4.1.14519.5.2.1.6655.2359.165554066086...,1.2.826.0.1.3680043.8.498.65148884593862837207...,1.2.826.0.1.3680043.8.498.35417581706100113433...,1.3.6.1.4.1.14519.5.2.1.6655.2359.213534032021...,4,1.2.826.0.1.3680043.8.498.74195107525298506651...,1.3.6.1.4.1.14519.5.2.1.6655.2359.217649008723...,SCT,1187332001,...,0.841797,0.841797,69.0,78.0,324.5,354.0,-228.036835,-93.063156,-214.0,0.0
15,Lung_Dx-A0001,1.3.6.1.4.1.14519.5.2.1.6655.2359.165554066086...,1.2.826.0.1.3680043.8.498.65148884593862837207...,1.2.826.0.1.3680043.8.498.35417581706100113433...,1.3.6.1.4.1.14519.5.2.1.6655.2359.213534032021...,5,1.2.826.0.1.3680043.8.498.29486846510537247511...,1.3.6.1.4.1.14519.5.2.1.6655.2359.102500633407...,SCT,1187332001,...,0.841797,0.841797,69.0,92.0,320.5,356.0,-228.036835,-93.063156,-209.0,0.0


In [72]:
# List the columns of the final table
df_sr_and_metadata.columns

Index(['PatientID', 'StudyInstanceUID', 'SeriesInstanceUID', 'SOPInstanceUID',
       'ReferencedSeriesInstanceUID', 'TrackingIdentifier', 'TrackingUID',
       'ReferencedSOPInstanceUID', 'FindingType.CodingSchemeDesignator',
       'FindingType.CodeValue', 'FindingType.CodeMeaning',
       'FindingSite.CodingSchemeDesignator', 'FindingSite.CodeValue',
       'FindingSite.CodeMeaning', 'pixel_spacing_x', 'pixel_spacing_y',
       'width', 'height', 'center_x', 'center_y', 'ipp0', 'ipp1', 'ipp2',
       'classification_labels'],
      dtype='object')

In [73]:
# Let's get the number of rows per classification label, ordered by label value
df_sr_and_metadata_counts = df_sr_and_metadata['classification_labels'].value_counts().sort_index()
df_sr_and_metadata_counts

Unnamed: 0_level_0,count
classification_labels,Unnamed: 1_level_1
0.0,13965
1.0,201
2.0,4283


# Delete later - temporarily save out csv file to Google Drive

In [74]:
# Save the final table to the Colab filesystem.
# NOTE(review): to_csv also writes the DataFrame index as an unnamed first column;
# confirm the reader of this file expects that before passing index=False
df_sr_and_metadata.to_csv("/content/lung_pet_ct_dx_fm.csv")

In [75]:
# Mount Google Drive at /content/gdrive so the csv file can be copied there
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [76]:
# Copy the saved csv file into the demo folder on the mounted Google Drive
!cp "/content/lung_pet_ct_dx_fm.csv" "/content/gdrive/MyDrive/Colab Notebooks/SR_Lung-PET-CT-Dx/demo/"