<a href="https://colab.research.google.com/github/deepakri201/SEG_and_SR_for_NLSTSeg/blob/main/compare_to_ts/Compare_NLSTSeg_lung_lobe_to_TS_all.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Compare NLSTSeg lung lobe location to TotalSegmentator

This notebook compares the lung lobe assigned to each NLSTSeg lesion with the lung lobe(s) that the lesion overlaps in the TotalSegmentator segmentation.


Deepa Krishnaswamy

Brigham and Women's Hospital

October 2025

# Parameterization

In [None]:
# initialize this variable with your Google Cloud Project ID!
# (the same value is passed to the BigQuery client further down)
project_name = "idc-external-018" #@param {type:"string"}

import os
# Also expose the project ID through the GCP_PROJECT_ID environment variable.
os.environ["GCP_PROJECT_ID"] = project_name

# Set this project as the gcloud default for shell commands run from the notebook.
!gcloud config set project $project_name

# Colab-specific: authenticate the current user (requires the google.colab runtime).
from google.colab import auth
auth.authenticate_user()

Updated property [core/project].


# Environment setup

In [None]:
from google.cloud import bigquery
from google.cloud import storage

In [None]:
import pandas as pd
import json
import nibabel as nib
import numpy as np

In [None]:
nlstseg_original_bucket = "gs://nlstseg" # holds the actual tables and raw data
nlstseg_dicom_bucket = "gs://nlstseg_seg_and_sr/seg" # prefix under which the NLSTSeg "<SeriesInstanceUID>_SEG.dcm" objects are looked up later in this notebook

In [None]:
!pip install pydicom
import pydicom

Collecting pydicom
  Downloading pydicom-3.0.1-py3-none-any.whl.metadata (9.4 kB)
Downloading pydicom-3.0.1-py3-none-any.whl (2.4 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.4 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m2.4/2.4 MB[0m [31m104.1 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m61.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydicom
Successfully installed pydicom-3.0.1


In [None]:
!pip install SimpleITK
import SimpleITK as sitk

Collecting SimpleITK
  Downloading simpleitk-2.5.2-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (7.2 kB)
Downloading simpleitk-2.5.2-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (52.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.6/52.6 MB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: SimpleITK
Successfully installed SimpleITK-2.5.2


In [None]:
# DCMQI - provides segimage2itkimage, which is used further down to convert
# DICOM Segmentation objects to NIfTI
# Download the v1.4.0 Linux release archive.
!wget https://github.com/QIICR/dcmqi/releases/download/v1.4.0/dcmqi-1.4.0-linux.tar.gz
# Unpack the archive in the current working directory.
!tar zxvf dcmqi-1.4.0-linux.tar.gz
# Copy the bundled binaries into /usr/local/bin so they can be invoked by name.
!cp dcmqi-1.4.0-linux/bin/* /usr/local/bin/

--2025-10-30 15:00:59--  https://github.com/QIICR/dcmqi/releases/download/v1.4.0/dcmqi-1.4.0-linux.tar.gz
Resolving github.com (github.com)... 140.82.121.4
Connecting to github.com (github.com)|140.82.121.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://release-assets.githubusercontent.com/github-production-release-asset/50675718/915cb14a-48f5-4288-9a5f-bfbcc67daf43?sp=r&sv=2018-11-09&sr=b&spr=https&se=2025-10-30T15%3A43%3A41Z&rscd=attachment%3B+filename%3Ddcmqi-1.4.0-linux.tar.gz&rsct=application%2Foctet-stream&skoid=96c2d410-5711-43a1-aedd-ab1947aa7ab0&sktid=398a6654-997b-47e9-b12b-9515b896b4de&skt=2025-10-30T14%3A43%3A00Z&ske=2025-10-30T15%3A43%3A41Z&sks=b&skv=2018-11-09&sig=i7LSceXwRVCGw%2FlWDCrhA6i8OejOgYrnjBxSKj8D0kY%3D&jwt=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmVsZWFzZS1hc3NldHMuZ2l0aHVidXNlcmNvbnRlbnQuY29tIiwia2V5Ijoia2V5MSIsImV4cCI6MTc2MTgzODI1OSwibmJmIjoxNzYxODM2NDU5LCJwYXRoIjoicmVsZWFzZWFzc2V0cHJvZHV

# Functions

In [None]:
def resample_image_or_mask(fixed_image_filename, moving_image_filename, warped_filename, warp_mask=0):
  """Resample the moving image onto the voxel grid of the fixed image and save it.

  The output takes its size, spacing, origin and direction from the fixed image.
  A default-constructed sitk.Transform is used, i.e. no additional registration
  transform is estimated or applied here - only the grid changes. Voxels of the
  output grid that fall outside the moving image are set to 0.

  Args:
    fixed_image_filename: path of the image that defines the output grid.
    moving_image_filename: path of the image (or label map) to resample.
    warped_filename: path the resampled image is written to.
    warp_mask: truthy -> nearest-neighbor interpolation (keeps label values
      intact, use for masks/ROIs); falsy (default) -> linear interpolation.

  Returns:
    None. The result is written to `warped_filename`; geometry and pixel type
    information is printed as a side effect.
  """

  fixed_itk_image = sitk.ReadImage(fixed_image_filename)
  moving_itk_image = sitk.ReadImage(moving_image_filename)

  print('moving image type: ' + str(moving_itk_image.GetPixelIDTypeAsString()))

  # Log both geometries so that mismatches between the two inputs are easy to spot.
  print("Fixed origin:", fixed_itk_image.GetOrigin())
  print("Fixed spacing:", fixed_itk_image.GetSpacing())
  print("Fixed direction:", fixed_itk_image.GetDirection())

  print("Moving origin:", moving_itk_image.GetOrigin())
  print("Moving spacing:", moving_itk_image.GetSpacing())
  print("Moving direction:", moving_itk_image.GetDirection())

  # The output grid is copied from the fixed image.
  resample = sitk.ResampleImageFilter()
  resample.SetOutputSpacing(fixed_itk_image.GetSpacing())
  resample.SetSize(fixed_itk_image.GetSize())
  resample.SetOutputDirection(fixed_itk_image.GetDirection())
  resample.SetOutputOrigin(fixed_itk_image.GetOrigin())
  resample.SetTransform(sitk.Transform())
  resample.SetDefaultPixelValue(0)

  # Nearest neighbor for masks so that no new (interpolated) label values appear.
  resample.SetInterpolator(sitk.sitkNearestNeighbor if warp_mask else sitk.sitkLinear)

  result_image = resample.Execute(moving_itk_image)
  sitk.WriteImage(result_image, warped_filename)

  print('result image type: ' + str(result_image.GetPixelIDTypeAsString()))

# Compare

## Query to get intersecting SeriesInstanceUIDs

In [None]:
# NLSTSeg - get a lesion nii from a particular segmentation, and the corresponding lung lobe assignment.
# TotalSegmentator - check which lung lobe(s) the lesion overlaps with.
# To report:
  # PatientID, StudyInstanceUID, ReferencedSeriesInstanceUID, TS_SEG_SeriesInstanceUID, NLSTSeg_SEG_SeriesInstanceUID,
  # NLSTSeg_SegmentLabel, NLSTSeg_AnatomicRegion, TS_best_matching_lobe, matching (1 if they match, 0 if not), % overlap of TS and NLSTSeg

# might need to check geometry...

In [None]:
### First get a list of the ReferencedSeriesInstanceUID that have both a NLSTSeg lesion SEG and a TotalSegmentator SEG ###

client_bq = bigquery.Client(project=project_name)

# NLSTSeg - one row per distinct (patient, study, SEG series, referenced series)
query = """
    SELECT DISTINCT
      PatientID,
      StudyInstanceUID,
      SeriesInstanceUID,
      segmented_SeriesInstanceUID
    FROM
      `idc-external-018.nlstseg_seg_and_sr.segmentations2`
      """
df_nlstseg = client_bq.query(query).to_dataframe()

# NLSTSeg with all seg information - one row per segment (lesion); used later
# to look up the lobe that NLSTSeg assigned to each lesion
query = """
    SELECT DISTINCT
      PatientID,
      StudyInstanceUID,
      SeriesInstanceUID,
      segmented_SeriesInstanceUID,
      SeriesDescription,
      AnatomicRegion.CodeMeaning as AnatomicRegion,
      SegmentNumber,
      SegmentLabel,
      SegmentDescription
    FROM
      `idc-external-018.nlstseg_seg_and_sr.segmentations2`
      """
df_nlstseg_all = client_bq.query(query).to_dataframe()

# TotalSegmentator - SEG series produced by TotalSegmentator v1.5.6, joined
# with dicom_all to get the gcs_url of the SEG file(s)
query = """
  WITH select_series AS (
  SELECT DISTINCT
    PatientID,
    StudyInstanceUID,
    SeriesInstanceUID,
    segmented_SeriesInstanceUID
  FROM
    `bigquery-public-data.idc_current.segmentations`
  WHERE
    'TotalSegmentator v1.5.6' IN UNNEST(SegmentAlgorithmName)
  )

  SELECT
    select_series.PatientID,
    select_series.StudyInstanceUID,
    select_series.SeriesInstanceUID,
    select_series.segmented_SeriesInstanceUID,
    dicom_all.gcs_url
  FROM
    select_series
  JOIN
    `bigquery-public-data.idc_current.dicom_all` as dicom_all
  ON
    select_series.SeriesInstanceUID = dicom_all.SeriesInstanceUID
  """
df_ts = client_bq.query(query).to_dataframe()

# Get the overlap: referenced series that were segmented by both
nlstseg_series = df_nlstseg['segmented_SeriesInstanceUID'].values
ts_series = df_ts['segmented_SeriesInstanceUID'].values
intersect_series = list(set(nlstseg_series) & set(ts_series))

# Note: the first two numbers are row counts of the query results
print('Number of series in NLSTSeg: ' + str(len(nlstseg_series)))
print('Number of series in TS: ' + str(len(ts_series)))
print('Number of series overlap: ' + str(len(intersect_series)))

# Get the df overlap

# Form the dataframe with TS data (rename() returns a new frame, so adding a
# column to it is safe)
df_seg1 = df_ts[df_ts['segmented_SeriesInstanceUID'].isin(intersect_series)]
df_seg1 = df_seg1.rename(columns={'SeriesInstanceUID': 'TS_SeriesInstanceUID'})
gcs_url = df_seg1['gcs_url'].values
df_seg1['TS_path'] = gcs_url

# Form the dataframe with NLSTSeg data. The explicit .copy() makes df_seg2 an
# independent frame; without it the column assignments below write to a slice
# of df_nlstseg and pandas emits SettingWithCopyWarning.
df_seg2 = df_nlstseg[df_nlstseg['segmented_SeriesInstanceUID'].isin(intersect_series)].copy()
series_list = df_seg2['SeriesInstanceUID'].values
df_seg2['NLSTSeg_SeriesInstanceUID'] = series_list
# Add the path name in bucket for NLSTSeg_SeriesInstanceUID. These are gs://
# URLs rather than file system paths, so they are joined with "/" explicitly.
nlstseg_path = [f"{nlstseg_dicom_bucket}/{uid}_SEG.dcm" for uid in df_seg2['segmented_SeriesInstanceUID'].values]
df_seg2['NLSTSeg_path'] = nlstseg_path
# The CT volumes in the original NLSTSeg bucket are named by PatientID
seg_list = [f"{nlstseg_original_bucket}/images/{patient_id}_CT.nii.gz" for patient_id in df_seg2['PatientID'].values]
df_seg2['NLSTSeg_CT_path'] = seg_list

# Now merge; columns present in both frames get _x (TS) / _y (NLSTSeg)
# suffixes, and the TS-side patient/study identifiers are the ones kept
df_seg = pd.merge(df_seg1, df_seg2, on='segmented_SeriesInstanceUID')
df_seg = df_seg[['PatientID_x', 'StudyInstanceUID_x', 'segmented_SeriesInstanceUID',
                 'TS_SeriesInstanceUID', 'TS_path',
                 'NLSTSeg_SeriesInstanceUID', 'NLSTSeg_path',
                 'NLSTSeg_CT_path']]
df_seg = df_seg.rename(columns={'PatientID_x':'PatientID',
                                'StudyInstanceUID_x': 'StudyInstanceUID'})

Number of series in NLSTSeg: 601
Number of series in TS: 126051
Number of series overlap: 575


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_seg2['NLSTSeg_SeriesInstanceUID'] = series_list
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_seg2['NLSTSeg_path'] = nlstseg_path
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_seg2['NLSTSeg_CT_path'] = seg_list


In [None]:
# # Get a subset of the patients that have multiple lesions

# query = f"""
#   SELECT
#     *
#   FROM
#     `idc-external-018.nlstseg_seg_and_sr.quantitative_measurements_with_minmax_volume_with_urls`
#   WHERE
#     num_lesions > 2
# """

# df_nlstseg_lesion_count = client_bq.query(query).to_dataframe()
# intersect_series2 = list(set(df_nlstseg_lesion_count['sourceSegmentedSeriesUID'].values))
# num_series = len(intersect_series2)
# print('num_series > 2 lesions: ' + str(num_series))

# # And now intersect this with the intersect_series

# SeriesInstanceUID_list = list(set(intersect_series2) & set(intersect_series))
# num_series = len(SeriesInstanceUID_list)
# print('num_series: ' + str(num_series))

## Perform the comparison

In [None]:
# Folder that collects one result CSV per series; exist_ok makes the call a
# no-op when the folder is already there.
os.makedirs("/content/lesion_matching", exist_ok=True)

# Unique referenced series that have both an NLSTSeg and a TS segmentation
SeriesInstanceUID_list = list(set(df_seg['segmented_SeriesInstanceUID'].values))
print(f'Num series: {len(SeriesInstanceUID_list)}')

# Series counts that correspond to 10%, 20%, ..., 100% of the full list
checkpoints = set(map(lambda i: int(len(SeriesInstanceUID_list) * i / 10), range(1, 11)))

Num series: 575


In [None]:
# Work out which series do not have a result CSV in the output folder yet

from pathlib import Path

# Results are saved as "<segmented_SeriesInstanceUID>.csv", so the file stem is the UID
SeriesInstanceUID_list_completed = [
    Path(entry).stem for entry in os.listdir("/content/lesion_matching")
]
SeriesInstanceUID_list_missing = list(
    set(SeriesInstanceUID_list) - set(SeriesInstanceUID_list_completed)
)
print(len(SeriesInstanceUID_list_missing))

SeriesInstanceUID_list_missing


15


['1.2.840.113654.2.55.55249935124047036645194998679497712331',
 '1.3.6.1.4.1.14519.5.2.1.7009.9004.674424600608887623087401566792',
 '1.3.6.1.4.1.14519.5.2.1.7009.9004.290248773670969586908592892491',
 '1.2.840.113654.2.55.234870527508599761640999671632707483702',
 '1.2.840.113654.2.55.105508597935357050120573060055759818144',
 '1.2.840.113654.2.55.16111750182367908886746912872049323704',
 '1.2.840.113654.2.55.130711389735203660002446393754174369152',
 '1.2.840.113654.2.55.303640943191823703361709653866526987877',
 '1.2.840.113654.2.55.274095880606342862580083739420627901741',
 '1.2.840.113654.2.55.67095101227495808685302586770473894957',
 '1.2.840.113654.2.55.70254381803418681935934856553657623781',
 '1.3.6.1.4.1.14519.5.2.1.7009.9004.136132521671327053038928327413',
 '1.3.6.1.4.1.14519.5.2.1.7009.9004.972046923789567546322049002825',
 '1.3.6.1.4.1.14519.5.2.1.7009.9004.257830882183445545540953609532',
 '1.2.840.113654.2.55.55905915368746417680608580988846234578']

In [None]:
# for series_index,segmented_SeriesInstanceUID in enumerate(SeriesInstanceUID_list,1):
for series_index,segmented_SeriesInstanceUID in enumerate(SeriesInstanceUID_list_missing,1):

  print('***** series_index: ' + str(series_index) + ' series: ' + str(segmented_SeriesInstanceUID) + ' *****')

  ##########################
  ### Download the files ###
  ##########################

  # Get paths
  TS_SeriesInstanceUID = df_seg[df_seg['segmented_SeriesInstanceUID']==segmented_SeriesInstanceUID]['TS_SeriesInstanceUID'].values[0]
  NLSTSeg_SeriesInstanceUID = df_seg[df_seg['segmented_SeriesInstanceUID']==segmented_SeriesInstanceUID]['NLSTSeg_SeriesInstanceUID'].values[0]
  TS_path = df_seg[df_seg['segmented_SeriesInstanceUID']==segmented_SeriesInstanceUID]['TS_path'].values[0]
  NLSTSeg_path = df_seg[df_seg['segmented_SeriesInstanceUID']==segmented_SeriesInstanceUID]['NLSTSeg_path'].values[0]
  NLSTSeg_CT_path = df_seg[df_seg['segmented_SeriesInstanceUID']==segmented_SeriesInstanceUID]['NLSTSeg_CT_path'].values[0]
  # download files
  !gsutil cp $TS_path "/content/ts.dcm"
  !gsutil cp $NLSTSeg_path "/content/nlstseg.dcm"
  !gsutil cp $NLSTSeg_CT_path "/content/nlstseg_ct.nii.gz"

  #####################################
  ### Convert from DICOM SEG to nii ###
  #####################################

  # NLSTSeg
  if not os.path.isdir("/content/nlstseg_output"):
    os.makedirs("/content/nlstseg_output", exist_ok=True)
  !segimage2itkimage -t "nii" --outputDirectory "/content/nlstseg_output" --inputDICOM "/content/nlstseg.dcm"  --mergeSegments
  json_filename = "/content/nlstseg_output/meta.json"
  with open(json_filename, 'r') as file:
    nlstseg_data = json.load(file)
  nlstseg_data_pretty = json.dumps(nlstseg_data, indent=2)
  # TS
  if not os.path.isdir("/content/ts_output"):
    os.makedirs("/content/ts_output", exist_ok=True)
  !segimage2itkimage -t "nii" --outputDirectory "/content/ts_output" --inputDICOM "/content/ts.dcm"  --mergeSegments
  # Let's print what's in the json file
  json_filename = "/content/ts_output/meta.json"
  with open(json_filename, 'r') as file:
    ts_data = json.load(file)
  ts_data_pretty = json.dumps(ts_data, indent=2)

  #############################################
  ### Resample both to the original CT file ###
  #############################################

  # This is the one from NLSTSeg - not the same as taking DICOM and converting to CT using dcm2niix - which was what was used for TS
  # NLSTSeg
  fixed_image_filename = '/content/nlstseg_ct.nii.gz'
  moving_image_filename = '/content/nlstseg_output/1.nii.gz'
  warped_filename = '/content/nlstseg_warped.nii.gz'
  warp_mask = 1
  resample_image_or_mask(fixed_image_filename, moving_image_filename, warped_filename, warp_mask)
  # TS
  fixed_image_filename = '/content/nlstseg_ct.nii.gz'
  moving_image_filename = '/content/ts_output/1.nii.gz'
  warped_filename = '/content/ts_warped.nii.gz'
  warp_mask = 1
  resample_image_or_mask(fixed_image_filename, moving_image_filename, warped_filename, warp_mask)
  # Load the resampled files
  ts_nii = "/content/ts_warped.nii.gz"
  nlstseg_nii = "/content/nlstseg_warped.nii.gz"
  ts_img = nib.load(ts_nii).get_fdata()
  nlstseg_img = nib.load(nlstseg_nii).get_fdata()
  print("ts_img: " + str(ts_img.shape))
  print("nlstseg_img: " + str(nlstseg_img.shape))

  ### Get corresponding info from df_nlstseg_all ###
  df_lesion_info = df_nlstseg_all[df_nlstseg_all['segmented_SeriesInstanceUID']==segmented_SeriesInstanceUID]
  num_lesions = len(df_lesion_info)
  print('num_lesions: ' + str(num_lesions))

  ############################
  ### Create some mappings ###
  ############################

  # AnatomicRegion in NLSTSeg to SegmentLabel in TS
  mapping_nlstseg_to_ts = dict()
  mapping_nlstseg_to_ts = {"Upper lobe of right lung": "Right Upper lobe of lung",
                           "Upper lobe of left lung": "Left Upper lobe of lung",
                           "Lower lobe of right lung": "Right Lower lobe of lung",
                           "Lower lobe of left lung": "Left Lower lobe of lung",
                           "Middle lobe of right lung": "Middle lobe of right lung"}
  # Simplified map to binary
  mapping_ts_lung_to_label = dict()
  ### Instead we use all from the json file - later I could get this from the segmentations big query table. ###
  # assuming `data` is your JSON dict
  segments = [s for sublist in ts_data["segmentAttributes"] for s in sublist]
  # use json_normalize to flatten nested fields automatically
  df = pd.json_normalize(
      segments,
      sep='_'
  )
  # now add top-level metadata to every row
  meta_fields = [
      "BodyPartExamined",
      "ClinicalTrialCoordinatingCenterName",
      "ClinicalTrialSeriesID",
      "ClinicalTrialTimePointID",
      "ContentCreatorName",
      "InstanceNumber",
      "SeriesDescription",
      "SeriesNumber"
  ]
  for field in meta_fields:
      df[field] = ts_data.get(field)
  # Now form the dict
  keys = df['SegmentLabel'].values
  values = df['labelID'].values
  mapping_ts_lung_to_label = dict(zip(keys, values))
  # Now do a reverse mapping
  mapping_label_to_ts_lung = {v: k for k, v in mapping_ts_lung_to_label.items()}

  ### For each lesion, get the lobe that it's in from TS seg ###

  df_ts_regions_summary = pd.DataFrame()

  for lesion_index in range(0,num_lesions):

    # For the lesion index, find the ids to compare
    NLSTSeg_AnatomicRegion = df_lesion_info['AnatomicRegion'].values[lesion_index]
    NLSTSeg_SegmentNumber = df_lesion_info['SegmentNumber'].values[lesion_index]
    # if not (NLSTSeg_AnatomicRegion in list(mapping_nlstseg_to_ts.keys())):
    #   print('Region ' + NLSTSeg_AnatomicRegion + ' does not exist in TS')
    #   continue
    if NLSTSeg_AnatomicRegion not in list(mapping_nlstseg_to_ts.keys()):

      print(f"Region '{NLSTSeg_AnatomicRegion}' does not exist in TS — recording as unmatched.")
      df_temp = pd.DataFrame({
          "TS_Segment": ["No corresponding region in TS"],
          "Percentage": [0],
          "NLSTSeg_Segment": [NLSTSeg_AnatomicRegion],
          "Lesion": [df_lesion_info['SegmentLabel'].values[lesion_index]],
          "PatientID": [df_lesion_info['PatientID'].values[lesion_index]],
          "StudyInstanceUID": [df_lesion_info['StudyInstanceUID'].values[lesion_index]],
          "segmented_SeriesInstanceUID": [df_lesion_info['segmented_SeriesInstanceUID'].values[lesion_index]],
          "TS_SeriesInstanceUID": [TS_SeriesInstanceUID],
          "NLSTSeg_SeriesInstanceUID": [NLSTSeg_SeriesInstanceUID],
      })
      df_ts_regions_summary = pd.concat([df_ts_regions_summary, df_temp])

    else:

      # Get the lesion_index values
      nlstseg_lesion_indices = np.where(nlstseg_img==NLSTSeg_SegmentNumber)

      # Get the values in the TS segmentation
      ts_values = ts_img[nlstseg_lesion_indices]
      # remove 0's from array - background
      ts_values = ts_values[ts_values != 0]

      # Now find the lung regions that correspond to the ts_values
      ts_regions = [mapping_label_to_ts_lung[f] for f in ts_values]

      # Now create df
      df_ts_regions = pd.DataFrame(ts_regions, columns=['TS_Segment'])
      # Compute counts and percentages
      df_temp = (
          df_ts_regions['TS_Segment']
          .value_counts(normalize=True)  # gives fraction
          .mul(100)                      # convert to percentage
          .rename('Percentage')
          .reset_index()
          .rename(columns={'index': 'TS_Segment'})
      )
      num_overlapping_regions = len(df_temp)

      # for each overlapping region
      for region in range(0,num_overlapping_regions):
        df_temp['NLSTSeg_Segment'] = mapping_nlstseg_to_ts[NLSTSeg_AnatomicRegion]
        df_temp['Lesion'] = df_lesion_info['SegmentLabel'].values[lesion_index]
        df_temp['PatientID'] = df_lesion_info['PatientID'].values[lesion_index]
        df_temp['StudyInstanceUID'] = df_lesion_info['StudyInstanceUID'].values[lesion_index]
        df_temp['segmented_SeriesInstanceUID'] = df_lesion_info['segmented_SeriesInstanceUID'].values[lesion_index]
        df_temp['TS_SeriesInstanceUID'] = TS_SeriesInstanceUID
        df_temp['NLSTSeg_SeriesInstanceUID'] = NLSTSeg_SeriesInstanceUID
        df_ts_regions_summary = pd.concat([df_ts_regions_summary,df_temp])

  ###############################
  ### Add in matching columns ###
  ###############################

  # Add a matching column - on a per lesion basis
  try:
    df_ts_regions_summary["per_lesion_match"] = (df_ts_regions_summary["NLSTSeg_Segment"] == df_ts_regions_summary["TS_Segment"]).astype(int)
  except:
    print('ERROR: cannot find one of the segments in')
    continue
  # Add a matching column - on a per series basis
  # If any of the per_lesion_match in a series are 0, set to 0. else if all 1 set to 1.
  df_ts_regions_summary["per_series_match"] = (
      df_ts_regions_summary.groupby(["PatientID", "StudyInstanceUID", "segmented_SeriesInstanceUID"])["per_lesion_match"]
        .transform("min")
  )
  # Add in the viewer url
  # Holds the segmented_SeriesInstanceUID, TS_SeriesInstanceUID and the NLSTSeg_SeriesInstanceUID
  viewer_url_list = ["https://viewers-sandbox-gha-testing.web.app/viewer/?StudyInstanceUIDs=" +
                    str(study) +  "&SeriesInstanceUIDs=" + str(seg_series) + ',' + str(ts_series) + ',' + str(nlstseg_series) +
                      "&gcp=projects/idc-external-018/locations/us-central1/datasets/nlstseg_seg_and_sr/dicomStores/nlstseg_seg_and_sr"
                    for study,seg_series,ts_series,nlstseg_series in
                    zip(df_ts_regions_summary['StudyInstanceUID'].values,
                        df_ts_regions_summary['segmented_SeriesInstanceUID'].values,
                        df_ts_regions_summary['TS_SeriesInstanceUID'].values,
                        df_ts_regions_summary['NLSTSeg_SeriesInstanceUID'].values)]
  df_ts_regions_summary['viewer_url'] = viewer_url_list

  # Reorder columns
  df_ts_regions_summary = df_ts_regions_summary[['PatientID', 'StudyInstanceUID', 'segmented_SeriesInstanceUID',
                                                'TS_SeriesInstanceUID', 'NLSTSeg_SeriesInstanceUID',
                                                'Lesion', 'NLSTSeg_Segment', 'TS_Segment', 'Percentage',
                                                'per_lesion_match', 'per_series_match',
                                                'viewer_url']]

  # Save this csv and copy to a bucket
  df_ts_regions_summary_filename = os.path.join("/content/lesion_matching", str(segmented_SeriesInstanceUID) + ".csv")
  df_ts_regions_summary.to_csv(df_ts_regions_summary_filename)
  bucket_filename = os.path.join("gs://nlstseg_seg_and_sr_analysis/lesion_matching", str(segmented_SeriesInstanceUID) + ".csv")
  !gsutil cp $df_ts_regions_summary_filename $bucket_filename

  ### Delete files ###
  # NLSTSeg files
  !rm "/content/nlstseg.dcm"
  !rm "/content/nlstseg_warped.nii.gz"
  !rm "/content/nlstseg_output/1.nii.gz"
  !rm "/content/nlstseg_output/meta.json"
  # TS files
  !rm "/content/ts.dcm"
  !rm "/content/ts_warped.nii.gz"
  !rm "/content/ts_output/1.nii.gz"
  !rm "/content/ts_output/meta.json"
  # Other files
  !rm "/content/nlstseg_ct.nii.gz"

  if series_index in checkpoints:
    print(f"{(series_index / len(SeriesInstanceUID_list)) * 100:.0f}% of series processed.")


***** series_index: 1 series: 1.2.840.113654.2.55.55249935124047036645194998679497712331 *****
Copying gs://idc-open-data/594c8b0f-bd4e-46fa-bdbe-a23600839710/ff37d5f3-874a-4845-8e93-4abdf7da5e11.dcm...
- [1 files][ 97.0 MiB/ 97.0 MiB]                                                
Operation completed over 1 objects/97.0 MiB.                                     
Copying gs://nlstseg_seg_and_sr/seg/1.2.840.113654.2.55.55249935124047036645194998679497712331_SEG.dcm...
- [1 files][590.4 KiB/590.4 KiB]                                                
Operation completed over 1 objects/590.4 KiB.                                    
Copying gs://nlstseg/images/110021_CT.nii.gz...
| [1 files][ 35.0 MiB/ 35.0 MiB]                                                
Operation completed over 1 objects/35.0 MiB.                                     
dcmqi repository URL: https://github.com/QIICR/dcmqi revision: 4e5b700 tag: v1.4.0
Loading DICOM SEG file /content/nlstseg.dcm
Row direction: 1 0 0
Col di

In [None]:
# Merge the per-series result CSVs that are present in the local output folder
# into one dataframe.
# NOTE(review): nothing in this cell downloads from the bucket - results from
# earlier sessions have to be copied into the folder before running it.

lesion_matching_dir = "/content/lesion_matching"
# Only *.csv entries are read (the folder may also contain e.g. checkpoint
# directories); sorting makes the row order of the merged table reproducible.
filenames = sorted(
    os.path.join(lesion_matching_dir, entry)
    for entry in os.listdir(lesion_matching_dir)
    if entry.endswith(".csv")
)
num_files = len(filenames)
# Show a short preview instead of dumping hundreds of paths into the output
print(filenames[:5])
print(num_files)

# Read everything first and concatenate once (concatenating inside a loop
# re-copies the accumulated frame on every iteration)
per_series_frames = [pd.read_csv(filename) for filename in filenames]
df_merged = pd.concat(per_series_frames) if per_series_frames else pd.DataFrame()

['/content/lesion_matching/1.3.6.1.4.1.14519.5.2.1.7009.9004.933841618640476833036494357619.csv', '/content/lesion_matching/1.3.6.1.4.1.14519.5.2.1.7009.9004.328556977712236541287818198607.csv', '/content/lesion_matching/1.3.6.1.4.1.14519.5.2.1.7009.9004.363986465227756169111234831376.csv', '/content/lesion_matching/1.2.840.113654.2.55.333959368472243697306539144681085718022.csv', '/content/lesion_matching/1.2.840.113654.2.55.225893401562023672707560220661212937130.csv', '/content/lesion_matching/1.2.840.113654.2.55.178610016791794005782737427701208142463.csv', '/content/lesion_matching/1.3.6.1.4.1.14519.5.2.1.7009.9004.226300330064379796263332757787.csv', '/content/lesion_matching/1.3.6.1.4.1.14519.5.2.1.7009.9004.179068386711781741405062504446.csv', '/content/lesion_matching/1.2.840.113654.2.55.135968128742262057179054957067531073282.csv', '/content/lesion_matching/1.2.840.113654.2.55.128811497746561400357955147775811342417.csv', '/content/lesion_matching/1.3.6.1.4.1.14519.5.2.1.7009

In [None]:
# Write the merged per-series results to a single CSV file.
df_merged.to_csv("/content/lesion_matching.csv")

In [None]:
# Number of rows in the merged results table.
len(df_merged)

1210