# Introduction
This project is about creating an algorithm that would predict the detection of lung cancer based on CT scans. The algorithm would also predict what type of cancer is within the patient and the size of the tumor if applied.

In [10]:
# Install tcia_utill and itk if you haven't already
#%pip install tcia_utils -q
#%pip install itk

Collecting itk
  Downloading itk-5.3.0-cp39-cp39-win_amd64.whl (8.3 kB)
Collecting itk-registration==5.3.0
  Downloading itk_registration-5.3.0-cp39-cp39-win_amd64.whl (9.4 MB)
     ---------------------------------------- 9.4/9.4 MB 13.3 MB/s eta 0:00:00
Collecting itk-io==5.3.0
  Downloading itk_io-5.3.0-cp39-cp39-win_amd64.whl (8.6 MB)
     ---------------------------------------- 8.6/8.6 MB 13.7 MB/s eta 0:00:00
Collecting itk-core==5.3.0
  Downloading itk_core-5.3.0-cp39-cp39-win_amd64.whl (36.4 MB)
     --------------------------------------- 36.4/36.4 MB 12.1 MB/s eta 0:00:00
Collecting itk-numerics==5.3.0
  Downloading itk_numerics-5.3.0-cp39-cp39-win_amd64.whl (20.0 MB)
     --------------------------------------- 20.0/20.0 MB 13.3 MB/s eta 0:00:00
Collecting itk-filtering==5.3.0
  Downloading itk_filtering-5.3.0-cp39-cp39-win_amd64.whl (23.8 MB)
     --------------------------------------- 23.8/23.8 MB 12.8 MB/s eta 0:00:00
Collecting itk-segmentation==5.3.0
  Downloading itk

In [1]:
# Import the necessary modules
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import pandas as pd
from tcia_utils import nbia
import os
import sys
import itk
import pydicom as dicom

In [2]:
# Get the data using the nbia API call
data = nbia.getSeries(collection = "Lung-PET-CT-Dx", modality= "CT", bodyPart= "CHEST")

 
nbia.makeSeriesReport(data)

Calling...  https://services.cancerimagingarchive.net/nbia-api/services/v1/getSeries with parameters {'Collection': 'Lung-PET-CT-Dx', 'Modality': 'CT', 'BodyPartExamined': 'CHEST'}
Summary Statistics

Subjects:  201 subjects
Studies:  208 studies
Series:  425 series
Images:  104498 images

Series Counts - Collections:
Lung-PET-CT-Dx    425
Name: Collection, dtype: int64
Series Counts - Modality:
CT    425
Name: Modality, dtype: int64 

Series Counts - Body Parts Examined:
CHEST    425
Name: BodyPartExamined, dtype: int64 

Series Counts - Device Manufacturers:
SIEMENS               344
Philips                70
GE MEDICAL SYSTEMS     11
Name: Manufacturer, dtype: int64


In [16]:
# Download a sample
df = nbia.downloadSeries(data, number = 3) 
df.head()

Downloading 3 out of 425 Series Instance UIDs (scans).
Downloading... https://services.cancerimagingarchive.net/nbia-api/services/v1/getImage?NewFileNames=Yes&SeriesInstanceUID=1.3.6.1.4.1.14519.5.2.1.6655.2359.207154000096236603057216610983
Downloading... https://services.cancerimagingarchive.net/nbia-api/services/v1/getImage?NewFileNames=Yes&SeriesInstanceUID=1.3.6.1.4.1.14519.5.2.1.6655.2359.241981550240354690198744362919
Downloading... https://services.cancerimagingarchive.net/nbia-api/services/v1/getImage?NewFileNames=Yes&SeriesInstanceUID=1.3.6.1.4.1.14519.5.2.1.6655.2359.144797390935445988841819830728
Downloaded 3 out of 3 requested series from a total of 425 Series Instance UIDs (scans).
0 failed to download.
0 previously downloaded.


Unnamed: 0,Series UID,Collection,Data Description URI,Subject ID,Study UID,Study Description,Study Date,Series Description,Manufacturer,Modality,SOP Class UID,Number of Images,File Size,Series Number,License Name,License URL,Annotation Size
0,1.3.6.1.4.1.14519.5.2.1.6655.2359.207154000096...,Lung-PET-CT-Dx,https://doi.org/10.7937/TCIA.2020.NNC2-0461,Lung_Dx-A0002,1.3.6.1.4.1.14519.5.2.1.6655.2359.104855313699...,ThoraxAThoraxRoutine Adult,04-25-2007,ThoraxRoutine 8.0.0 B40f,SIEMENS,CT,1.2.840.10008.5.1.4.1.1.2,30,15818868,3.0,Creative Commons Attribution 4.0 International...,https://creativecommons.org/licenses/by/4.0/,0
1,1.3.6.1.4.1.14519.5.2.1.6655.2359.241981550240...,Lung-PET-CT-Dx,https://doi.org/10.7937/TCIA.2020.NNC2-0461,Lung_Dx-A0002,1.3.6.1.4.1.14519.5.2.1.6655.2359.104855313699...,ThoraxAThoraxRoutine Adult,04-25-2007,ThoraxRoutine 8.0.0 B70f,SIEMENS,CT,1.2.840.10008.5.1.4.1.1.2,30,15820248,2.0,Creative Commons Attribution 4.0 International...,https://creativecommons.org/licenses/by/4.0/,0
2,1.3.6.1.4.1.14519.5.2.1.6655.2359.144797390935...,Lung-PET-CT-Dx,https://doi.org/10.7937/TCIA.2020.NNC2-0461,Lung_Dx-A0003,1.3.6.1.4.1.14519.5.2.1.6655.2359.179476503242...,ThoraxAThoraxRoutine Adult,07-07-2006,ThoraxRoutine 10.0 B40f,SIEMENS,CT,1.2.840.10008.5.1.4.1.1.2,27,14236560,3.0,Creative Commons Attribution 4.0 International...,https://creativecommons.org/licenses/by/4.0/,0


In [20]:
series_uid = df["Series UID"][0]
sop_uid = df["SOP Class UID"][0]

print(sop_uid, series_uid)

print("Downloading images")
nbia.downloadImage(series_uid, sopUID=sop_uid, api_url = "restricted")

1.2.840.10008.5.1.4.1.1.2 1.3.6.1.4.1.14519.5.2.1.6655.2359.207154000096236603057216610983
Downloading images
Your security token for accessing the Restricted API is expired or does not exist. Create one using getToken().


In [13]:
print(df)

In [25]:
# Get unique Series UID
#series_uid = df["Series UID"].unique()[0]

#sop_uid = str(df.loc[df["Series UID"] == series_uid, "SOP Class UID"])
sop_uid = "1.2.840.10008.5.1.4.1.1.2"
series_uid = "1.3.6.1.4.1.14519.5.2.1.6655.2359.115443980116685191938898384240"

print(sop_uid)
#image = nbia.viewSeries(series_uid)

1.2.840.10008.5.1.4.1.1.2


In [21]:
# Download images
baseUrl = "https://services.cancerimagingarchive.net/nbia-api/services/v1"
endpoint = "/getImage"
queryParams = "?SeriesInstanceUID=" + series_uid

url = baseUrl + endpoint + queryParams

images = requests.get(url=url)

token = nbia.getToken()

nbia.downloadImage(series_uid, sopUID=sop_uid)

Enter User: 
HTTP Error: 400 -- Double check your user name and password.
Downloading... https://services.cancerimagingarchive.net/nbia-api/services/v1/getSingleImage?SeriesInstanceUID=1.3.6.1.4.1.14519.5.2.1.6655.2359.207154000096236603057216610983&SOPInstanceUID=1.2.840.10008.5.1.4.1.1.2
Error: 500 , double check your permissions and Series/SOP UIDs.
Series UID: 1.3.6.1.4.1.14519.5.2.1.6655.2359.207154000096236603057216610983
SOP UID:  1.2.840.10008.5.1.4.1.1.2


In [29]:
# Get patient info
df = nbia.getStudy(collection = "Lung-PET-CT-Dx", format = "df", api_url = "")
display(df)
#print(df.Phantom.unique())

Calling...  https://services.cancerimagingarchive.net/nbia-api/services/v1/getPatientStudy with parameters {'Collection': 'Lung-PET-CT-Dx'}


Unnamed: 0,StudyInstanceUID,StudyDate,StudyDescription,PatientAge,PatientID,PatientName,PatientSex,Collection,SeriesCount
0,1.3.6.1.4.1.14519.5.2.1.6655.2359.165554066086...,2007-04-04 00:00:00.0,Chest,058Y,Lung_Dx-A0001,Lung_Dx-A0001,M,Lung-PET-CT-Dx,2
1,1.3.6.1.4.1.14519.5.2.1.6655.2359.104855313699...,2007-04-25 00:00:00.0,Thorax^AThoraxRoutine (Adult),053Y,Lung_Dx-A0002,Lung_Dx-A0002,F,Lung-PET-CT-Dx,2
2,1.3.6.1.4.1.14519.5.2.1.6655.2359.179476503242...,2006-07-07 00:00:00.0,Thorax^AThoraxRoutine (Adult),060Y,Lung_Dx-A0003,Lung_Dx-A0003,M,Lung-PET-CT-Dx,2
3,1.3.6.1.4.1.14519.5.2.1.6655.2359.191296879859...,2006-07-25 00:00:00.0,Chest,060Y,Lung_Dx-A0003,Lung_Dx-A0003,M,Lung-PET-CT-Dx,2
4,1.3.6.1.4.1.14519.5.2.1.6655.2359.197033995568...,2006-07-21 00:00:00.0,Chest,060Y,Lung_Dx-A0003,Lung_Dx-A0003,M,Lung-PET-CT-Dx,2
...,...,...,...,...,...,...,...,...,...
431,1.3.6.1.4.1.14519.5.2.1.6655.2359.134824913167...,2009-08-06 00:00:00.0,PET^08_Wholebody_Only (Adult),079Y,Lung_Dx-A0187,Lung_Dx-A0187,M,Lung-PET-CT-Dx,1
432,1.3.6.1.4.1.14519.5.2.1.6655.2359.195979263212...,2009-11-22 00:00:00.0,PET^08_Wholebody_Only (Adult),065Y,Lung_Dx-A0211,Lung_Dx-A0211,F,Lung-PET-CT-Dx,1
433,1.3.6.1.4.1.14519.5.2.1.6655.2359.130619563027...,2010-03-27 00:00:00.0,PET^02_Wholebody_Only (Adult),058Y,Lung_Dx-G0038,Lung_Dx-G0038,M,Lung-PET-CT-Dx,1
434,1.3.6.1.4.1.14519.5.2.1.6655.2359.207282789267...,2010-09-04 00:00:00.0,PET^02_CBM_Wholebody_Only (Adult),059Y,Lung_Dx-A0251,Lung_Dx-A0251,M,Lung-PET-CT-Dx,4


In [22]:
nbia.getToken()

# Get data from manifest
manifest = "Lung-PET-CT-Dx-NBIA-Manifest-122220.tcia"

# converts manifest to list of UIDs
uids = nbia.manifestToList(manifest)

print(uids[0])
# Ues uids to get metadata report
count = 0

df = nbia.getSeriesList(uids)
df.head()

Enter User: 
HTTP Error: 400 -- Double check your user name and password.
Removing headers from TCIA mainfest.
Returning 1295 Series Instance UIDs (scans) as a list.
1.3.6.1.4.1.14519.5.2.1.6655.2359.257508444832901632590301540805
Your security token for accessing the Advanced API is expired or does not exist. Create one using getToken().
