## Part1) Feature Extraction

In [1]:
# Mounting your Google Drive to your Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# The tumor in the Lung PET-CT data set was segmented and will be quantified in the following steps.
# The purpose is to quantify the tumor mass into mineable data by extracting features from the tumor.
# The definitions of some of the features were provided in the supplementary document.

In [2]:
# Installation of Radiomic package on your Colab.
!pip install pyradiomics

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyradiomics
  Downloading pyradiomics-3.0.1.tar.gz (34.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.5/34.5 MB[0m [31m48.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting SimpleITK>=0.9.1
  Downloading SimpleITK-2.2.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (52.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.7/52.7 MB[0m [31m14.3 MB/s[0m eta [36m0:00:00[0m
Collecting pykwalify>=1.6.0
  Downloading pykwalify-1.8.0-py2.py3-none-any.whl (24 kB)
Collecting ruamel.yaml>=0.16.0
  Downloading ruamel.yaml-0.17.21-py3-none-any.whl (109 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m109.5/109.5 kB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting docopt>=0.6.2
  Downloading docopt-0.6.2.tar.gz (25 kB)
  Preparing metadata (setup.py)

In [3]:
# Importing required libraries

from sklearn.model_selection import train_test_split
from radiomics import firstorder, glcm
from sklearn import metrics
from sklearn.svm import SVC
import SimpleITK as sitk
import pandas as pd
import numpy as np
import six

In [4]:
# Reading the images along with their segmentation masks

CT_Image = sitk.ReadImage('/content/drive/My Drive/Data/CT_Data.nii.gz')
CT_Mask = sitk.ReadImage('/content/drive/My Drive/Data/CT_Mask.nii.gz')

PET_Image = sitk.ReadImage('/content/drive/My Drive/Data/PET_Data.nii.gz')
PET_Mask = sitk.ReadImage('/content/drive/My Drive/Data/PET_Mask.nii.gz')

In [5]:
# First Order Statistics Features from CT image

FOS_CT = firstorder.RadiomicsFirstOrder(CT_Image, CT_Mask)
FOS_CT.enableAllFeatures()
FOS_CT_Features = FOS_CT.execute()
for (key, val) in six.iteritems(FOS_CT_Features):
  print('The value of the feature  ', key, ' is equal to:', val)

The value of the feature   10Percentile  is equal to: -513.0
The value of the feature   90Percentile  is equal to: 64.0
The value of the feature   Energy  is equal to: 1298192985.0
The value of the feature   Entropy  is equal to: 4.73974457419883
The value of the feature   InterquartileRange  is equal to: 321.0
The value of the feature   Kurtosis  is equal to: 3.3499165072125168
The value of the feature   Maximum  is equal to: 513.0
The value of the feature   MeanAbsoluteDeviation  is equal to: 195.09161325988427
The value of the feature   Mean  is equal to: -146.52185969673727
The value of the feature   Median  is equal to: -34.0
The value of the feature   Minimum  is equal to: -980.0
The value of the feature   Range  is equal to: 1493.0
The value of the feature   RobustMeanAbsoluteDeviation  is equal to: 142.1344273580915
The value of the feature   RootMeanSquared  is equal to: 276.7563467414598
The value of the feature   Skewness  is equal to: -1.0974796336022592
The value of the fe

In [6]:
# First Order Statistics Features from PET image

FOS_PET = firstorder.RadiomicsFirstOrder(PET_Image, PET_Mask)
FOS_PET.enableAllFeatures()
FOS_PET_Features = FOS_PET.execute()
for (key, val) in six.iteritems(FOS_PET_Features):
  print('The value of the feature  ', key, ' is equal to:', val)

The value of the feature   10Percentile  is equal to: 1402.0
The value of the feature   90Percentile  is equal to: 15343.5
The value of the feature   Energy  is equal to: 165775961961.0
The value of the feature   Entropy  is equal to: 8.994573480975289
The value of the feature   InterquartileRange  is equal to: 6744.25
The value of the feature   Kurtosis  is equal to: 5.047895462850615
The value of the feature   Maximum  is equal to: 32767.0
The value of the feature   MeanAbsoluteDeviation  is equal to: 4515.242330507267
The value of the feature   Mean  is equal to: 6709.045541706616
The value of the feature   Median  is equal to: 4593.5
The value of the feature   Minimum  is equal to: 214.0
The value of the feature   Range  is equal to: 32553.0
The value of the feature   RobustMeanAbsoluteDeviation  is equal to: 2950.6409337293276
The value of the feature   RootMeanSquared  is equal to: 8914.63623459254
The value of the feature   Skewness  is equal to: 1.5114199651359297
The value of 

In [7]:
# TODO: Can you interprete these results?

print("The average intensity of the tumor in CT image is:", FOS_CT_Features["Mean"])
print("The average intensity of the tumor in PET image is:", FOS_PET_Features["Mean"])

The average intensity of the tumor in CT image is: -146.52185969673727
The average intensity of the tumor in PET image is: 6709.045541706616


In [8]:
# Textural Features (Gray Level Co-occurrence Matrix) for CT image

CT_GLCM = glcm.RadiomicsGLCM(CT_Image, CT_Mask)
CT_GLCM.enableAllFeatures()
CT_GLCM_Features = CT_GLCM.execute()
for (key, val) in six.iteritems(CT_GLCM_Features):
  print('The value of the feature  ', key, ' is equal to:', val)

GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated


The value of the feature   Autocorrelation  is equal to: 1344.3467313633894
The value of the feature   ClusterProminence  is equal to: 272072.6182566958
The value of the feature   ClusterShade  is equal to: -5267.609771720837
The value of the feature   ClusterTendency  is equal to: 261.3108614526634
The value of the feature   Contrast  is equal to: 24.61795710502916
The value of the feature   Correlation  is equal to: 0.8190509331047975
The value of the feature   DifferenceAverage  is equal to: 3.2737565956433015
The value of the feature   DifferenceEntropy  is equal to: 3.260314004442552
The value of the feature   DifferenceVariance  is equal to: 13.40197837130012
The value of the feature   Id  is equal to: 0.408721873801729
The value of the feature   Idm  is equal to: 0.33655436852642373
The value of the feature   Idmn  is equal to: 0.9936583645016732
The value of the feature   Idn  is equal to: 0.9519406922471757
The value of the feature   Imc1  is equal to: -0.19190470583872668
The

In [9]:
# Textural Features (Gray Level Co-occurrence Matrix) for PET image

PET_GLCM = glcm.RadiomicsGLCM(PET_Image, PET_Mask)
PET_GLCM.enableAllFeatures()
PET_GLCM_Features = PET_GLCM.execute()
for (key, val) in six.iteritems(PET_GLCM_Features):
  print('The value of the feature  ', key, ' is equal to:', val)

GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated


The value of the feature   Autocorrelation  is equal to: 134423.56757161135
The value of the feature   ClusterProminence  is equal to: 182683951404.42166
The value of the feature   ClusterShade  is equal to: 119672692.79447362
The value of the feature   ClusterTendency  is equal to: 205100.10540722654
The value of the feature   Contrast  is equal to: 28172.19026335579
The value of the feature   Correlation  is equal to: 0.759624759699868
The value of the feature   DifferenceAverage  is equal to: 117.39309966678232
The value of the feature   DifferenceEntropy  is equal to: 8.009994992471293
The value of the feature   DifferenceVariance  is equal to: 13777.386324272575
The value of the feature   Id  is equal to: 0.03836336452815974
The value of the feature   Idm  is equal to: 0.013623099500202684
The value of the feature   Idmn  is equal to: 0.9847331097503366
The value of the feature   Idn  is equal to: 0.9231164819678406
The value of the feature   Imc1  is equal to: -0.7118238186330709

## Part2) Classification

#### The tumors from 31 PET-CT images were already segmented and first order as well as textural features were already extracted and saved in excel files. We already know which of the patients survived or not. From the machine learning perspective, that means we know the outcome labels.  Now, with the extracted features and the outcome labels, we can train a classifier to predict the survival status. For this task, we will use Support Vector Machine as a learning algorithm.

In [10]:
# Importing the Feature set

ct_features = pd.read_csv('/content/drive/My Drive/Data/ct_features.csv', header=None) 
pet_features = pd.read_csv('/content/drive/My Drive/Data/pet_features.csv', header=None) 
targets = pd.read_csv('/content/drive/My Drive/Data/labels.csv', header=None)

In [11]:
# Normalizing the feature set

x_all_ct = ct_features.values[:, :]
x_all_ct = (x_all_ct-np.mean(x_all_ct))/np.std(x_all_ct)

x_all_pet = pet_features.values[:,:]
x_all_pet = (x_all_pet-np.mean(x_all_pet))/np.std(x_all_pet)

y_all = targets.values[:,0]  # Outcome Labels
print(y_all)

[1. 0. 1. 0. 0. 1. 0. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 0. 1. 0. 1. 1. 0.
 0. 0. 0. 1. 1. 1. 1.]


In [12]:
# Dividing the CT feature set into train and test sets

X_train_CT, X_test_CT, y_train_CT, y_test_CT = train_test_split(x_all_ct, y_all, test_size=0.3, random_state=42)

In [13]:
# Building a classifier model with CT features

Model_CT = SVC(gamma='auto')
Model_CT.set_params(kernel='rbf').fit(X_train_CT,y_train_CT.ravel())  

y_pred_CT = Model_CT.predict(X_test_CT) 
FOS_CT_Acc = metrics.accuracy_score(y_test_CT,y_pred_CT)
print('Accuracy Score:',metrics.accuracy_score(y_test_CT,y_pred_CT))

Accuracy Score: 0.7


In [14]:
# Dividing the PET feature set into train and test sets
X_train_PET, X_test_PET, y_train_PET, y_test_PET = train_test_split(x_all_pet, y_all, test_size=0.3, random_state=42)

In [15]:
# Building a classifier model with PET features

Model_PET = SVC(gamma='auto') 
Model_PET.set_params(kernel='rbf').fit(X_train_PET,y_train_PET.ravel())  

y_pred_PET = Model_PET.predict(X_test_PET) 
FOS_PET_Acc = metrics.accuracy_score(y_test_PET, y_pred_PET)
print('Accuracy Score:',metrics.accuracy_score(y_test_PET,y_pred_PET))

Accuracy Score: 0.9


In [None]:
# TODO

# Which feature set have more prediction power? Can you explain?

In [None]:
# TODO:
# Can you create a feature set containing both PET and CT features and repeat the classification step with the combined feature set? How such combination affect the prediction power?

In [None]:
# TODO:
# Can you train a Random Forest with the combined feature set and compare the result against the SVM?