In [None]:
from google.colab import drive

# Mount Google Drive under /content/drive, asking for a remount each time the cell runs.
drive.mount(mountpoint='/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
!pip install pydicom
!pip install opencv-python
!pip install pillow # optional
!pip install pandas
!pip3 install numpy
!pip3 install dicom2nifti
!pip3 install nibabel
!pip3 install pydicom
!pip3 install tqdm
!pip3 install nilearn
!pip install --quiet torchio==0.18.90

In [None]:
import pathlib as plb
import os
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
import shutil

In [None]:
# Class name -> integer class id (ids follow the order of the names below).
labels_mapping = {
    class_name: class_id
    for class_id, class_name in enumerate(('lung_cancer', 'lymphoma', 'melanoma'))
}

In [None]:
def convert_to_yolo_format(label_str, x_min, x_max, y_min, y_max, image_width, image_height, class_map=None):
    """Build one YOLO annotation line from a corner-style bounding box.

    Args:
        label_str: Class name; it is looked up in ``class_map``.
        x_min, x_max: Horizontal box edges, in the same units as ``image_width``.
        y_min, y_max: Vertical box edges, in the same units as ``image_height``.
        image_width, image_height: Image size used to normalise the box.
        class_map: Optional mapping of class name -> class id. When omitted,
            the notebook-level ``labels_mapping`` is used.

    Returns:
        A newline-terminated string
        ``"<class_id> <x_center> <y_center> <width> <height>"`` in which the
        four geometry values are divided by the image size.

    Raises:
        KeyError: ``label_str`` is not a key of the class map.
        ValueError: ``image_width`` or ``image_height`` is not positive.
    """
    # Resolve the class map at call time so the global can be defined in any cell.
    mapping = labels_mapping if class_map is None else class_map

    try:
        label = mapping[label_str]
    except KeyError:
        raise KeyError(
            f"unknown class label {label_str!r}; expected one of {list(mapping)}"
        ) from None

    # A zero or negative image size cannot yield a valid normalised box.
    if image_width <= 0 or image_height <= 0:
        raise ValueError(
            f"image size must be positive, got {image_width} x {image_height}"
        )

    # Calculate the center coordinates
    x_center = (x_min + x_max) / 2.0
    y_center = (y_min + y_max) / 2.0

    # Calculate the width and height of the bounding box
    w = x_max - x_min
    h = y_max - y_min

    # Normalize the coordinates and dimensions relative to the image width and height
    x_center /= image_width
    y_center /= image_height
    w /= image_width
    h /= image_height

    # Return the YOLO-formatted string
    return f"{label} {x_center} {y_center} {w} {h}\n"

In [None]:
# Input CSVs (master split, k-means box labels) and the output folder for the
# generated YOLO label files, all located under the preprocessed-data folder.
data_folder = plb.Path('/content/drive/MyDrive/Capstone_GE_DSI_CV_Project/preprocessed_data')
data_split_csv = data_folder.joinpath('train_test_split_master_v2.csv')
labels_csv = data_folder.joinpath('k_means2', 'SUV_labels.csv')
yolo_labels_folder = data_folder.joinpath('YOLO_kmeans_labels')

In [None]:
# The 'SUV' and 'PETCT_stacked' sub-folders of the YOLO labels folder.
suv_folder, petct_folder = (
    yolo_labels_folder / sub_name for sub_name in ('SUV', 'PETCT_stacked')
)

In [None]:
# Delete previously generated per-split label folders.
# `split_folders` is defined here because this cell runs before the cell that
# otherwise introduces it; without this a fresh kernel raises NameError.
split_folders = ['train', 'test', 'val']

for split in split_folders:
  # ignore_errors=True: a folder that does not exist is silently skipped.
  shutil.rmtree(suv_folder / split, ignore_errors=True)
  shutil.rmtree(petct_folder / split, ignore_errors=True)

In [None]:
# Rebind both label folders to paths relative to the current working directory.
suv_folder, petct_folder = plb.Path('SUV'), plb.Path('PETCT_stacked')

In [None]:
split_folders = ['train', 'test', 'val']

# Make sure <label folder>/<split> exists for both label folders and every
# split; folders that already exist are left as they are.
for label_root in (suv_folder, petct_folder):
  for split in split_folders:
    label_root.joinpath(split).mkdir(parents=True, exist_ok=True)

In [None]:
# Load the master split table from data_split_csv and display it.
split_df = pd.read_csv(data_split_csv)
split_df

Unnamed: 0,file_name,cancer_type,split
0,PETCT_2dac5ef654_axial_277.jpg,lung_cancer,train
1,PETCT_bf178a41b2_axial_234.jpg,lung_cancer,train
2,PETCT_37952b7ffb_axial_277.jpg,lung_cancer,train
3,PETCT_e03b96666f_axial_103.jpg,lung_cancer,train
4,PETCT_760c77b289_axial_259.jpg,lung_cancer,train
...,...,...,...
31615,PETCT_6016a6c3af_axial_180.jpg,negative,test
31616,PETCT_a41d59682f_axial_450.jpg,negative,test
31617,PETCT_e00c98b415_axial_270.jpg,negative,test
31618,PETCT_14929994cf_axial_390.jpg,negative,test


In [None]:
# Show only the rows of the split table whose cancer_type is 'negative'.
split_df.loc[split_df['cancer_type'].eq('negative')]

Unnamed: 0,file_name,cancer_type,split
23620,PETCT_64aff75516_axial_310.jpg,negative,train
23621,PETCT_3c94a00f90_axial_790.jpg,negative,train
23622,PETCT_8b73608326_axial_170.jpg,negative,train
23623,PETCT_a22ec7f62b_axial_730.jpg,negative,train
23624,PETCT_f0e1b89b41_axial_340.jpg,negative,train
...,...,...,...
31615,PETCT_6016a6c3af_axial_180.jpg,negative,test
31616,PETCT_a41d59682f_axial_450.jpg,negative,test
31617,PETCT_e00c98b415_axial_270.jpg,negative,test
31618,PETCT_14929994cf_axial_390.jpg,negative,test


In [None]:
# Load the bounding-box label table from labels_csv and display it.
labels_df = pd.read_csv(labels_csv)
labels_df

Unnamed: 0,img_filename,x_min,y_min,x_max,y_max,cancer_type,img_width,img_height
0,PETCT_0b98dbe00d_axial_054.jpg,168,225.0,186.0,245.0,lung_cancer,408,408.1
1,PETCT_0b98dbe00d_axial_055.jpg,164,223.0,188.0,246.0,lung_cancer,408,408.0
2,PETCT_0b98dbe00d_axial_056.jpg,164,223.0,188.0,246.0,lung_cancer,408,408.0
3,PETCT_0b98dbe00d_axial_057.jpg,163,223.0,188.0,248.0,lung_cancer,408,408.0
4,PETCT_0b98dbe00d_axial_058.jpg,165,223.0,187.0,248.0,lung_cancer,408,408.0
...,...,...,...,...,...,...,...,...
55065,PETCT_f37014ec85_axial_545.jpg,214,158.0,235.0,178.0,melanoma,408,408.0
55066,PETCT_f37014ec85_axial_546.jpg,214,159.0,233.0,177.0,melanoma,408,408.0
55067,PETCT_f37014ec85_axial_547.jpg,214,159.0,233.0,175.0,melanoma,408,408.0
55068,PETCT_f37014ec85_axial_548.jpg,214,159.0,233.0,175.0,melanoma,408,408.0


In [None]:
# Label columns that the split table does not already have, so the merge does
# not produce duplicated (suffixed) columns.
cols_to_use = labels_df.columns.difference(split_df.columns)

# Index label of one merged row that is excluded from the labels.
# NOTE(review): the reason for excluding this row is not recorded — confirm.
ROW_TO_EXCLUDE = 33613

merged_df = (
    split_df
    .merge(labels_df[cols_to_use], left_on='file_name', right_on='img_filename', how='left')
    # Left-merge rows without a matching label have NaN label columns; drop them.
    .dropna()
    # errors='ignore': do not raise KeyError when that index label is absent
    # (for example because dropna() already removed it).
    .drop(ROW_TO_EXCLUDE, errors='ignore')
    # The join key from the label table duplicates file_name.
    .drop(columns='img_filename')
)
merged_df

Unnamed: 0,file_name,cancer_type,split,img_height,img_width,x_max,x_min,y_max,y_min
0,PETCT_2dac5ef654_axial_277.jpg,lung_cancer,train,408.0,408.0,226.0,204.0,190.0,165.0
1,PETCT_bf178a41b2_axial_234.jpg,lung_cancer,train,408.0,408.0,199.0,191.0,193.0,185.0
2,PETCT_bf178a41b2_axial_234.jpg,lung_cancer,train,408.0,408.0,172.0,166.0,148.0,143.0
3,PETCT_bf178a41b2_axial_234.jpg,lung_cancer,train,408.0,408.0,192.0,186.0,192.0,182.0
4,PETCT_bf178a41b2_axial_234.jpg,lung_cancer,train,408.0,408.0,199.0,189.0,186.0,181.0
...,...,...,...,...,...,...,...,...,...
32152,PETCT_1285b86bea_axial_325.jpg,melanoma,test,408.0,408.0,211.0,194.0,204.0,181.0
32153,PETCT_1285b86bea_axial_325.jpg,melanoma,test,408.0,408.0,182.0,164.0,172.0,139.0
32154,PETCT_1285b86bea_axial_325.jpg,melanoma,test,408.0,408.0,258.0,204.0,292.0,249.0
32155,PETCT_1285b86bea_axial_325.jpg,melanoma,test,408.0,408.0,197.0,164.0,272.0,223.0


In [None]:
# generate k means labels

# Group the boxes by image so each label file is written exactly once. The
# file is opened with 'w' instead of 'a+': appending one row at a time added
# duplicate boxes to existing files every time this cell was re-run.
boxes_by_image = merged_df.groupby(['split', 'file_name'], sort=False)

for (split, filename), boxes in tqdm(boxes_by_image, total=boxes_by_image.ngroups):
  # One YOLO line per bounding box, kept in the original row order.
  yolo_lines = [
      convert_to_yolo_format(
          box.cancer_type,
          box.x_min, box.x_max,
          box.y_min, box.y_max,
          box.img_width, box.img_height,
      )
      for box in boxes.itertuples()
  ]

  # File name without its extension (previously sliced with [:-4]).
  stem = plb.Path(filename).stem

  # suv_label (disabled)
  # suv_label_filename = f'{stem}.txt'

  # with open(suv_folder / split / suv_label_filename, 'w') as f:
      # f.writelines(yolo_lines)

  # stacked_label: insert 'PETCTstacked' as the third '_'-separated token,
  # e.g. PETCT_<id>_axial_277 -> PETCT_<id>_PETCTstacked_axial_277
  name_parts = stem.split('_')
  name_parts.insert(2, 'PETCTstacked')
  petct_label_filename = '_'.join(name_parts) + '.txt'

  with open(petct_folder / split / petct_label_filename, 'w') as g:
    g.writelines(yolo_lines)


0it [00:00, ?it/s]

In [None]:
# Authenticate this Colab session and select the Google Cloud project that the
# gcloud / gsutil shell commands operate on.
from google.colab import auth
auth.authenticate_user()
project_id = 'hybrid-elixir-402923'
!gcloud config set project {project_id}
# Sanity check: list what gsutil can see with the current credentials.
!gsutil ls

Updated property [core/project].
gs://petct_preprocessed_data/


In [None]:
# Display the path currently bound to suv_folder.
suv_folder

PosixPath('/content/drive/MyDrive/Capstone_GE_DSI_CV_Project/preprocessed_data/YOLO_kmeans_labels/SUV')

In [None]:
# Name of the Cloud Storage bucket interpolated into the gsutil commands.
bucket_name = 'petct_preprocessed_data'

In [None]:
# Bucket-to-bucket copy of CT_YOLO_v3/CT_YoLo/data/images into CT_YOLO_kmeans/data/
# (-m: parallel transfer, -r: recursive).
!gsutil -m cp -r gs://{bucket_name}/CT_YOLO_v3/CT_YoLo/data/images gs://{bucket_name}/CT_YOLO_kmeans/data/

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Copying gs://petct_preprocessed_data/CT_YOLO_v3/CT_YoLo/data/images/val/PETCT_5d10be5b89_axial_191.jpg [Content-Type=image/jpeg]...
Copying gs://petct_preprocessed_data/CT_YOLO_v3/CT_YoLo/data/images/val/PETCT_5d10be5b89_axial_192.jpg [Content-Type=image/jpeg]...
Copying gs://petct_preprocessed_data/CT_YOLO_v3/CT_YoLo/data/images/val/PETCT_5d10be5b89_axial_195.jpg [Content-Type=image/jpeg]...
Copying gs://petct_preprocessed_data/CT_YOLO_v3/CT_YoLo/data/images/val/PETCT_5d10be5b89_axial_196.jpg [Content-Type=image/jpeg]...
Copying gs://petct_preprocessed_data/CT_YOLO_v3/CT_YoLo/data/images/val/PETCT_5d10be5b89_axial_213.jpg [Content-Type=image/jpeg]...
Copying gs://petct_preprocessed_data/CT_YOLO_v3/CT_YoLo/data/images/val/PETCT_5d10be5b89_axial_218.jpg [Content-Type=image/jpeg]...
Copying gs://petct_preprocessed_data/CT_YOLO_v3/CT_YoLo/data/images/val/PETCT_5d10be5b89_axial_219.jpg [Content-Type=image/jpeg]...
Copying gs:

In [None]:
# Upload everything under suv_folder to CT_YOLO_kmeans/data/labels in the bucket.
# NOTE(review): the recorded output shows suv_folder as the Drive path, while an
# earlier cell rebinds it to the relative 'SUV' — confirm the value on a fresh run.
!gsutil -m cp -r {suv_folder}/* gs://{bucket_name}/CT_YOLO_kmeans/data/labels

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Copying file:///content/drive/MyDrive/Capstone_GE_DSI_CV_Project/preprocessed_data/YOLO_kmeans_labels/SUV/val/PETCT_5e2da717db_axial_306.txt [Content-Type=text/plain]...
Copying file:///content/drive/MyDrive/Capstone_GE_DSI_CV_Project/preprocessed_data/YOLO_kmeans_labels/SUV/val/PETCT_0e2034240b_axial_232.txt [Content-Type=text/plain]...
Copying file:///content/drive/MyDrive/Capstone_GE_DSI_CV_Project/preprocessed_data/YOLO_kmeans_labels/SUV/val/PETCT_3b1c9155f5_axial_089.txt [Content-Type=text/plain]...
Copying file:///content/drive/MyDrive/Capstone_GE_DSI_CV_Project/preprocessed_data/YOLO_kmeans_labels/SUV/val/PETCT_3b73c2480a_axial_246.txt [Content-Type=text/plain]...
Copying file:///content/drive/MyDrive/Capstone_GE_DSI_CV_Project/preprocessed_data/YOLO_kmeans_labels/SUV/val/PETCT_40f0749cb7_axial_291.txt [Content-Type=text/plain]...
Copying file:///content/drive/MyDrive/Capstone_GE_DSI_CV_Project/preprocessed_data/YO

In [None]:
# Bucket-to-bucket copy of SUV_YoLo/data/images into SUV_YOLO_kmeans_v2/data/
# (-m: parallel transfer, -r: recursive).
!gsutil -m cp -r gs://{bucket_name}/SUV_YoLo/data/images gs://{bucket_name}/SUV_YOLO_kmeans_v2/data/

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Copying gs://petct_preprocessed_data/SUV_YoLo/data/images/val/PETCT_5d10be5b89_axial_192.jpg [Content-Type=image/jpeg]...
Copying gs://petct_preprocessed_data/SUV_YoLo/data/images/val/PETCT_5d10be5b89_axial_177.jpg [Content-Type=image/jpeg]...
Copying gs://petct_preprocessed_data/SUV_YoLo/data/images/val/PETCT_5d10be5b89_axial_181.jpg [Content-Type=image/jpeg]...
Copying gs://petct_preprocessed_data/SUV_YoLo/data/images/val/PETCT_5d10be5b89_axial_191.jpg [Content-Type=image/jpeg]...
Copying gs://petct_preprocessed_data/SUV_YoLo/data/images/val/PETCT_5d10be5b89_axial_195.jpg [Content-Type=image/jpeg]...
Copying gs://petct_preprocessed_data/SUV_YoLo/data/images/val/PETCT_5d10be5b89_axial_196.jpg [Content-Type=image/jpeg]...
Copying gs://petct_preprocessed_data/SUV_YoLo/data/images/val/PETCT_5d10be5b89_axial_213.jpg [Content-Type=image/jpeg]...
Copying gs://petct_preprocessed_data/SUV_YoLo/data/images/val/PETCT_5d10be5b89_ax

In [None]:
!gsutil -m cp -r gs://{bucket_name}/CT_YOLO_kmeans/data/labels gs://{bucket_name}/SUV_YOLO_kmeans_v2/data/labels/

In [None]:
# Bucket-to-bucket copy of PETCT_stacked/YOLO/data/images into
# PETCT_stacked_YOLO_kmeans/data/ (-m: parallel transfer, -r: recursive).
!gsutil -m cp -r gs://{bucket_name}/PETCT_stacked/YOLO/data/images gs://{bucket_name}/PETCT_stacked_YOLO_kmeans/data/

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Copying gs://petct_preprocessed_data/PETCT_stacked/YOLO/data/images/val/PETCT_5d10be5b89_PETCTstacked_axial_180.jpg [Content-Type=image/jpeg]...
Copying gs://petct_preprocessed_data/PETCT_stacked/YOLO/data/images/val/PETCT_5d10be5b89_PETCTstacked_axial_181.jpg [Content-Type=image/jpeg]...
Copying gs://petct_preprocessed_data/PETCT_stacked/YOLO/data/images/val/PETCT_5d10be5b89_PETCTstacked_axial_191.jpg [Content-Type=image/jpeg]...
Copying gs://petct_preprocessed_data/PETCT_stacked/YOLO/data/images/val/PETCT_5d10be5b89_PETCTstacked_axial_192.jpg [Content-Type=image/jpeg]...
Copying gs://petct_preprocessed_data/PETCT_stacked/YOLO/data/images/val/PETCT_5d10be5b89_PETCTstacked_axial_195.jpg [Content-Type=image/jpeg]...
Copying gs://petct_preprocessed_data/PETCT_stacked/YOLO/data/images/val/PETCT_5d10be5b89_PETCTstacked_axial_196.jpg [Content-Type=image/jpeg]...
Copying gs://petct_preprocessed_data/PETCT_stacked/YOLO/data/imag

In [None]:
# Upload everything under petct_folder (the generated stacked-image label files)
# to PETCT_stacked_YOLO_kmeans/data/labels/ in the bucket.
!gsutil -m cp -r {petct_folder}/* gs://{bucket_name}/PETCT_stacked_YOLO_kmeans/data/labels/

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Copying file://PETCT_stacked/val/PETCT_0ea07b421b_PETCTstacked_axial_287.txt [Content-Type=text/plain]...
Copying file://PETCT_stacked/val/PETCT_29ab45ef17_PETCTstacked_axial_090.txt [Content-Type=text/plain]...
Copying file://PETCT_stacked/val/PETCT_5d10be5b89_PETCTstacked_axial_104.txt [Content-Type=text/plain]...
Copying file://PETCT_stacked/val/PETCT_fe705ea1cc_PETCTstacked_axial_073.txt [Content-Type=text/plain]...
Copying file://PETCT_stacked/val/PETCT_2e97a9e5c2_PETCTstacked_axial_334.txt [Content-Type=text/plain]...
Copying file://PETCT_stacked/val/PETCT_ef9d41b836_PETCTstacked_axial_257.txt [Content-Type=text/plain]...
Copying file://PETCT_stacked/val/PETCT_a4ff5d0d9d_PETCTstacked_axial_590.txt [Content-Type=text/plain]...
Copying file://PETCT_stacked/val/PETCT_4848bebb10_PETCTstacked_axial_204.txt [Content-Type=text/plain]...
Copying file://PETCT_stacked/val/PETCT_e03b96666f_PETCTstacked_axial_060.txt [Content-T