In [1]:
# Mount Google Drive into the Colab filesystem so the dataset folder under
# /content/drive/ is readable by the cells below.
from google.colab import drive
drive.mount('/content/drive/')


Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [2]:
# Sanity check: list the dataset folder. The recorded output shows two
# sub-folders, `0` and `1` (presumably one per class label; confirm against
# read_image_df).
!ls '/content/drive/MyDrive/data_syz'

0  1


In [3]:
!pip install pylibjpeg pylibjpeg-libjpeg pylibjpeg-openjpeg
!pip install --upgrade gdcm



In [4]:
!pip install -r requirements.txt



In [5]:
from ggl_data_load import read_image_df, read_metadata_df, split_dataset, load_and_process_images
from model import create_model
from training import train_model, augment_and_expand_dataset
from evaluation import evaluate_model
from ggl_save_info import save_model_info
import numpy as np
from visualization import plot_confusion_matrix, plot_training_history
import pprint
import pydicom
import os

# Load Data
# Both frames are built from the same Drive folder; the recorded output shows
# each of them carrying a `file_path` column.
data_dir = '/content/drive/MyDrive/data_syz/'
image_df = read_image_df(data_dir)
print(f'Here is the shape of the image df: {image_df.shape}')
print(f'Here is the dataframe itself: {image_df.head()}')
metadata_df = read_metadata_df(data_dir)
print(f'Here is the shape of the metadata df: {metadata_df.shape}')
print(f'Here is the dataframe itself: {metadata_df.head()}')

print('Data is read ready to split!')
# NOTE(review): no seed is passed here; whether the split is reproducible
# depends on split_dataset's internals, so confirm.
train_df, val_df, test_df = split_dataset(image_df)

print(f'The datasets are ready! \n Train:{train_df.shape} \n Validation:{val_df.shape} \n Test:{test_df.shape}')

# Process Images & Metadata
# For every split: turn the rows into an image array plus a metadata array,
# and take the labels straight from the split frame's `label` column.
train_images, train_metadata = load_and_process_images(train_df, metadata_df)
print(f'The shape of train images: {train_images.shape} and the shape of the metadata list: {train_metadata.shape}')
train_labels = train_df['label'].values

val_images, val_metadata = load_and_process_images(val_df, metadata_df)
print(f'The shape of validation images: {val_images.shape} and the shape of the metadata list: {val_metadata.shape}')
val_labels = val_df['label'].values

test_images, test_metadata = load_and_process_images(test_df, metadata_df)
print(f'The shape of test images: {test_images.shape} and the shape of the metadata list: {test_metadata.shape}')
test_labels = test_df['label'].values

# Labels come from the split frames while images/metadata come from
# load_and_process_images, so the two sources can disagree in length if the
# loader ever skips a row. Fail here with a clear message instead of training
# on misaligned samples.
for _split, _imgs, _meta, _lbls in (
    ('train', train_images, train_metadata, train_labels),
    ('validation', val_images, val_metadata, val_labels),
    ('test', test_images, test_metadata, test_labels),
):
    assert len(_imgs) == len(_meta) == len(_lbls), (
        f'{_split} split is misaligned: {len(_imgs)} images, '
        f'{len(_meta)} metadata rows, {len(_lbls)} labels'
    )


Loaded 6650 images from /content/drive/MyDrive/data_syz/
Here is the shape of the image df: (6650, 2)
Here is the dataframe itself:                                      file_path  label
0  /content/drive/MyDrive/data_syz/0/10211.dcm      0
1  /content/drive/MyDrive/data_syz/0/14195.dcm      0
2  /content/drive/MyDrive/data_syz/0/16221.dcm      0
3  /content/drive/MyDrive/data_syz/0/12044.dcm      0
4  /content/drive/MyDrive/data_syz/0/10935.dcm      0
Here is the shape of the metadata df: (6650, 7)
Here is the dataframe itself:                                      file_path  SliceThickness  RescaleSlope  \
0  /content/drive/MyDrive/data_syz/0/10211.dcm             5.0           1.0   
1  /content/drive/MyDrive/data_syz/0/14195.dcm             5.0           1.0   
2  /content/drive/MyDrive/data_syz/0/16221.dcm             5.0           1.0   
3  /content/drive/MyDrive/data_syz/0/12044.dcm             5.0           1.0   
4  /content/drive/MyDrive/data_syz/0/10935.dcm             4.0    

In [None]:
from visualization import plot_confusion_matrix, plot_training_history, plot_learning_rate

# Train & Evaluate Model
# The model's input shape is the per-sample image shape (leading batch axis
# of train_images dropped).
model = create_model(input_shape=train_images.shape[1:])
print('Here is the base model architecture:')
#print(base_model.summary())
print('------------*****------------*****------------*****------------*****------------*****------------\n')

print('Here is the model architecture:')
# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model.summary()
print('------------*****------------*****------------*****------------*****------------*****------------\n')


# history = train_model(model, train_images, train_metadata, train_labels, val_images, val_metadata, val_labels, epochs = 50)
# NOTE(review): train_images_aug / train_metadata_aug / train_labels_aug are
# not assigned in any cell visible in this notebook (augment_and_expand_dataset
# is imported but never called here). On a fresh kernel this line raises
# NameError; add the augmentation step above before relying on Run All.
history = train_model(model, train_images_aug, train_metadata_aug, train_labels_aug, val_images, val_metadata, val_labels, batch_size=32, epochs=25)

# Score the trained model on the held-out test split.
metrics = evaluate_model(model, test_images, test_metadata, test_labels)

print('------------*****------------*****------------*****------------*****------------*****------------\n')

pp = pprint.PrettyPrinter(indent=4)

# Print the evaluation metrics in a pretty format
print("Evaluation Metrics:")
pp.pprint(metrics)

print('------------*****------------*****------------*****------------*****------------*****------------\n')

# Visual diagnostics: confusion matrix, loss/accuracy curves, LR schedule.
plot_confusion_matrix(metrics["confusion_matrix"])
plot_training_history(history)
plot_learning_rate(history)

# NOTE(review): the recorded training log reports a learning rate of 5e-05,
# which matches neither value in this list; confirm what save_model_info
# expects in its last argument.
save_model_info(model, metrics, history.epoch, [0.001, 0.0001])

Here is the base model architecture:
------------*****------------*****------------*****------------*****------------*****------------

Here is the model architecture:


None
------------*****------------*****------------*****------------*****------------*****------------

Epoch 1/25
[1m373/373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 166ms/step - accuracy: 0.7468 - loss: 1.1703Epoch 1: Learning Rate = 0.000050
[1m373/373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m157s[0m 208ms/step - accuracy: 0.7470 - loss: 1.1700 - val_accuracy: 0.9327 - val_loss: 0.8573 - learning_rate: 5.0000e-05 - lr: 5.0000e-05
Epoch 2/25
[1m373/373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - accuracy: 0.9499 - loss: 0.8227Epoch 2: Learning Rate = 0.000050
[1m373/373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 74ms/step - accuracy: 0.9499 - loss: 0.8226 - val_accuracy: 0.9476 - val_loss: 0.7799 - learning_rate: 5.0000e-05 - lr: 5.0000e-05
Epoch 3/25
[1m372/373[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 70ms/step - accuracy: 0.9837 - loss: 0.7112Epoch 3: Learning Rate = 0.000050
[1m373/373[0m [32m━━━━━━━━━━━━━━

In [None]:
# Persist the trained model to the Colab VM's local disk (under /content, not
# the mounted Drive).
# NOTE(review): the progress-bar format in the recorded training log looks
# like Keras 3, where model.save() rejects a path that has no `.keras` / `.h5`
# extension. Confirm this call succeeds on the runtime in use; if the path
# changes, the load and zip cells below must change with it.
model.save("/content/my_saved_model")

In [None]:
from tensorflow.keras.models import load_model

# Load from disk
# Rebinds the notebook-level `model` to the copy read back from the path the
# save cell wrote to, replacing the in-memory trained instance.
model = load_model("/content/my_saved_model")


In [None]:
from google.colab import files
!zip -r my_saved_model.zip /content/my_saved_model
files.download('my_saved_model_1.zip')

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

# Run the two-input model (image batch + metadata batch) over the new samples.
# NOTE(review): new_images, new_metadata and true_labels are not assigned in
# any cell visible in this notebook; they must be defined before this cell.
pred_probs = model.predict([new_images, new_metadata], batch_size=32)

# A probability of at least 0.5 is counted as the positive class.
pred_labels = (pred_probs.ravel() >= 0.5).astype(int)

# Accuracy and F1 use the thresholded labels; ROC AUC uses the raw scores.
acc = accuracy_score(true_labels, pred_labels)
f1 = f1_score(true_labels, pred_labels)
roc_auc = roc_auc_score(true_labels, pred_probs)

print(
    f"Accuracy: {acc:.4f}",
    f"F1 Score: {f1:.4f}",
    f"ROC AUC: {roc_auc:.4f}",
    sep="\n",
)


In [None]:
i = 0  # Index of image

# Give the chosen sample a leading batch axis of length 1 so it can go through
# predict(). The original author's notes give the resulting shapes as
# (1, 299, 299, 3) for the image and (1, 2) for the metadata; confirm.
single_img = np.expand_dims(new_images[i], 0)
single_meta = np.expand_dims(new_metadata[i], 0)

# predict() returns one row per sample; take the single score from row 0.
pred_prob = model.predict([single_img, single_meta])[0, 0]
pred_label = 1 if pred_prob >= 0.5 else 0

print(f"True Label: {true_labels[i]}, Predicted: {pred_label}, Probability: {pred_prob:.4f}")
