In [None]:
# Install the notebook's dependencies with the interpreter that runs this kernel
# (Python replacement for: !pip install -q kaggle opencv-python-headless tensorflow matplotlib).
import sys, subprocess
pip_install_cmd = [sys.executable, "-m", "pip", "install"]
pip_install_cmd += ["kaggle", "opencv-python-headless", "tensorflow", "matplotlib"]
subprocess.check_call(pip_install_cmd)


In [None]:
# Create the per-user Kaggle config directory if it is not there yet
# (Python replacement for: !mkdir -p ~/.kaggle).
import os
kaggle_config_dir = os.path.expanduser("~/.kaggle")
os.makedirs(kaggle_config_dir, exist_ok=True)


In [None]:
# Copy kaggle.json from the working directory into ~/.kaggle
# (Python replacement for: !cp kaggle.json ~/.kaggle/).
import shutil
kaggle_token_path = os.path.expanduser("~/.kaggle/kaggle.json")
shutil.copy("kaggle.json", kaggle_token_path)


In [None]:
# Limit the token file to owner read/write
# (Python replacement for: !chmod 600 ~/.kaggle/kaggle.json).
import os
token_file = os.path.expanduser("~/.kaggle/kaggle.json")
os.chmod(token_file, 0o600)


In [None]:
# Download the dataset archive through the Kaggle command-line client
# (Python replacement for: !kaggle datasets download -d andrewmvd/lung-and-colon-cancer-histopathological-images).
dataset_slug = "andrewmvd/lung-and-colon-cancer-histopathological-images"
subprocess.check_call(["kaggle", "datasets", "download", "-d", dataset_slug])


In [None]:
# Unpack the downloaded archive into lung_cancer_dataset/
# (Python replacement for: !unzip -o lung-and-colon-cancer-histopathological-images.zip -d lung_cancer_dataset > /dev/null).
import zipfile
archive_name = "lung-and-colon-cancer-histopathological-images.zip"
extract_to = "lung_cancer_dataset"
with zipfile.ZipFile(archive_name, mode="r") as archive:
    archive.extractall(path=extract_to)


In [None]:
# Show the top-level entries of the extracted dataset folder
# (Python replacement for: !ls lung_cancer_dataset).
dataset_entries = os.listdir("lung_cancer_dataset")
print(dataset_entries)


In [None]:
# Echo the value of every expression statement in a cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
echo_mode = "all"
InteractiveShell.ast_node_interactivity = echo_mode


In [None]:
# =====================================
# Lung Cancer Classification (Jupyter v1)
# =====================================

# 0. Install dependencies (only run once, restart kernel after install)
import sys, subprocess

def install_if_missing(pkg, import_name=None):
    """Install ``pkg`` with pip unless its module can already be found.

    The pip distribution name and the importable module name are not always
    the same (``opencv-python-headless`` is imported as ``cv2``), so the
    module to probe can be given separately.

    Args:
        pkg: Distribution name handed to ``pip install``.
        import_name: Module name to look for. When omitted, a small table of
            known mismatches is consulted, falling back to ``pkg`` with
            ``-`` replaced by ``_``.

    Raises:
        subprocess.CalledProcessError: If the pip command exits non-zero.
    """
    import importlib.util

    # Distributions used by this notebook whose module name differs from the pip name.
    known_import_names = {
        "opencv-python-headless": "cv2",
        "opencv-python": "cv2",
    }
    if import_name is None:
        import_name = known_import_names.get(pkg, pkg.replace("-", "_"))

    # find_spec only locates a top-level module; unlike __import__ it does not
    # execute the package, so slow or failing package initialisation code
    # cannot break (or delay) this availability check.
    try:
        found = importlib.util.find_spec(import_name) is not None
    except (ImportError, ValueError):
        found = False

    if found:
        print(f"✅ {pkg} already installed")
    else:
        print(f"📦 Installing {pkg} ...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", pkg])

# Check each dependency in turn before the imports below run.
required_packages = ("kaggle", "opencv-python-headless", "tensorflow", "matplotlib")
for package_name in required_packages:
    install_if_missing(package_name)

# 1. Imports
import os, shutil, zipfile, random
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import cv2
from tensorflow.keras.preprocessing import image
tf_version = tf.__version__
print("✅ TensorFlow:", tf_version)

# Have Jupyter echo every expression statement of a cell, not just the final one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# ------------------------------
# 2. Kaggle Dataset Download
# ------------------------------
print("📥 Preparing Kaggle API...")

# Credentials: a kaggle.json in the current folder is installed into ~/.kaggle.
# If there is none but a token was installed earlier, that one is reused
# instead of crashing on the copy; if neither exists, fail with a clear message.
kaggle_dir = os.path.expanduser("~/.kaggle")
kaggle_token = os.path.join(kaggle_dir, "kaggle.json")
os.makedirs(kaggle_dir, exist_ok=True)
if os.path.isfile("kaggle.json"):
    shutil.copy("kaggle.json", kaggle_token)
elif not os.path.isfile(kaggle_token):
    raise FileNotFoundError(
        "kaggle.json not found: place it in the current folder or in ~/.kaggle/"
    )
# Owner read/write only.
os.chmod(kaggle_token, 0o600)

print("📥 Downloading dataset...")
subprocess.check_call(["kaggle", "datasets", "download", "-d",
                       "andrewmvd/lung-and-colon-cancer-histopathological-images"])

print("📂 Unzipping dataset...")
with zipfile.ZipFile("lung-and-colon-cancer-histopathological-images.zip", "r") as zf:
    zf.extractall("lung_cancer_dataset")

print("📑 Dataset structure:")
print(os.listdir("lung_cancer_dataset"))

# ------------------------------
# 3. Data Generators
# ------------------------------
# Split directories; flow_from_directory treats each sub-folder as one class.
# NOTE(review): compare these paths with the dataset listing printed above —
# confirm the archive really unpacks into train/val/test folders.
train_dir = "lung_cancer_dataset/train"
val_dir   = "lung_cancer_dataset/val"
test_dir  = "lung_cancer_dataset/test"

# Every split gets the same preprocessing: pixel values multiplied by 1/255.
# NOTE(review): the ImageNet ResNet50 weights used below are normally paired with
# keras.applications.resnet50.preprocess_input — consider switching (gradcam()
# would then need the same preprocessing).
train_datagen, val_datagen, test_datagen = (
    ImageDataGenerator(rescale=1./255) for _ in range(3)
)

# Options shared by all three iterators.
# NOTE(review): class_mode='binary' presumes exactly two class folders — confirm.
flow_options = dict(target_size=(224,224), batch_size=16, class_mode='binary')

train_gen = train_datagen.flow_from_directory(train_dir, **flow_options)
val_gen = val_datagen.flow_from_directory(val_dir, **flow_options)
# The test iterator is not shuffled, so its batches keep a fixed file order.
test_gen = test_datagen.flow_from_directory(test_dir, shuffle=False, **flow_options)

# ------------------------------
# 4. Model Setup (ResNet50)
# ------------------------------
print("🔧 Building ResNet50 model...")
# ImageNet-initialised ResNet50 without its classification top; frozen so that
# only the new head defined below is trained.
base_model = ResNet50(include_top=False, weights="imagenet", input_shape=(224,224,3))
base_model.trainable = False

# New head: global average pooling -> 64-unit ReLU -> 50% dropout -> one sigmoid unit.
x = base_model.output
for head_layer in (GlobalAveragePooling2D(), Dense(64, activation='relu'), Dropout(0.5)):
    x = head_layer(x)
out = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=out)
model.compile(
    loss="binary_crossentropy",
    optimizer=Adam(1e-3),
    metrics=["accuracy"],
)
model.summary()

# ------------------------------
# 5. Training (1 epoch only for debug)
# ------------------------------
print("🚀 Training for 1 epoch...")
# One pass over the training data, validated on val_gen — a quick end-to-end check.
debug_epochs = 1
history = model.fit(train_gen, epochs=debug_epochs, validation_data=val_gen)

# ------------------------------
# 6. Evaluation
# ------------------------------
print("📊 Evaluating on test set...")
# evaluate() yields the loss followed by the compiled metric (accuracy).
test_metrics = model.evaluate(test_gen)
loss, acc = test_metrics
print(f"✅ Test Accuracy: {acc*100:.2f}%")

# ------------------------------
# 7. Grad-CAM
# ------------------------------
def get_last_conv_layer(model):
    """Return the name of the last ``Conv2D`` layer in ``model.layers``.

    The layer list is scanned from the end; ``None`` is returned when the
    model contains no ``Conv2D`` layer.
    """
    conv_names = (
        candidate.name
        for candidate in model.layers[::-1]
        if isinstance(candidate, tf.keras.layers.Conv2D)
    )
    return next(conv_names, None)

def gradcam(img_path, model):
    """Overlay a Grad-CAM heat-map for ``model``'s output unit 0 on an image.

    Args:
        img_path: Path of the image file to explain.
        model: Keras model containing at least one ``Conv2D`` layer; the last
            one (as found by ``get_last_conv_layer``) is used.

    Returns:
        A 224x224 BGR ``uint8`` array: the resized original image blended
        (60/40) with the JET-coloured heat-map.

    Raises:
        ValueError: If the model has no ``Conv2D`` layer or OpenCV cannot
            read ``img_path``.
    """
    last_conv = get_last_conv_layer(model)
    if last_conv is None:
        raise ValueError("Grad-CAM needs a Conv2D layer, but the model has none")

    # Same input preparation as the data generators: 224x224, scaled by 1/255.
    img = image.load_img(img_path, target_size=(224,224))
    arr = np.expand_dims(image.img_to_array(img)/255.0, axis=0)

    # Auxiliary model exposing both the conv feature maps and the prediction.
    grad_model = tf.keras.models.Model(model.inputs,
                                       [model.get_layer(last_conv).output, model.output])
    with tf.GradientTape() as tape:
        conv_out, pred = grad_model(arr)
        loss = pred[:,0]

    # Gradient of the score w.r.t. the feature maps, averaged per channel.
    grads = tape.gradient(loss, conv_out)[0]
    pooled = tf.reduce_mean(grads, axis=(0,1))

    # Channel-weighted sum of the feature maps -> 2-D map. Converting to NumPy
    # here (once) replaces the original ndarray.numpy() call, which raised
    # AttributeError because np.dot had already produced a NumPy array.
    heatmap = tf.reduce_sum(conv_out[0] * pooled, axis=-1).numpy()
    heatmap = np.maximum(heatmap, 0)
    heatmap /= np.max(heatmap) + 1e-8  # epsilon avoids 0/0 on an all-zero map
    heatmap = cv2.resize(heatmap, (224,224))
    heatmap = np.uint8(255*heatmap)
    heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)

    orig = cv2.imread(img_path)
    if orig is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise ValueError(f"OpenCV could not read image: {img_path}")
    orig = cv2.resize(orig, (224,224))
    superimposed = cv2.addWeighted(orig, 0.6, heatmap, 0.4, 0)
    return superimposed

# Pick one test image reproducibly: first class folder and first file in sorted
# order. os.listdir() order is arbitrary, and stray non-directory entries in
# test_dir are skipped so they are never mistaken for a class folder.
class_dirs = sorted(
    entry for entry in os.listdir(test_dir)
    if os.path.isdir(os.path.join(test_dir, entry))
)
if not class_dirs:
    raise FileNotFoundError(f"No class folders found in {test_dir}")
sample_class = class_dirs[0]
class_files = sorted(os.listdir(os.path.join(test_dir, sample_class)))
if not class_files:
    raise FileNotFoundError(f"No images found in {os.path.join(test_dir, sample_class)}")
sample_img = os.path.join(test_dir, sample_class, class_files[0])
print("🖼 Sample image:", sample_img)

grad_img = gradcam(sample_img, model)

# gradcam() returns BGR (OpenCV order); matplotlib expects RGB.
plt.imshow(cv2.cvtColor(grad_img, cv2.COLOR_BGR2RGB))
plt.axis("off")
plt.title("Grad-CAM Output")
plt.show()


In [None]:
# Tune learning rate + optimizer
from tensorflow.keras.optimizers import Adam, RMSprop

# Re-compile the existing model with a 10x smaller learning rate.
model.compile(
    optimizer=Adam(learning_rate=1e-4),   # instead of 1e-3
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

# Train for more epochs.
# batch_size is deliberately NOT passed to fit(): Keras raises ValueError when
# batch_size is given together with a generator input, because the generator
# already yields batches. To try 16 / 32 / 64, change batch_size in the
# flow_from_directory(...) calls that build train_gen / val_gen.
history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=10,              # run longer
)


In [None]:
# Unfreeze the last 30 layers of ResNet50, except BatchNormalization layers.
# BatchNormalization stays frozen: making it trainable would also let it update
# its moving statistics, which the Keras fine-tuning guide warns can undo what
# the pretrained backbone has learned.
for layer in base_model.layers[-30:]:
    layer.trainable = not isinstance(layer, tf.keras.layers.BatchNormalization)

# Re-compile so the changed trainable flags take effect, with a smaller LR (fine-tuning)
model.compile(optimizer=Adam(1e-5), loss="binary_crossentropy", metrics=["accuracy"])

# Fine-tune
history_finetune = model.fit(train_gen, validation_data=val_gen, epochs=5)


In [None]:
# Training-time augmentation: random rotations, shifts, zooms and flips on top
# of the 1/255 rescaling.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest'
)

# Rebuild the training iterator from the new generator. Without this, train_gen
# still wraps the earlier non-augmenting generator and the augmentation above
# would never be applied in later fit() calls.
train_gen = train_datagen.flow_from_directory(
    train_dir, target_size=(224,224), batch_size=16, class_mode='binary'
)


In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

# Stop when val_loss has not improved for 5 epochs and restore the best weights.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# Multiply the learning rate by 0.2 after 3 epochs without val_loss improvement.
lr_decay = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3)
# Write best_model.h5 only when the monitored quantity improves.
checkpoint = ModelCheckpoint("best_model.h5", save_best_only=True)
callbacks = [early_stop, lr_decay, checkpoint]

history = model.fit(train_gen, validation_data=val_gen, epochs=30, callbacks=callbacks)
