In [None]:
import os
import time

import subprocess

# --- 1. Change only this line ---
# First run: set this to 'images1_archive.tar.gz';
# second run: 'images2_archive.tar.gz', and so on.
ARCHIVE_FILE_NAME = "Copy of images_archive.tar.gz"

# --- 2. Keep these paths the same on every run ---
DRIVE_BASE_PATH = "/content/drive/MyDrive/images"
# Always the same folder, so that images from successive archives collect in one place.
LOCAL_IMAGE_DIR = "/content/local_images"

# --- 3. Everything below is derived automatically ---
DRIVE_TAR_PATH = os.path.join(DRIVE_BASE_PATH, ARCHIVE_FILE_NAME)
LOCAL_TAR_PATH = f"/content/{ARCHIVE_FILE_NAME}"


def _run_checked(command):
    """Run an external command and fail loudly if it does not succeed.

    Args:
        command: list holding the program name followed by its arguments. It is
            handed to subprocess.run without a shell, so paths containing
            spaces need no quoting.

    Raises:
        RuntimeError: if the command exits with a non-zero status; the text the
            command wrote to stderr is included in the message.
    """
    completed = subprocess.run(command, stderr=subprocess.PIPE, text=True)
    if completed.returncode != 0:
        raise RuntimeError(
            f"Command {command!r} failed with exit code {completed.returncode}: "
            f"{completed.stderr.strip()}"
        )


# Stop before doing any work if the archive is not at the expected Drive path
# (for example when Drive is not mounted or the file name above is wrong).
if not os.path.isfile(DRIVE_TAR_PATH):
    raise FileNotFoundError(f"Archive not found: {DRIVE_TAR_PATH}")

# Create the target folder if needed; an existing folder is reused as-is.
os.makedirs(LOCAL_IMAGE_DIR, exist_ok=True)
print("--- Setup Shuru ---")
print(f"Target Folder: {LOCAL_IMAGE_DIR}")

# --- FAST COPY ---
print(f"Copying {ARCHIVE_FILE_NAME}...")
start_time = time.time()
_run_checked(["cp", DRIVE_TAR_PATH, LOCAL_TAR_PATH])
print(f"Copy complete in {(time.time() - start_time):.2f} seconds.")

# --- FAST UNPACK (ADD) ---
print(f"Extracting and ADDING images to {LOCAL_IMAGE_DIR}...")
start_time = time.time()
# The target folder is not cleared first, so files from earlier archives stay
# in place and the new archive's files are added next to them.
_run_checked(["tar", "-xzf", LOCAL_TAR_PATH, "-C", LOCAL_IMAGE_DIR])
print(f"Extraction complete in {(time.time() - start_time):.2f} seconds.")

print(f"\n‚úÖ --- READY TO TRAIN! Images are in {LOCAL_IMAGE_DIR} ---")

--- Setup Shuru ---
Target Folder: /content/local_images
Copying Copy of images_archive.tar.gz...
Copy complete in 29.45 seconds.
Extracting and ADDING images to /content/local_images...
Extraction complete in 25.02 seconds.

‚úÖ --- READY TO TRAIN! Images are in /content/local_images ---


In [None]:
import os

# Folder the archive was unpacked into by the setup cell.
LOCAL_IMAGE_DIR = "/content/local_images"

try:
    # Every directory entry is counted; no filtering by type or extension.
    extracted_entries = os.listdir(LOCAL_IMAGE_DIR)
    file_count = len(extracted_entries)
    print(f"‚úÖ Success! Found {file_count} images in '{LOCAL_IMAGE_DIR}'.")
except FileNotFoundError:
    # The folder does not exist at all.
    print(f"‚ùå Error: Could not find the directory '{LOCAL_IMAGE_DIR}'.")
except Exception as err:
    # Anything else (permissions, not a directory, ...) is reported, not raised.
    print(f"An error occurred: {err}")

‚úÖ Success! Found 30051 images in '/content/local_images'.


In [None]:
import os
import cv2  # OpenCV (Colab mein pehle se install hota hai)
from tqdm.notebook import tqdm
import numpy as np # Numpy zaroori hai

# --- 1. Paths ---
SOURCE_DIR = "/content/local_images"
DEST_DIR = "/content/local_images_cropped"
os.makedirs(DEST_DIR, exist_ok=True)

# --- 2. Crop setting ---
# Keep 75% of each side so that the "L" and "PORTABLE" text is sure to be cut off.
CROP_PERCENT = 0.75


def _center_crop(img, fraction):
    """Return the centered region of `img` spanning `fraction` of its height and width.

    Args:
        img: array whose first two axes are (height, width).
        fraction: portion of each side to keep, e.g. 0.75.

    Returns:
        A slice of `img`; the remaining margin is split evenly between the two
        sides of each axis (integer division).
    """
    h, w = img.shape[:2]
    new_h = int(h * fraction)
    new_w = int(w * fraction)
    start_y = (h - new_h) // 2
    start_x = (w - new_w) // 2
    return img[start_y:start_y + new_h, start_x:start_x + new_w]


print(f"'{SOURCE_DIR}' se images ko crop kiya ja raha hai...")
print(f"Nayi images '{DEST_DIR}' mein save hon gi...")
print(f"Cropping percentage: {int(CROP_PERCENT*100)}%")

image_files = os.listdir(SOURCE_DIR)
errors = 0
processed_count = 0
# (filename, reason) for every file that could not be processed, so that
# skipped files can be inspected instead of only being counted.
failed_files = []

for filename in tqdm(image_files, desc="Cropping images"):
    source_path = os.path.join(SOURCE_DIR, filename)
    dest_path = os.path.join(DEST_DIR, filename)

    try:
        # cv2.imread signals an unreadable file by returning None, not by raising.
        img = cv2.imread(source_path)
        if img is None:
            raise ValueError("could not be read as an image")

        cropped_img = _center_crop(img, CROP_PERCENT)
        if cropped_img.size == 0:
            raise ValueError("image too small: crop is empty")

        # cv2.imwrite returns False when the file was not written; only count
        # the image as processed when the write actually succeeded.
        if not cv2.imwrite(dest_path, cropped_img):
            raise OSError("cv2.imwrite reported failure")
        processed_count += 1

    except Exception as e:
        # Best effort: one bad file must not abort the whole run.
        errors += 1
        failed_files.append((filename, str(e)))

print("\n--- CROP MUKAMMAL! ---")
print(f"Total images found: {len(image_files)}")
print(f"Successfully processed and saved: {processed_count}")
print(f"Errors (skipped files): {errors}")

# Show a short sample of what went wrong, if anything did.
if failed_files:
    print("First skipped files:")
    for failed_name, reason in failed_files[:10]:
        print(f"  {failed_name}: {reason}")

'/content/local_images' se images ko crop kiya ja raha hai...
Nayi images '/content/local_images_cropped' mein save hon gi...
Cropping percentage: 75%


Cropping images:   0%|          | 0/30051 [00:00<?, ?it/s]


--- CROP MUKAMMAL! ---
Total images found: 30051
Successfully processed and saved: 30051
Errors (skipped files): 0


In [None]:
import tensorflow as tf
from tensorflow.keras import mixed_precision
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, BatchNormalization
from tensorflow.keras.layers import RandomFlip, RandomRotation, RandomZoom
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import os

# --- Speed: switch Keras to the 'mixed_float16' global dtype policy ---
mixed_precision.set_global_policy('mixed_float16')

# --- 1. Configuration ---
# Image size MUST match the one used in Part 1.
IMG_WIDTH = 224
IMG_HEIGHT = 224
BATCH_SIZE = 32

# Paths
# Model file produced by Part 1; its weights are loaded before polishing.
LOAD_WEIGHTS_PATH = '/content/drive/MyDrive/images/Copy of efficientnet_b0_fast.keras'
# Where the polished model is written.
FINAL_MODEL_PATH = '/content/drive/MyDrive/images/efficientnet_b0_polished.keras'
CLEAN_CSV_PATH = '/content/drive/MyDrive/images/master_tf_clean.csv'
IMAGE_DIR = '/content/local_images_cropped'

# Per-class loss weights, keyed by class index (same values as before).
CLASS_WEIGHTS_DICT = {
    0: 1.38,
    1: 0.83,
    2: 1.0,
    3: 2.23,
    4: 4.0,
    5: 5.0,
}
# List position defines the class index used for labels.
CLASS_NAMES = [
    'COVID-19',
    'Lung Opacity',
    'Normal',
    'Pneumonia (Bacterial)',
    'Pneumonia (Viral)',
    'Tuberculosis',
]
NUM_CLASSES = len(CLASS_NAMES)

# --- 2. Simple Augmentation (Polishing Mode) ---
# Per the original note: milder than Part 1, to help the model settle.
_polish_augmentation_layers = [
    RandomFlip("horizontal"),
    RandomRotation(0.1),  # slight rotation
    RandomZoom(0.1),      # slight zoom
]
simple_augmentation = tf.keras.Sequential(
    _polish_augmentation_layers,
    name="simple_augmentation",
)

def load_and_preprocess(filepath, label):
    """Read one image file and prepare it as EfficientNet input.

    Args:
        filepath: path of the image file, handed to tf.io.read_file.
        label: returned unchanged alongside the image.

    Returns:
        Tuple (image, label). The image is decoded with 3 channels, resized
        with size [IMG_WIDTH, IMG_HEIGHT] and passed through
        tf.keras.applications.efficientnet.preprocess_input.
    """
    raw_bytes = tf.io.read_file(filepath)
    # expand_animations=False: an animated file yields a single frame.
    decoded = tf.image.decode_image(raw_bytes, channels=3, expand_animations=False)
    # NOTE(review): tf.image.resize documents `size` as [new_height, new_width];
    # the constants are passed width-first here. That only works out because
    # the model's input_shape in this notebook uses the same ordering and both
    # values are equal.
    resized = tf.image.resize(decoded, [IMG_WIDTH, IMG_HEIGHT])
    return tf.keras.applications.efficientnet.preprocess_input(resized), label

def build_dataset(df, augment=False, shuffle=None):
    """Build a batched, prefetched tf.data pipeline from a dataframe.

    Args:
        df: dataframe with a 'filepath' column (image paths) and a 'label_idx'
            column (labels).
        augment: when True, each image is passed through `simple_augmentation`
            in training mode.
        shuffle: when True, the examples are reshuffled on every pass over the
            dataset. Defaults to the value of `augment`, so the training set
            (built with augment=True) no longer presents the exact same batches
            in the exact same order every epoch, while validation/test sets
            keep a fixed order.

    Returns:
        A tf.data.Dataset yielding (images, labels) batches of BATCH_SIZE.
    """
    if shuffle is None:
        shuffle = augment

    dataset = tf.data.Dataset.from_tensor_slices((df['filepath'].values, df['label_idx'].values))

    # Shuffle the (path, label) pairs before any decoding so the shuffle buffer
    # holds only short strings and integers, not image tensors. A buffer as
    # large as the dataset gives a full shuffle.
    if shuffle and len(df) > 1:
        dataset = dataset.shuffle(buffer_size=len(df), reshuffle_each_iteration=True)

    dataset = dataset.map(load_and_preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    if augment:
        # Augmentation is applied per image, before batching.
        dataset = dataset.map(lambda x, y: (simple_augmentation(x, training=True), y),
                              num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
    return dataset

# --- 3. Data Split ---
df = pd.read_csv(CLEAN_CSV_PATH)
df['filepath'] = df['filename'].apply(lambda x: os.path.join(IMAGE_DIR, x))
df['label_idx'] = df['label'].map({name: i for i, name in enumerate(CLASS_NAMES)})

# A label that is not in CLASS_NAMES maps to NaN, which would silently turn the
# label column into floats and feed NaN targets to the loss. Fail early instead.
unknown_labels = df.loc[df['label_idx'].isna(), 'label'].unique()
if len(unknown_labels) > 0:
    raise ValueError(f"Labels in {CLEAN_CSV_PATH} not present in CLASS_NAMES: {list(unknown_labels)}")
df['label_idx'] = df['label_idx'].astype('int64')

# A missing image would otherwise only surface as a pipeline error in the
# middle of an epoch; check up front so the failure is immediate and clear.
missing_mask = ~df['filepath'].map(os.path.exists).astype(bool)
if missing_mask.any():
    raise FileNotFoundError(
        f"{int(missing_mask.sum())} image file(s) listed in {CLEAN_CSV_PATH} are missing "
        f"from {IMAGE_DIR}, e.g. {df.loc[missing_mask, 'filepath'].iloc[0]}"
    )

# First split: hold out 10% as the test set (stratified by label).
train_val_df, test_df = train_test_split(df, test_size=0.1, random_state=42, stratify=df['label'])
# Second split: take a validation set about as large as the test set out of
# the remainder, again stratified by label.
val_fraction = len(test_df) / len(train_val_df)
train_df, val_df = train_test_split(train_val_df, test_size=val_fraction, random_state=42, stratify=train_val_df['label'])

train_dataset = build_dataset(train_df, augment=True)  # with simple augmentation
val_dataset = build_dataset(val_df, augment=False)

# --- 4. Rebuild Model (Exact Replica of Part 1) ---
print(f"Building EfficientNetB0 ({IMG_WIDTH}x{IMG_HEIGHT})...")
base_model = EfficientNetB0(
    weights='imagenet',
    include_top=False,
    input_shape=(IMG_WIDTH, IMG_HEIGHT, 3),
)

# Classification head: global pooling -> batch norm -> dropout -> softmax.
pooled = GlobalAveragePooling2D()(base_model.output)
normalized = BatchNormalization()(pooled)
regularized = Dropout(0.3)(normalized)
# The output layer is pinned to float32 because mixed precision requires a
# float32 output.
predictions = Dense(NUM_CLASSES, activation='softmax', dtype='float32')(regularized)

model = Model(inputs=base_model.input, outputs=predictions)

# --- 5. Load Weights from Part 1 ---
# Guard first: without the Part 1 file there is nothing to polish.
if not os.path.exists(LOAD_WEIGHTS_PATH):
    raise FileNotFoundError(f"Part 1 file not found at {LOAD_WEIGHTS_PATH}. Did Part 1 finish?")

print(f"Loading weights from Part 1: {LOAD_WEIGHTS_PATH}")
model.load_weights(LOAD_WEIGHTS_PATH)

# --- 6. Polishing Training ---
print("\n--- Starting Polishing Phase (Low LR, Simple Augmentation) ---")

# Unfreeze the whole backbone ...
base_model.trainable = True

# ... then freeze its BatchNormalization layers again (marked CRITICAL by the
# original author: these layers must stay frozen).
backbone_bn_layers = [
    candidate for candidate in base_model.layers
    if isinstance(candidate, BatchNormalization)
]
for bn_layer in backbone_bn_layers:
    bn_layer.trainable = False

# Low learning rate (1e-5) for the polishing phase.
polish_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)
model.compile(
    optimizer=polish_optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# All three callbacks watch the validation loss.
stop_when_stalled = EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True, verbose=1)
save_best_model = ModelCheckpoint(FINAL_MODEL_PATH, monitor='val_loss', save_best_only=True, verbose=1)
shrink_lr_on_plateau = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-7, verbose=1)
callbacks = [stop_when_stalled, save_best_model, shrink_lr_on_plateau]

history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=25,  # upper bound; EarlyStopping may end the run sooner
    class_weight=CLASS_WEIGHTS_DICT,
    callbacks=callbacks,
)

print(f"‚úÖ Final Polished Model saved to {FINAL_MODEL_PATH}")

Building EfficientNetB0 (224x224)...
Loading weights from Part 1: /content/drive/MyDrive/images/Copy of efficientnet_b0_fast.keras

--- Starting Polishing Phase (Low LR, Simple Augmentation) ---
Epoch 1/25
[1m751/751[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 652ms/step - accuracy: 0.9236 - loss: 0.3321
Epoch 1: val_loss improved from inf to 0.35735, saving model to /content/drive/MyDrive/images/efficientnet_b0_polished.keras
[1m751/751[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m712s[0m 714ms/step - accuracy: 0.9236 - loss: 0.3321 - val_accuracy: 0.8765 - val_loss: 0.3574 - learning_rate: 1.0000e-05
Epoch 2/25
[1m751/751[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 453ms/step - accuracy: 0.9253 - loss: 0.3160
Epoch 2: val_loss did not improve from 0.35735
[1m751/751[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
import os
from tqdm.notebook import tqdm

# Imported here so this cell does not depend on another cell having been run
# first just to bring train_test_split into scope.
from sklearn.model_selection import train_test_split

# --- Configuration (Set Your Parameters) ---
# Make sure this is your 88.05% model.
FINAL_MODEL_PATH = '/content/drive/MyDrive/images/v6_polished_best.keras'
IMAGE_DIR = '/content/local_images_cropped'
CLEAN_CSV_PATH = '/content/drive/MyDrive/images/master_tf_clean.csv'
BATCH_SIZE = 32  # 32 is fine for evaluation
IMG_WIDTH, IMG_HEIGHT = 300, 300

# List position defines the class index used for labels.
CLASS_NAMES = ['COVID-19', 'Lung Opacity', 'Normal', 'Pneumonia (Bacterial)', 'Pneumonia (Viral)', 'Tuberculosis']
NUM_CLASSES = len(CLASS_NAMES)

# --- 1. Data Loading & Pipeline Execution ---
print(f"Loading final model and data...")
df = pd.read_csv(CLEAN_CSV_PATH)
df['filepath'] = df['filename'].apply(lambda x: os.path.join(IMAGE_DIR, x))
class_indices = {name: i for i, name in enumerate(CLASS_NAMES)}
df['label_idx'] = df['label'].map(class_indices)

# A label that is not in CLASS_NAMES maps to NaN and would corrupt the report.
unknown_labels = df.loc[df['label_idx'].isna(), 'label'].unique()
if len(unknown_labels) > 0:
    raise ValueError(f"Labels in {CLEAN_CSV_PATH} not present in CLASS_NAMES: {list(unknown_labels)}")
df['label_idx'] = df['label_idx'].astype('int64')

# Reproduce the FIRST split of the training cell (same test_size, seed and
# stratification) and keep its held-out 10% as the test set.
# The previous version split train_val_df a second time and assigned the
# result to test_df; with the same seed and fraction that second split is the
# validation set used during training for checkpoint selection and early
# stopping, so the reported "test" numbers were measured on validation data.
train_val_df, test_df = train_test_split(df, test_size=0.1, random_state=42, stratify=df['label'])

# --- Dataset construction (meant to be the same as the one used in training) ---
def load_and_preprocess(filepath, label):
    """Read one image file and prepare it as DenseNet input.

    Args:
        filepath: path of the image file, handed to tf.io.read_file.
        label: returned unchanged alongside the image.

    Returns:
        Tuple (image, label). The image is decoded with 3 channels, resized
        with size [IMG_WIDTH, IMG_HEIGHT] and passed through
        tf.keras.applications.densenet.preprocess_input.
    """
    image = tf.image.decode_image(
        tf.io.read_file(filepath),
        channels=3,
        expand_animations=False,  # an animated file yields a single frame
    )
    image = tf.image.resize(image, [IMG_WIDTH, IMG_HEIGHT])
    return tf.keras.applications.densenet.preprocess_input(image), label

def build_dataset(df):
    """Build the evaluation pipeline (no augmentation) from a dataframe.

    Args:
        df: dataframe with a 'filepath' column (image paths) and a 'label_idx'
            column (labels).

    Returns:
        A tf.data.Dataset yielding preprocessed (images, labels) batches of
        BATCH_SIZE, with prefetching enabled.
    """
    paths_and_labels = (df['filepath'].values, df['label_idx'].values)
    return (
        tf.data.Dataset.from_tensor_slices(paths_and_labels)
        .map(load_and_preprocess, num_parallel_calls=tf.data.AUTOTUNE)
        .batch(BATCH_SIZE)
        .prefetch(tf.data.AUTOTUNE)
    )

test_dataset = build_dataset(test_df)

# --- 2. Model Load and Evaluation ---
# A missing file is reported directly instead of being routed through the
# manual-build fallback, where it would surface as a confusing weights error.
if not os.path.exists(FINAL_MODEL_PATH):
    raise FileNotFoundError(f"Model file not found at {FINAL_MODEL_PATH}")

print(f"Loading best weights (88.05% Peak) from '{FINAL_MODEL_PATH}'...")
try:
    # Load the model together with its saved architecture.
    model = tf.keras.models.load_model(FINAL_MODEL_PATH)
    print("‚úÖ Model loaded successfully.")
except Exception as e:
    # If load_model fails, rebuild the architecture by hand and load weights only.
    print(f"‚ùå Model load error: {e}. Manual build kar raha hoon...")

    def build_densenet_model():
        """Rebuild the DenseNet121 classifier so saved weights can be loaded into it.

        Returns:
            A Keras Model: DenseNet121 backbone (no pretrained weights, no top)
            followed by global average pooling, Dense(256, relu), Dropout(0.5)
            and a softmax layer with NUM_CLASSES outputs.
        """
        # Imported locally: this cell does not import these names itself, and
        # DenseNet121 is not imported anywhere else in the notebook, so the
        # fallback previously failed with a NameError instead of recovering.
        from tensorflow.keras.applications import DenseNet121
        from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
        from tensorflow.keras.models import Model

        base_model = DenseNet121(weights=None, include_top=False, input_shape=(IMG_WIDTH, IMG_HEIGHT, 3))
        base_model.trainable = True
        x = base_model.output
        x = GlobalAveragePooling2D()(x)
        x = Dense(256, activation='relu')(x)
        x = Dropout(0.5)(x)
        predictions = Dense(NUM_CLASSES, activation='softmax')(x)
        return Model(inputs=base_model.input, outputs=predictions)

    model = build_densenet_model()
    model.load_weights(FINAL_MODEL_PATH)
    print("‚úÖ Model manually loaded.")

# --- 3. Final Prediction & Report ---
print("\nGenerating classification report and confusion matrix...")
# Collect ground-truth labels and predicted probabilities batch by batch.
y_true = []
y_pred_probs = []

for images, labels in tqdm(test_dataset, desc="Generating Predictions"):
    y_true.extend(labels.numpy())
    # predict_on_batch does one forward pass on the given batch. Calling
    # model.predict() once per batch inside a Python loop sets up its own
    # prediction loop each time, which is the usage the Keras docs advise
    # against for small inputs.
    y_pred_probs.extend(model.predict_on_batch(images))

# Predicted class = index of the highest probability in each row.
y_pred = np.argmax(y_pred_probs, axis=1)

# Pin the label set explicitly. Without it, scikit-learn infers the labels from
# the data, and classification_report raises when a class is absent because
# the number of target_names no longer matches; the confusion matrix would
# also silently shrink.
label_ids = list(range(NUM_CLASSES))

print("\n--- Final Test Set Evaluation (V6 Polished Model) ---")
print(f"Total Test Samples: {len(y_true)}")

print("\n--- Confusion Matrix ---")
print(confusion_matrix(y_true, y_pred, labels=label_ids))

print("\n--- Classification Report (Final) ---")
print(classification_report(y_true, y_pred, labels=label_ids, target_names=CLASS_NAMES))

Loading final model and data...
Loading best weights (88.05% Peak) from '/content/drive/MyDrive/images/v6_polished_best.keras'...
‚úÖ Model loaded successfully.

Generating classification report and confusion matrix...


Generating Predictions:   0%|          | 0/94 [00:00<?, ?it/s]


--- Final Test Set Evaluation (V6 Polished Model) ---
Total Test Samples: 3004

--- Confusion Matrix ---
[[ 301   10   41    0    2    8]
 [  18  435  137    0    2    9]
 [   3   11 1464    6    8    0]
 [   0    0    3  202   19    0]
 [   0    0    8   66  181    0]
 [   2    0    4    0    0   64]]

--- Classification Report (Final) ---
                       precision    recall  f1-score   support

             COVID-19       0.93      0.83      0.88       362
         Lung Opacity       0.95      0.72      0.82       601
               Normal       0.88      0.98      0.93      1492
Pneumonia (Bacterial)       0.74      0.90      0.81       224
    Pneumonia (Viral)       0.85      0.71      0.78       255
         Tuberculosis       0.79      0.91      0.85        70

             accuracy                           0.88      3004
            macro avg       0.86      0.84      0.84      3004
         weighted avg       0.89      0.88      0.88      3004

