In [None]:
import os
import time

import subprocess

# --- 1. Change only this line ---
# First run: set it to 'images1_archive.tar.gz'
# Second run: set it to 'images2_archive.tar.gz', and so on.
ARCHIVE_FILE_NAME = "Copy of images_archive.tar.gz"

# --- 2. Keep these paths the same on every run ---
DRIVE_BASE_PATH = "/content/drive/MyDrive/images"
# Always the same folder, so images from successive archives accumulate in one place.
LOCAL_IMAGE_DIR = "/content/local_images"

# --- 3. Everything below is derived automatically ---
DRIVE_TAR_PATH = os.path.join(DRIVE_BASE_PATH, ARCHIVE_FILE_NAME)
LOCAL_TAR_PATH = f"/content/{ARCHIVE_FILE_NAME}"


def _run_checked(command, description):
    """Run an external command and raise RuntimeError if it exits non-zero.

    Args:
        command: Argument list passed to subprocess.run (no shell is involved, so
            paths containing spaces need no quoting).
        description: Short label used in the error message.

    Raises:
        RuntimeError: If the command's exit code is not 0; the message includes
            the command's captured stderr.
    """
    completed = subprocess.run(command, capture_output=True, text=True)
    if completed.returncode != 0:
        raise RuntimeError(
            f"{description} failed (exit code {completed.returncode}): "
            f"{completed.stderr.strip()}"
        )


# Create the target folder if it does not exist yet; reuse it if it does.
os.makedirs(LOCAL_IMAGE_DIR, exist_ok=True)
print(f"--- Setup Shuru ---")
print(f"Target Folder: {LOCAL_IMAGE_DIR}")

# Fail early with a clear message instead of reporting a "successful" copy of a
# file that is not there (e.g. Drive not mounted or a typo in ARCHIVE_FILE_NAME).
if not os.path.isfile(DRIVE_TAR_PATH):
    raise FileNotFoundError(f"Archive not found: '{DRIVE_TAR_PATH}'")

# --- FAST COPY ---
print(f"Copying {ARCHIVE_FILE_NAME}...")
start_time = time.time()
_run_checked(["cp", DRIVE_TAR_PATH, LOCAL_TAR_PATH], "Copy")
print(f"Copy complete in {(time.time() - start_time):.2f} seconds.")

# --- FAST UNPACK (ADD) ---
print(f"Extracting and ADDING images to {LOCAL_IMAGE_DIR}...")
start_time = time.time()
# tar extracts into the existing folder: files already there are not deleted,
# the archive's contents are added alongside them.
_run_checked(["tar", "-xzf", LOCAL_TAR_PATH, "-C", LOCAL_IMAGE_DIR], "Extraction")
print(f"Extraction complete in {(time.time() - start_time):.2f} seconds.")

print(f"\n‚úÖ --- READY TO TRAIN! Images are in {LOCAL_IMAGE_DIR} ---")

--- Setup Shuru ---
Target Folder: /content/local_images
Copying Copy of images_archive.tar.gz...
Copy complete in 34.51 seconds.
Extracting and ADDING images to /content/local_images...
Extraction complete in 30.29 seconds.

‚úÖ --- READY TO TRAIN! Images are in /content/local_images ---


In [None]:
import os

# Directory the archive was unpacked into.
LOCAL_IMAGE_DIR = "/content/local_images"

# Sanity check: report how many directory entries the extraction produced.
try:
    extracted_entries = os.listdir(LOCAL_IMAGE_DIR)
    file_count = len(extracted_entries)
    print(f"‚úÖ Success! Found {file_count} images in '{LOCAL_IMAGE_DIR}'.")
except FileNotFoundError:
    # The extraction folder itself is missing.
    print(f"‚ùå Error: Could not find the directory '{LOCAL_IMAGE_DIR}'.")
except Exception as e:
    # Any other problem (e.g. permissions) is reported without stopping the notebook.
    print(f"An error occurred: {e}")

‚úÖ Success! Found 30051 images in '/content/local_images'.


In [None]:
import os
import cv2  # OpenCV (Colab mein pehle se install hota hai)
from tqdm.notebook import tqdm
import numpy as np # Numpy zaroori hai

# --- 1. Paths ---
SOURCE_DIR = "/content/local_images"
DEST_DIR = "/content/local_images_cropped"
os.makedirs(DEST_DIR, exist_ok=True)

# --- 2. Crop setting ---
# Keep the central 75% of each side so that the "L" and "PORTABLE" text markers
# near the image borders are cut away.
CROP_PERCENT = 0.75

print(f"'{SOURCE_DIR}' se images ko crop kiya ja raha hai...")
print(f"Nayi images '{DEST_DIR}' mein save hon gi...")
print(f"Cropping percentage: {int(CROP_PERCENT*100)}%")

image_files = os.listdir(SOURCE_DIR)
errors = 0
processed_count = 0
# (filename, reason) for every file that was skipped, so failures can be diagnosed.
failed_files = []

for filename in tqdm(image_files, desc="Cropping images"):
    source_path = os.path.join(SOURCE_DIR, filename)
    dest_path = os.path.join(DEST_DIR, filename)

    try:
        # cv2.imread returns None (it does not raise) when the file cannot be decoded.
        img = cv2.imread(source_path)
        if img is None:
            errors += 1
            failed_files.append((filename, "could not be read as an image"))
            continue

        # Image height and width (any channel dimension is ignored).
        h, w = img.shape[:2]

        # --- Centre crop ---
        new_h = int(h * CROP_PERCENT)
        new_w = int(w * CROP_PERCENT)

        start_y = (h - new_h) // 2
        start_x = (w - new_w) // 2
        end_y = start_y + new_h
        end_x = start_x + new_w

        cropped_img = img[start_y:end_y, start_x:end_x]

        # cv2.imwrite signals failure through its boolean return value, so it must
        # be checked explicitly; otherwise a failed write is counted as a success.
        if not cv2.imwrite(dest_path, cropped_img):
            errors += 1
            failed_files.append((filename, "cv2.imwrite reported failure"))
            continue
        processed_count += 1

    except Exception as e:
        # Keep going with the remaining files, but remember why this one failed.
        errors += 1
        failed_files.append((filename, str(e)))

print("\n--- CROP MUKAMMAL! ---")
print(f"Total images found: {len(image_files)}")
print(f"Successfully processed and saved: {processed_count}")
print(f"Errors (skipped files): {errors}")

# Show a short sample of the failures instead of silently discarding them.
if failed_files:
    print("First skipped files:")
    for failed_name, reason in failed_files[:10]:
        print(f"  {failed_name}: {reason}")

'/content/local_images' se images ko crop kiya ja raha hai...
Nayi images '/content/local_images_cropped' mein save hon gi...
Cropping percentage: 75%


Cropping images:   0%|          | 0/30051 [00:00<?, ?it/s]


--- CROP MUKAMMAL! ---
Total images found: 30051
Successfully processed and saved: 30051
Errors (skipped files): 0


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.layers import RandomFlip, RandomRotation, RandomZoom, RandAugment
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import os

# --- 1. Configuration (new settings) ---
BATCH_SIZE = 64  # back to 64 to speed training up
IMG_WIDTH = IMG_HEIGHT = 256  # reduced from 300x300 to 256x256 for speed
EPOCHS_FINE_TUNE = 30
LEARNING_RATE_FINE_TUNE = 1e-5  # very low learning rate for the RandAugment fine-tuning

# --- 2. Paths ---
# Make sure this checkpoint really is the model that reached 86.35%.
SAVED_WEIGHTS_PATH = '/content/drive/MyDrive/images/v5_randaugment_best(Final1).keras'
# New file name for the model produced by this run.
FINAL_MODEL_PATH = '/content/drive/MyDrive/images/v6_okay_final_256px.keras'
# Folder holding the centre-cropped images.
IMAGE_DIR = '/content/local_images_cropped'
CLEAN_CSV_PATH = 'master_tf_clean.csv'

# --- 3. Class names and class weights ---
# The position of a name in this list is its integer class index.
CLASS_NAMES = [
    'COVID-19',
    'Lung Opacity',
    'Normal',
    'Pneumonia (Bacterial)',
    'Pneumonia (Viral)',
    'Tuberculosis',
]
NUM_CLASSES = len(CLASS_NAMES)

# Per-class loss weights keyed by class index (new values: Viral = 4.0).
CLASS_WEIGHTS_DICT = {
    0: 1.38,
    1: 0.83,
    2: 0.33,
    3: 2.23,
    4: 4.0,
    5: 7.15,
}

# --- 4. Function Definitions ---

def _load_resized_image(filepath):
    """Read an image file, decode it to 3 channels and resize it.

    No model-specific normalisation is applied here, so the result is still on
    the decoded pixel scale (0-255 for ordinary 8-bit images).

    Args:
        filepath: Scalar string tensor holding the path of the image file.

    Returns:
        The decoded image resized to [IMG_WIDTH, IMG_HEIGHT] with 3 channels.
    """
    img = tf.io.read_file(filepath)
    img = tf.image.decode_image(img, channels=3, expand_animations=False)
    # NOTE(review): tf.image.resize interprets `size` as (height, width). The
    # order used here is harmless while IMG_WIDTH == IMG_HEIGHT and matches the
    # input_shape ordering used by build_densenet_model, so it is left as is.
    return tf.image.resize(img, [IMG_WIDTH, IMG_HEIGHT])


def load_and_preprocess(filepath, label):
    """Load one image and apply DenseNet input preprocessing.

    Args:
        filepath: Scalar string tensor holding the path of the image file.
        label: Label passed through unchanged.

    Returns:
        Tuple of (preprocessed image, label).
    """
    img = _load_resized_image(filepath)
    img = tf.keras.applications.densenet.preprocess_input(img)
    return img, label


def build_dataset(df, augment=False, shuffle=None):
    """Build a batched, prefetched tf.data pipeline from a dataframe.

    Args:
        df: DataFrame with a 'filepath' column and a 'label_idx' column.
        augment: When True, random flip/rotation/zoom and RandAugment are applied
            to every image.
        shuffle: Whether to reshuffle the samples on every pass over the dataset.
            Defaults to None, which means "same as `augment`", so the training
            pipeline is shuffled and the evaluation pipelines keep their order.

    Returns:
        A tf.data.Dataset yielding (image batch, label batch) pairs.
    """
    if shuffle is None:
        shuffle = augment

    dataset = tf.data.Dataset.from_tensor_slices((df['filepath'].values, df['label_idx'].values))

    # Shuffle the cheap (path, label) pairs before any image is decoded; without
    # this every epoch would see exactly the same batches in the same order.
    if shuffle and len(df) > 0:
        dataset = dataset.shuffle(buffer_size=len(df), reshuffle_each_iteration=True)

    if augment:
        augmentation_layers = tf.keras.Sequential([
            RandomFlip("horizontal"),
            RandomRotation(0.1),
            RandomZoom(0.1),
            RandAugment(value_range=(0, 255), num_ops=2, factor=0.2)
        ], name="augmentation")

        # RandAugment is configured with value_range=(0, 255), so it has to run on
        # the un-normalised image; DenseNet preprocessing is applied afterwards.
        dataset = dataset.map(lambda path, y: (_load_resized_image(path), y),
                              num_parallel_calls=tf.data.AUTOTUNE)
        dataset = dataset.map(lambda x, y: (augmentation_layers(x, training=True), y),
                              num_parallel_calls=tf.data.AUTOTUNE)
        dataset = dataset.map(lambda x, y: (tf.keras.applications.densenet.preprocess_input(x), y),
                              num_parallel_calls=tf.data.AUTOTUNE)
    else:
        dataset = dataset.map(load_and_preprocess, num_parallel_calls=tf.data.AUTOTUNE)

    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)
    return dataset

# Model builder (includes the crash fix)
def build_densenet_model():
    """Create a DenseNet-121 classifier with a frozen ImageNet backbone.

    The head is global average pooling, a 256-unit ReLU layer, 50% dropout and a
    softmax layer with NUM_CLASSES outputs.

    Returns:
        Tuple of (full model, DenseNet121 backbone). The backbone is returned
        separately so the caller can change which of its layers are trainable.
    """
    backbone = DenseNet121(
        include_top=False,
        weights='imagenet',
        input_shape=(IMG_WIDTH, IMG_HEIGHT, 3),
    )
    # Start with every backbone layer frozen.
    backbone.trainable = False

    pooled = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(256, activation='relu')(pooled)
    hidden = Dropout(0.5)(hidden)
    class_probabilities = Dense(NUM_CLASSES, activation='softmax')(hidden)

    classifier = Model(inputs=backbone.input, outputs=class_probabilities)
    return classifier, backbone

# --- 5. Data Loading & Pipeline Execution ---
print(f"Loading data from {CLEAN_CSV_PATH}...")
df = pd.read_csv(CLEAN_CSV_PATH)
df['filepath'] = df['filename'].apply(lambda x: os.path.join(IMAGE_DIR, x))
class_indices = {name: i for i, name in enumerate(CLASS_NAMES)}
df['label_idx'] = df['label'].map(class_indices)

# Series.map yields NaN for any label that is not in CLASS_NAMES. Stop here with a
# clear message rather than letting NaN class indices reach the training loop.
unknown_label_mask = df['label_idx'].isna()
if unknown_label_mask.any():
    unknown_labels = sorted(df.loc[unknown_label_mask, 'label'].astype(str).unique())
    raise ValueError(
        f"{int(unknown_label_mask.sum())} rows in '{CLEAN_CSV_PATH}' have labels "
        f"that are not in CLASS_NAMES: {unknown_labels}"
    )
df['label_idx'] = df['label_idx'].astype('int64')

# Hold out 10% of the data as the test set, stratified by label.
train_val_df, test_df = train_test_split(df, test_size=0.1, random_state=42, stratify=df['label'])
# Carve a validation set out of the remainder; the fraction is chosen so that the
# validation set has about as many rows as the test set.
train_df, val_df = train_test_split(train_val_df, test_size=(len(test_df)/len(train_val_df)), random_state=42, stratify=train_val_df['label'])

# Only the Stage 2 (RandAugment) training dataset is built; validation and test
# data are left un-augmented.
train_dataset = build_dataset(train_df, augment=True)
val_dataset = build_dataset(val_df, augment=False)
test_dataset = build_dataset(test_df, augment=False)

# --- 6. Build the model and restore saved weights (Stage 1 is skipped) ---
print("Building DenseNet-121 model structure (256x256)...")
model, base_model = build_densenet_model()

print("--- STAGE 1 SKIPPED ---")
print(f"Loading best weights (86.35%) from '{SAVED_WEIGHTS_PATH}'...")
try:
    model.load_weights(SAVED_WEIGHTS_PATH)
    print("‚úÖ Weights loaded successfully.")
except Exception as load_error:
    # Report the problem, then re-raise: fine-tuning must not continue from
    # weights that failed to load.
    print(f"‚ùå ERROR: Weights load nahi huin. {load_error}")
    raise

# --- 7. STAGE 2: Smart fine-tuning (with RandAugment) ---
print("\n--- STAGE 2: Smart Fine-tuning (RandAugment Enabled) ---")

# Mark the whole backbone trainable, then freeze its first 347 layers again so
# that only the layers from index 347 onward are fine-tuned.
# NOTE(review): any BatchNormalization layers in the unfrozen slice will also
# update their moving statistics during training - confirm this is intended.
base_model.trainable = True
for layer in base_model.layers[:347]:
    layer.trainable = False
print("‚úÖ DenseNet Base Model ki Top Layers Fine-tuning ke liye unfreeze kar di gayi hain.")

# Compile after changing the trainable flags, using the very low fine-tuning learning rate.
fine_tune_optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE_FINE_TUNE)
model.compile(
    optimizer=fine_tune_optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# All three callbacks watch the validation loss.
early_stopping_s2 = EarlyStopping(
    monitor='val_loss',
    patience=15,
    restore_best_weights=True,
    verbose=1,
)
model_checkpoint_s2 = ModelCheckpoint(
    FINAL_MODEL_PATH,
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=5,
    min_lr=1e-7,
    verbose=1,
)
stage2_callbacks = [early_stopping_s2, model_checkpoint_s2, reduce_lr]

print("Starting fine-tuning with RandAugment (256x256)...")

# Train on the RandAugment dataset, weighting the loss per class.
history_fine_tune = model.fit(
    train_dataset,
    epochs=EPOCHS_FINE_TUNE,
    validation_data=val_dataset,
    callbacks=stage2_callbacks,
    class_weight=CLASS_WEIGHTS_DICT,
)

print("--- RandAugment Fine-Tuning Complete! ---")

Loading data from master_tf_clean.csv...
Building DenseNet-121 model structure (256x256)...
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m29084464/29084464[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 0us/step
--- STAGE 1 SKIPPED ---
Loading best weights (86.35%) from '/content/drive/MyDrive/images/v5_randaugment_best(Final1).keras'...
‚úÖ Weights loaded successfully.

--- STAGE 2: Smart Fine-tuning (RandAugment Enabled) ---
‚úÖ DenseNet Base Model ki Top Layers Fine-tuning ke liye unfreeze kar di gayi hain.
Starting fine-tuning with RandAugment (256x256)...
Epoch 1/30
[1m376/376[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.6589 - loss: 0.9030
Epoch 1: val_loss improved from inf to 0.65687, saving model to /content/drive/MyDrive/images/v6_okay_final_256px.keras
