### 1. Mount Google Drive và Setup

In [None]:
# Attach Google Drive to the Colab filesystem; the data and model
# directories configured later in this notebook live under this mount.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
# Install dependencies.
# The leading "!" is the notebook shell escape: this line runs `pip` in a
# shell instead of being executed as Python, so it only works inside a
# notebook/IPython session.
!pip install tensorflow pillow scikit-learn matplotlib seaborn

In [None]:
# Import libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
import numpy as np
import os
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix

# Report the runtime environment: the TensorFlow version in use and the GPU
# devices TensorFlow can see (an empty list means no GPU is visible).
print(f"TensorFlow version: {tf.__version__}")
gpu_devices = tf.config.list_physical_devices('GPU')
print(f"GPU available: {gpu_devices}")

### 2. Configuration

In [None]:
# BASE_DIR is normally assigned by the path-selection cell in section 2.2,
# which sits further down in this notebook. Run that cell first, then (re)run
# this one so every derived path below picks up the chosen project folder.
if 'BASE_DIR' not in globals():
    # A top-to-bottom run reaches this cell before BASE_DIR exists, which used
    # to raise NameError. Fall back to the same default that section 2.2 uses.
    BASE_DIR = '/content/drive/MyDrive/pneumonia_project'
    print(f"⚠ BASE_DIR was not set yet; using default: {BASE_DIR}")

# Data directories (one sub-folder per split under data/processed)
DATA_DIR = os.path.join(BASE_DIR, 'data', 'processed')
TRAIN_DIR = os.path.join(DATA_DIR, 'train')
VAL_DIR = os.path.join(DATA_DIR, 'val')
TEST_DIR = os.path.join(DATA_DIR, 'test')

# Model directory: created eagerly so checkpoints and plots can be written
MODEL_DIR = os.path.join(BASE_DIR, 'models')
os.makedirs(MODEL_DIR, exist_ok=True)

# Hyperparameters
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
LEARNING_RATE = 0.0001
EPOCHS = 20
NUM_CLASSES = 2
DROPOUT_RATE = 0.5

print(f"Configuration:")
print(f"  Data directory: {DATA_DIR}")
print(f"  Model directory: {MODEL_DIR}")
print(f"  Image size: {IMG_SIZE}")
print(f"  Batch size: {BATCH_SIZE}")

# Check that every split directory exists and show the class folders in it
print("\nChecking directories:")
for name, path in [('TRAIN', TRAIN_DIR), ('VAL', VAL_DIR), ('TEST', TEST_DIR)]:
    if not os.path.exists(path):
        print(f"  ✗ {name}: NOT FOUND - {path}")
        continue
    try:
        subdirs = [d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d))]
    except OSError as e:
        # Listing can fail (e.g. permissions, path is a file); report it and
        # keep checking the remaining splits.
        print(f"  ✗ {name}: Error - {e}")
    else:
        print(f"  ✓ {name}: {path}")
        print(f"    Classes: {subdirs}")

### 2.1. Kiểm tra và tìm thư mục dữ liệu

In [None]:
# Inspect the folder structure of the mounted Google Drive and point out
# folders whose names suggest they contain the dataset.
import os

# Name fragments (lower-case) that mark a folder as a likely data folder
DATA_KEYWORDS = ('pneumonia', 'chest', 'xray', 'data', 'covid')

print("Checking Google Drive structure...")
drive_path = '/content/drive/MyDrive'

if os.path.exists(drive_path):
    print(f"\n✓ Found: {drive_path}")

    # List the Drive once and reuse the result for both passes below
    drive_folders = [
        item for item in os.listdir(drive_path)
        if os.path.isdir(os.path.join(drive_path, item))
    ]

    print("\nFolders in MyDrive:")
    for item in drive_folders:
        print(f"  📁 {item}")

    # Look for folders that may hold the data
    print("\n" + "="*60)
    print("Searching for data folders...")
    print("="*60)

    possible_folders = []
    for item in drive_folders:
        item_lower = item.lower()
        if not any(keyword in item_lower for keyword in DATA_KEYWORDS):
            continue

        possible_folders.append(item)
        print(f"✓ Found potential data folder: {item}")

        # Peek inside the candidate; an unreadable folder is reported
        # instead of being silently ignored, and the scan continues.
        try:
            sub_items = os.listdir(os.path.join(drive_path, item))
        except OSError as e:
            print(f"  ⚠ Could not read {item}: {e}")
            continue
        if 'data' in sub_items or 'chest_xray' in sub_items:
            print(f"  → Contains: {', '.join(sub_items[:5])}")

    if not possible_folders:
        print("\n⚠ No data folders found!")
        print("Please upload your data to Google Drive first.")
        print("\nExpected structure:")
        print("  MyDrive/")
        print("    └── pneumonia_project/  (or your project folder)")
        print("        └── data/")
        print("            └── processed/")
        print("                ├── train/")
        print("                │   ├── NORMAL/")
        print("                │   └── PNEUMONIA/")
        print("                ├── val/")
        print("                └── test/")
else:
    print(f"✗ Drive not mounted at {drive_path}")
    print("Please run the Mount Drive cell first!")

### 2.2. Cập nhật đường dẫn (Chạy sau khi kiểm tra xong)

In [None]:
# UPDATE THIS PATH based on the results of the Drive check above.
# NOTE: the Configuration cell (section 2) derives DATA_DIR / MODEL_DIR from
# BASE_DIR, so re-run that cell after changing the value here.
# Common cases:

# Case 1: data lives in its own project folder
BASE_DIR = '/content/drive/MyDrive/pneumonia_project'

# Case 2: data lives in the COVID-19_Radiography_Dataset folder
# BASE_DIR = '/content/drive/MyDrive/COVID-19_Radiography_Dataset'

# Case 3: data lives in the chest_xray folder
# BASE_DIR = '/content/drive/MyDrive/chest_xray'

# Case 4: data/processed already exists inside some other folder
# BASE_DIR = '/content/drive/MyDrive/your_folder_name'

print(f"Using BASE_DIR: {BASE_DIR}")

# Check whether the chosen directory actually exists
if os.path.exists(BASE_DIR):
    print("✓ BASE_DIR exists!")
    print(f"\nContents of {BASE_DIR}:")
    try:
        for item in os.listdir(BASE_DIR)[:10]:  # show only the first 10 entries
            print(f"  - {item}")
    except OSError as e:
        # Listing can fail (e.g. permissions, path is a file); report and go on
        print(f"  Error: {e}")
else:
    print(f"✗ BASE_DIR does not exist: {BASE_DIR}")
    print("\n⚠ PLEASE UPDATE BASE_DIR ABOVE!")
    print("Run the previous cell to see available folders.")

### 3. Data Generators với Augmentation

In [None]:
# Augmentation settings applied to training images only: pixel rescaling
# plus random flips, rotations, shifts, brightness changes and zoom.
augmentation_options = dict(
    rescale=1./255,
    horizontal_flip=True,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    brightness_range=[0.8, 1.2],
    zoom_range=0.1,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(**augmentation_options)

# Validation and test images are only rescaled, never augmented
val_test_datagen = ImageDataGenerator(rescale=1./255)

print("Data generators created!")

### 4. Load Datasets

In [None]:
# Settings shared by all three directory iterators
shared_flow_options = {
    'target_size': IMG_SIZE,
    'batch_size': BATCH_SIZE,
    'class_mode': 'categorical',
}

# Training data: augmented, shuffled with a fixed seed
train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR, shuffle=True, seed=42, **shared_flow_options
)

# Validation data: fixed order
val_generator = val_test_datagen.flow_from_directory(
    VAL_DIR, shuffle=False, **shared_flow_options
)

# Test data: fixed order, so predictions line up with `.classes` later on
test_generator = val_test_datagen.flow_from_directory(
    TEST_DIR, shuffle=False, **shared_flow_options
)

print(f"\nClass indices: {train_generator.class_indices}")
split_generators = (
    ('training', train_generator),
    ('validation', val_generator),
    ('test', test_generator),
)
for split_label, split_generator in split_generators:
    print(f"Total {split_label} samples: {split_generator.samples}")

### 5. Build Model với DenseNet121

In [None]:
def create_model(img_size=IMG_SIZE, num_classes=NUM_CLASSES, dropout_rate=DROPOUT_RATE):
    """Build a DenseNet121 transfer-learning classifier.

    Args:
        img_size: Pair whose two entries give the first two dimensions of
            the backbone input shape; a third dimension of 3 is appended.
        num_classes: Number of units in the final softmax layer.
        dropout_rate: Rate used by both Dropout layers in the head.

    Returns:
        An uncompiled Sequential model: the frozen ImageNet-pretrained
        DenseNet121 backbone followed by pooling, dropout and dense layers.
    """
    backbone_input_shape = (img_size[0], img_size[1], 3)

    # ImageNet-pretrained DenseNet121 without its original classifier top
    backbone = DenseNet121(
        weights='imagenet',
        include_top=False,
        input_shape=backbone_input_shape,
    )
    # Freeze the backbone so only the new head is trained
    backbone.trainable = False

    # Classification head stacked on top of the backbone features
    classifier_head = [
        layers.GlobalAveragePooling2D(),
        layers.Dropout(dropout_rate),
        layers.Dense(512, activation='relu'),
        layers.Dropout(dropout_rate),
        layers.Dense(num_classes, activation='softmax'),
    ]

    return models.Sequential([backbone, *classifier_head])

# Instantiate the network with the notebook defaults and show its layout
model = create_model()
model.summary()

total_params = model.count_params()
print(f"\nTotal parameters: {total_params:,}")

### 6. Compile Model

In [None]:
# Configure training: Adam at the configured learning rate, categorical
# cross-entropy loss, and accuracy as the reported metric.
adam_optimizer = keras.optimizers.Adam(learning_rate=LEARNING_RATE)
model.compile(
    optimizer=adam_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

print("Model compiled successfully!")

### 7. Callbacks

In [None]:
# Where the best model (by validation accuracy) is written
checkpoint_path = os.path.join(MODEL_DIR, 'best_model.h5')

# Model checkpoint: keep only the best model seen so far
checkpoint = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# Halve the learning rate when validation loss stops improving
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-7,
    verbose=1,
)

# Early stopping: stop after 5 epochs without validation-loss improvement
# and roll back to the best weights
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

callbacks = [checkpoint, reduce_lr, early_stop]
print("Callbacks configured!")

### 8. Training

In [None]:
training_banner = "="*60
print(training_banner)
print("STARTING TRAINING")
print(training_banner)

# Fit on the training iterator, validating each epoch; the returned History
# object is kept for the plots further down.
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=EPOCHS,
    callbacks=callbacks,
    verbose=1,
)

print("\n" + training_banner)
print("TRAINING COMPLETED!")
print(training_banner)

### 9. Visualization - Training History

In [None]:
# Plot the training history: loss on the left panel, accuracy on the right
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# (history key, display label) for each panel, in left-to-right order
history_panels = (('loss', 'Loss'), ('accuracy', 'Accuracy'))

for axis, (metric_key, metric_label) in zip(axes, history_panels):
    train_curve = history.history[metric_key]
    val_curve = history.history[f'val_{metric_key}']
    axis.plot(train_curve, label=f'Train {metric_label}', marker='o')
    axis.plot(val_curve, label=f'Val {metric_label}', marker='s')
    axis.set_xlabel('Epoch')
    axis.set_ylabel(metric_label)
    axis.set_title(f'Training and Validation {metric_label}')
    axis.legend()
    axis.grid(True)

plt.tight_layout()

# Save before show() so the figure is written to the model directory
history_path = os.path.join(MODEL_DIR, 'training_history.png')
plt.savefig(history_path, dpi=300, bbox_inches='tight')
plt.show()

print(f"Training history saved to {history_path}")

### 10. Test Evaluation

In [None]:
evaluation_banner = "="*60
print(evaluation_banner, "EVALUATING ON TEST SET", evaluation_banner, sep="\n")

# Load the best checkpoint written during training
best_model = keras.models.load_model(checkpoint_path)

# Evaluate on the test set; the result unpacks into loss and accuracy
test_metrics = best_model.evaluate(test_generator, verbose=1)
test_loss, test_acc = test_metrics

print("\nTest Results:")
print(f"  Loss: {test_loss:.4f}")
print(f"  Accuracy: {test_acc:.4f}")

### 11. Predictions và Classification Report

In [None]:
# Rewind the test iterator, then predict class probabilities for every
# test sample.
test_generator.reset()
predictions = best_model.predict(test_generator, verbose=1)

# Predicted label = index of the highest probability; true labels come from
# the iterator itself
y_pred = np.argmax(predictions, axis=1)
y_true = test_generator.classes

# Class names in the order of the generator's class_indices mapping
class_names = list(test_generator.class_indices)

# Per-class precision / recall / F1
report_text = classification_report(y_true, y_pred, target_names=class_names)
print("\nClassification Report:")
print(report_text)

### 12. Confusion Matrix

In [None]:
# Draw the confusion matrix as an annotated heatmap
cm = confusion_matrix(y_true, y_pred)

heatmap_options = dict(
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    cbar_kws={'label': 'Count'},
)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, **heatmap_options)
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.title('Confusion Matrix')
plt.tight_layout()

# Save before show() so the figure is written to the model directory
cm_path = os.path.join(MODEL_DIR, 'confusion_matrix.png')
plt.savefig(cm_path, dpi=300, bbox_inches='tight')
plt.show()

print(f"Confusion matrix saved to {cm_path}")

### 13. Save Final Model

In [None]:
# Save the final model (the reloaded best checkpoint) under its own name
final_model_path = os.path.join(MODEL_DIR, 'final_model.h5')
best_model.save(final_model_path)

done_banner = "="*60
print(f"\nFinal model saved to {final_model_path}")
print("Model format: HDF5 (.h5)")
print("\n" + done_banner)
print("ALL DONE!")
print(done_banner)

### 14. Model Summary và Info

In [None]:
# Show a summary of the trained model and the artifacts written so far
input_shape = IMG_SIZE + (3,)

print("Model Information:")
print("  Architecture: DenseNet121")
print(f"  Input shape: {input_shape}")
print(f"  Number of classes: {NUM_CLASSES}")
print(f"  Class names: {class_names}")
print(f"  Test accuracy: {test_acc:.4f}")

print("\nModel files saved:")
for saved_path in (checkpoint_path, final_model_path, history_path, cm_path):
    print(f"  - {saved_path}")

### 15. Download Models (Optional)

In [None]:
# Download the artifacts to the local machine
from google.colab import files

# Best model, final model, then the two plots (same order as before)
artifact_paths = (
    checkpoint_path,
    final_model_path,
    history_path,
    cm_path,
)
for artifact_path in artifact_paths:
    files.download(artifact_path)

### 16. How to Load Model Later

In [None]:
# How to load the model for later use:
# loaded_model = keras.models.load_model(final_model_path)
# predictions = loaded_model.predict(your_data)

load_hint_lines = (
    "To load model later, use:",
    "  model = keras.models.load_model('final_model.h5')",
)
print(*load_hint_lines, sep="\n")