# PCB Defect Detector - Kaggle Training

**Setup:**
1. Add dataset: `akhatova/pcb-defects`
2. Enable GPU: Settings → Accelerator → GPU T4 x2

In [None]:
# Clone repository
!git clone https://github.com/alainpaluku/pcb-defect-detector.git
%cd pcb-defect-detector

In [None]:
# Imports
import sys
# Put the cloned repository first on the module search path; this has to run
# before the project imports further down in this cell.
sys.path.insert(0, '/kaggle/working/pcb-defect-detector')

import logging
# Root logger at INFO, formatted as "<time> - <logger name> - <message>".
# NOTE(review): basicConfig is a no-op when the root logger already has
# handlers — confirm that log lines actually show up in this kernel.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(message)s')

from pathlib import Path
# Project modules — presumably resolved from the cloned repository via the
# sys.path entry added above.
from config import PipelineConfig, DataConfig, ModelConfig, TrainingConfig
from data_manager import KaggleDataManager
from data_pipeline import DataPipeline
from model_builder import PCBModelBuilder
from trainer import Trainer
from evaluator import Evaluator

In [None]:
# Configuration
# Each section is built on its own and then assembled into the pipeline config.

# Input data: attached Kaggle dataset directory, image size and batch size
data_cfg = DataConfig(
    data_dir=Path("/kaggle/input/pcb-defects"),
    image_size=(224, 224),
    batch_size=32,
)

# Model: base network name, dropout rate and whether the base starts frozen
model_cfg = ModelConfig(
    base_model="MobileNetV2",
    dropout_rate=0.5,
    freeze_base=True,
)

# Training: settings for the initial run and the later fine-tuning run,
# plus the directory used for checkpoints
training_cfg = TrainingConfig(
    epochs=25,
    learning_rate=1e-3,
    fine_tune_epochs=15,
    fine_tune_layers=30,
    checkpoint_dir=Path("/kaggle/working/checkpoints"),
)

config = PipelineConfig(
    data=data_cfg,
    model=model_cfg,
    training=training_cfg,
    results_dir=Path("/kaggle/working/results"),
)

In [None]:
# Load dataset
# The data manager is configured from the data section of the pipeline config.
data_manager = KaggleDataManager(config.data)
# NOTE(review): data_dir points at /kaggle/input, where the attached dataset is
# mounted; presumably this call only locates or validates it — confirm in data_manager.
data_manager.download_dataset()
# class_images is passed to DataPipeline.prepare_data and class_names to the
# pipeline, model builder and evaluator in later cells.
class_images = data_manager.parse_directory_structure()
class_names = data_manager.get_class_names()
print(f"Classes: {class_names}")

In [None]:
# Prepare data pipeline
# Built from the data config and the class names returned by the data manager.
data_pipeline = DataPipeline(config.data, class_names)
# prepare_data must run before the getters used below and in later cells
# (presumably it creates the train/val/test splits — confirm in data_pipeline).
data_pipeline.prepare_data(class_images)
# Quick sanity output: sample counts and the class weights later passed to training.
print(f"Samples: {data_pipeline.get_num_samples()}")
print(f"Class weights: {data_pipeline.get_class_weights()}")

In [None]:
# Build model
# The builder receives the model and data configs; the number of output classes
# is taken from the class names found in the dataset.
model_builder = PCBModelBuilder(config.model, config.data, num_classes=len(class_names))
# model is reused by the trainer and the evaluator; model_builder is kept for
# the later unfreeze step.
model = model_builder.build()
# Print the layer summary for a visual check of the architecture.
model.summary()

In [None]:
# Train (frozen base)
trainer = Trainer(config.training, model)
trainer.compile()

# Fetch the training inputs up front, in the order they are passed to train()
train_ds = data_pipeline.get_train_dataset()
val_ds = data_pipeline.get_val_dataset()
weights = data_pipeline.get_class_weights()

history = trainer.train(train_dataset=train_ds, val_dataset=val_ds, class_weights=weights)

In [None]:
# Fine-tune
# Unfreeze first; the layer count comes from the training config (fine_tune_layers).
model_builder.unfreeze_layers(config.training.fine_tune_layers)

# Same datasets and class weights as the first training run, collected as
# keyword arguments for fine_tune()
fine_tune_inputs = {
    "train_dataset": data_pipeline.get_train_dataset(),
    "val_dataset": data_pipeline.get_val_dataset(),
    "class_weights": data_pipeline.get_class_weights(),
}
ft_history = trainer.fine_tune(**fine_tune_inputs)

In [None]:
# Save model
# Ask the trainer to write the model to pcb_model.keras in the Kaggle working directory.
model_output_path = Path("/kaggle/working/pcb_model.keras")
trainer.save_model(model_output_path)

In [None]:
# Evaluate
evaluator = Evaluator(model, class_names, output_dir=config.results_dir)

# Collect the report inputs in the order they are passed to the evaluator
test_ds = data_pipeline.get_test_dataset()
test_labels = data_pipeline.get_test_labels()
combined_history = trainer.get_combined_history()
test_paths = data_pipeline.get_test_paths()

results = evaluator.generate_full_report(
    test_dataset=test_ds,
    true_labels=test_labels,
    history=combined_history,
    test_paths=test_paths,
)

# Headline metrics read from the report dictionary, four decimal places each
print(f"\nTest Accuracy: {results['test_accuracy']:.4f}")
print(f"F1 Macro: {results['f1_macro']:.4f}")
print(f"F1 Weighted: {results['f1_weighted']:.4f}")

In [None]:
# Display results
from IPython.display import Image, display
# Not used in this cell; kept in case later cells rely on it.
import matplotlib.pyplot as plt

# Result plots looked up in config.results_dir — presumably written there by the
# evaluator's full report; verify the file names against that module.
for img_file in ['training_curves.png', 'confusion_matrix_normalized.png', 'misclassified.png']:
    img_path = config.results_dir / img_file
    if img_path.exists():
        display(Image(str(img_path)))
    else:
        # Name the missing file instead of silently showing nothing
        print(f"Missing result image: {img_path}")