# PCB Defect Detector - Training Pipeline

This notebook trains a CNN to classify six types of PCB defects using transfer learning from a MobileNetV2 backbone.

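**Setup:** Before running, make sure to:
1. Add the dataset: `akhatova/pcb-defects`
2. Enable GPU: Settings → Accelerator → GPU
3. Replace `YOUR_USERNAME` in the clone cell below with the GitHub account that hosts the repository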

In [None]:
# Clone the repository
!git clone https://github.com/YOUR_USERNAME/pcb-defect-detector.git
%cd pcb-defect-detector

In [None]:
# Import modules
import sys
# Put the cloned repository directory at the front of the module search path
# so the project-local imports below can be found.
# NOTE(review): assumes the clone cell ran with /kaggle/working as the working
# directory, so the checkout lives at this path — confirm.
sys.path.insert(0, '/kaggle/working/pcb-defect-detector')

from pathlib import Path
# Project-local modules; their definitions are not part of this notebook.
from config import PipelineConfig, DataConfig, ModelConfig, TrainingConfig
from data_pipeline import DataPipeline
from model_builder import PCBModelBuilder
from trainer import Trainer
from evaluator import Evaluator

In [None]:
# Kaggle-specific configuration, assembled one section at a time.

# Data: where the images live and how they are resized, batched and split.
data_cfg = DataConfig(
    data_dir=Path("/kaggle/input/pcb-defects"),
    image_size=(224, 224),
    batch_size=32,
    validation_split=0.15,
    test_split=0.15,
)

# Model: MobileNetV2 base with freeze_base enabled and a 0.5 dropout rate.
model_cfg = ModelConfig(base_model="MobileNetV2", dropout_rate=0.5, freeze_base=True)

# Training: schedule, learning rate and checkpoint location.
training_cfg = TrainingConfig(
    epochs=30,
    learning_rate=1e-4,
    checkpoint_dir=Path("/kaggle/working/checkpoints"),
)

config = PipelineConfig(data=data_cfg, model=model_cfg, training=training_cfg)

In [None]:
# Parse dataset structure: map each class folder name to a sorted list of its
# image files, printing a per-class and overall count along the way.
import logging
logging.basicConfig(level=logging.INFO)

dataset_path = Path("/kaggle/input/pcb-defects/PCB_DATASET")
valid_extensions = {'.jpg', '.jpeg', '.png', '.bmp'}

# Fail early with an actionable message instead of an opaque iterdir() error.
if not dataset_path.is_dir():
    raise FileNotFoundError(
        f"Dataset folder not found: {dataset_path}. "
        "Add the 'akhatova/pcb-defects' dataset to this notebook."
    )

# NOTE(review): the Kaggle dataset appears to keep its per-class folders under
# PCB_DATASET/images rather than directly under PCB_DATASET — confirm. When
# that sub-folder exists it is used as the class root; otherwise the top-level
# folder is scanned exactly as before.
images_subdir = dataset_path / "images"
class_root = images_subdir if images_subdir.is_dir() else dataset_path

class_images = {}
for class_dir in sorted(class_root.iterdir()):
    if not class_dir.is_dir():
        continue
    # iterdir() yields entries in arbitrary order, so the list is sorted to
    # keep it stable between runs. is_file() skips sub-directories whose names
    # happen to end in an image extension.
    images = sorted(
        img for img in class_dir.iterdir()
        if img.is_file() and img.suffix.lower() in valid_extensions
    )
    # Folders without any matching image are not treated as classes.
    if images:
        class_images[class_dir.name] = images
        print(f"{class_dir.name}: {len(images)} images")

# Stop here rather than handing an empty mapping to the later cells.
if not class_images:
    raise ValueError(
        f"No images with extensions {sorted(valid_extensions)} found under {class_root}"
    )

class_names = sorted(class_images.keys())
print(f"\nTotal: {len(class_names)} classes, {sum(len(v) for v in class_images.values())} images")

In [None]:
# Create the data pipeline for the discovered classes and hand it the
# per-class image lists.
data_pipeline = DataPipeline(config.data, class_names)
data_pipeline.prepare_data(class_images)

# Show the class weights reported by the pipeline.
class_weights = data_pipeline.get_class_weights()
print("Class weights:", class_weights)

In [None]:
# Build the model, passing the number of discovered classes, then print its summary.
num_classes = len(class_names)
model_builder = PCBModelBuilder(config.model, config.data, num_classes=num_classes)
model = model_builder.build()
model.summary()

In [None]:
# Train model: compile, set up callbacks, then run training.
trainer = Trainer(config.training, model)
trainer.compile()
trainer.setup_callbacks()

# Gather the training inputs from the data pipeline before starting the run.
train_ds = data_pipeline.get_train_dataset()
val_ds = data_pipeline.get_val_dataset()
train_class_weights = data_pipeline.get_class_weights()

history = trainer.train(
    train_dataset=train_ds,
    val_dataset=val_ds,
    class_weights=train_class_weights,
)

In [None]:
# Evaluate model on the test dataset; the evaluator is given
# /kaggle/working/results as its output directory.
results_dir = Path("/kaggle/working/results")
evaluator = Evaluator(model, class_names, output_dir=results_dir)

# Collect the report inputs in the same order they are passed below.
test_ds = data_pipeline.get_test_dataset()
test_labels = data_pipeline.get_test_labels()
training_history = trainer.get_history()

results = evaluator.generate_full_report(
    test_dataset=test_ds,
    true_labels=test_labels,
    history=training_history,
)

print(f"\nTest Accuracy: {results['test_accuracy']:.4f}")

In [None]:
# Display the saved result plots inline, in order.
import matplotlib.pyplot as plt
from IPython.display import Image, display

plot_paths = (
    '/kaggle/working/results/confusion_matrix.png',
    '/kaggle/working/results/training_curves.png',
)
for plot_path in plot_paths:
    display(Image(plot_path))

In [None]:
# Save the final model to the Kaggle working directory.
model_path = Path("/kaggle/working/pcb_defect_model.keras")
trainer.save_model(model_path)
print("Model saved!")