# Final Project CFDS - Pipeline & Tuning

This notebook demonstrates how to use the `best_library` package to run the end-to-end machine learning pipeline and to perform hyperparameter tuning.

In [None]:
import sys
import os

# Add the parent directory to sys.path to access src.
# The membership check keeps this cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
PROJECT_ROOT = os.path.abspath('..')
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [None]:
import torch
from src.best_library.split.split_train_test import split_dataset
from src.best_library.preprocessing.preprocessing import Preprocessing
from src.best_library.data.load_data import load_data
from src.best_library.model.model_definition import build_model
from src.best_library.model.train import train_model
from src.best_library.features.feature_engineering import compute_dataset_stats
from src.best_library.hyperparameter_tuning.tuner import HyperparameterTuner

## 1. Standard Pipeline Execution

Here we define our configuration, split the data, compute dataset statistics, build the preprocessing transform and data loaders, and train a baseline model.

In [None]:
# Configuration
# Note: Paths are relative to the notebook location
DATASET_DIR = "../dataset"  # source directory handed to split_dataset
WORK_DIR = "../data"        # working directory used by the split, loading and tuning cells
BATCH_SIZE = 16
LR = 1e-4
EPOCHS = 5
IMG_SIZE = 224
SEED = 42

# Seed PyTorch's global RNG so that weight initialisation and any torch-driven
# shuffling are repeatable across "Restart & Run All".
# NOTE(review): split_dataset and the data loaders may rely on other RNGs
# (e.g. `random` / NumPy) that this call does not cover - confirm in the library.
torch.manual_seed(SEED);  # trailing ';' hides the returned Generator repr

In [None]:
# 1. Split Data
# Calls split_dataset with the source dataset directory and the working directory.
# NOTE(review): presumably this writes the train/validation splits under WORK_DIR
# (a later cell looks for WORK_DIR/train) - confirm, and confirm that re-running
# this cell on an already-populated WORK_DIR is safe.
split_dataset(DATASET_DIR, WORK_DIR)

In [None]:
# 2. Feature Engineering (Compute Stats)
# Run compute_dataset_stats on the "train" sub-directory of WORK_DIR.
# NOTE(review): the return value is discarded here; presumably the function
# prints or persists the statistics itself - confirm.
train_dir = os.path.join(WORK_DIR, "train")
if os.path.isdir(train_dir):
    compute_dataset_stats(train_dir, img_size=IMG_SIZE)
else:
    # Previously a missing directory was skipped without any message, which hid
    # a failed or un-run split step until a later cell broke. Keep the
    # best-effort behaviour (no exception) but make the skip visible.
    print(f"Warning: training directory not found at {train_dir!r}; skipping dataset statistics.")

In [None]:
# 3. Preprocessing & Data Loading
# Build the transform for the configured image size.
preprocessing = Preprocessing(img_size=IMG_SIZE)
transform = preprocessing.get_transform()

# load_data's result is unpacked into the two loaders and the class names.
loaded = load_data(WORK_DIR, BATCH_SIZE, transform)
train_loader, val_loader, class_names = loaded

In [None]:
# 4. Build & Train Model
# Use CUDA when PyTorch reports it as available; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using device:", device)

# The model is built with one class per entry in class_names.
num_classes = len(class_names)
model = build_model(device, num_classes=num_classes)

# NOTE(review): save_path is presumably where train_model writes the trained
# weights - confirm against train_model's implementation.
train_model(
    model,
    train_loader,
    val_loader,
    EPOCHS,
    LR,
    device,
    save_path="../alpaca_classifier_notebook.pt",
)

## 2. Hyperparameter Tuning

Now we use the `HyperparameterTuner` to search over the learning rates, batch sizes, and epoch counts listed in `param_grid` and find the best configuration.

In [None]:
# Candidate values for each hyperparameter handed to the tuner.
param_grid = {
    'lr': [1e-3, 1e-4],
    'batch_size': [8, 16],
    'epochs': [3] # Keeping it short for demo
}

print("Initializing Tuner...")
tuner = HyperparameterTuner(WORK_DIR, param_grid, img_size=IMG_SIZE)

# tune() returns two values, unpacked as the best parameters and their score.
# NOTE(review): best_acc is presumably an accuracy on held-out data - confirm
# which split the tuner evaluates on.
best_params, best_acc = tuner.tune()

print(f"Optimization finished! Best Params: {best_params}")
# The score used to be unpacked and then dropped; report it so the reader can
# judge how good the winning configuration actually is.
print(f"Best Accuracy: {best_acc}")