In [1]:
# Show the kernel's current working directory (IPython line magic) before it is changed.
%pwd

'/home/fbutic/Documents/cancer-classification/research'

In [2]:
import os

# The notebook lives in research/, but every path below is relative to the
# project root. Only step up when we are actually inside research/, so that
# re-running this cell does not keep climbing the directory tree.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("..")

In [3]:
# Confirm the working directory that the relative paths below will resolve against.
%pwd

'/home/fbutic/Documents/cancer-classification'

In [4]:
# SECURITY: the MLflow/DagsHub access token must never be written into the
# notebook. Any token that was previously committed in this cell has to be
# revoked and regenerated on DagsHub.
#
# Values already exported in the environment take precedence; only a missing
# token is asked for interactively, and it is never echoed or stored on disk.
from getpass import getpass

os.environ.setdefault(
    "MLFLOW_TRACKING_URI",
    "https://dagshub.com/buzaXnov/cancer-classification.mlflow",
)
os.environ.setdefault("MLFLOW_TRACKING_USERNAME", "buzaXnov")
if not os.environ.get("MLFLOW_TRACKING_PASSWORD"):
    os.environ["MLFLOW_TRACKING_PASSWORD"] = getpass("DagsHub access token: ")

MLFLOW_TRACKING_URI=https://dagshub.com/buzaXnov/cancer-classification.mlflow \
MLFLOW_TRACKING_USERNAME=buzaXnov \
MLFLOW_TRACKING_PASSWORD=<your-dagshub-access-token> \
python script.py

In [5]:
import torch

from cnnClassifier.components.training import Training
from cnnClassifier.config.configuration import ConfigurationManager

In [6]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# map_location lets a checkpoint that was saved on a GPU load on a CPU-only host.
# weights_only=False is spelled out because the file is presumably a whole
# pickled model object rather than a state_dict (TODO confirm against the
# training stage). Unpickling can execute arbitrary code, so only load
# checkpoints produced by this project.
model = torch.load(
    "artifacts/training/checkpoints/best.pt",
    map_location=device,
    weights_only=False,
)

In [7]:
from pathlib import Path
from dataclasses import dataclass
from cnnClassifier.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH
from cnnClassifier.utils.common import read_yaml, create_directories, save_json

@dataclass(frozen=True)
class EvaluationConfig:
    """Immutable bundle of settings read by the evaluation stage."""

    # Checkpoint file that the evaluation stage loads with torch.load.
    path_of_model: Path
    # Dataset root directory; the evaluation stage appends "test" to it.
    training_data: Path
    # Full contents of the params file, logged to MLflow as run parameters.
    all_params: dict
    # URI handed to mlflow.set_registry_uri; filled from MLFLOW_TRACKING_URI.
    mlflow_uri: str
    # Image size from the params file; all but the last entry are passed to
    # transforms.Resize (presumably [height, width, channels] -- TODO confirm).
    params_image_size: list
    # Batch size used for the test DataLoader.
    params_batch_size: int

In [8]:
class ConfigurationManager:
    """Reads the project YAML files and derives the evaluation-stage config.

    NOTE(review): this definition shadows the ``ConfigurationManager`` imported
    from ``cnnClassifier.config.configuration`` earlier in the notebook.
    """

    def __init__(
        self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH
    ) -> None:
        """Load both YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the hyper-parameter YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_eval_config(self) -> EvaluationConfig:
        """Assemble the ``EvaluationConfig`` from the loaded YAML content.

        Returns:
            EvaluationConfig pointing at ``best.pt`` inside the training
            checkpoints directory and at the ``Data`` folder of the unzipped
            dataset. ``mlflow_uri`` is read from the ``MLFLOW_TRACKING_URI``
            environment variable and is ``None`` when that variable is unset.
        """
        training = self.config.training

        # EvaluationConfig declares these fields as Path, so build real Path
        # objects instead of the plain strings os.path.join would produce.
        training_data = Path(self.config.data_ingestion.unzip_dir) / "Data"
        model_path = Path(training.checkpoints_dir) / "best.pt"

        eval_config = EvaluationConfig(
            path_of_model=model_path,
            training_data=training_data,
            all_params=self.params,
            mlflow_uri=os.getenv("MLFLOW_TRACKING_URI"),
            params_image_size=self.params.IMAGE_SIZE,
            params_batch_size=self.params.BATCH_SIZE,
        )

        return eval_config

In [9]:
import os

import torch
import torch.nn as nn
import torchvision as torchvision
from torchvision import transforms
import torch.utils.data as data
from tqdm import tqdm

In [10]:
from urllib.parse import urlparse
import mlflow


class Evaluation:
    """Scores a trained classifier on the test split and records the result."""

    def __init__(self, config) -> None:
        """Store the evaluation config and pick the compute device.

        Args:
            config: An ``EvaluationConfig`` describing model, data and params.
        """
        self.config: EvaluationConfig = config
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.model = None

    def get_trained_model(self):
        """Load the trained checkpoint from disk onto ``self.device``."""
        # map_location makes a GPU-saved checkpoint loadable on a CPU-only host.
        # weights_only=False is explicit because the checkpoint is used as a
        # complete model object (it is moved with .to() and called directly),
        # which newer PyTorch defaults refuse to unpickle. Unpickling can run
        # arbitrary code, so only load checkpoints produced by this project.
        self.model = torch.load(
            self.config.path_of_model,
            map_location=self.device,
            weights_only=False,
        ).to(self.device)

    def get_dataloader(self):
        """Create the dataloader for the test dataset."""
        TEST_DATA_PATH = os.path.join(self.config.training_data, "test")

        transform_img = transforms.Compose(
            [
                # Drop the last entry of the configured image size before
                # resizing (presumably the channel count -- TODO confirm).
                transforms.Resize(self.config.params_image_size[:-1]),
                transforms.ToTensor(),
                transforms.Normalize(
                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
                ),
            ]
        )

        test_dataset = torchvision.datasets.ImageFolder(
            root=TEST_DATA_PATH, transform=transform_img
        )

        # Dataloaders
        BATCH_SIZE = self.config.params_batch_size
        self.test_loader = data.DataLoader(
            dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4
        )

    def test(self):
        """Run the model over ``self.test_loader`` and store accuracy in percent.

        Requires ``get_trained_model`` and ``get_dataloader`` to have been
        called first. The result is kept in ``self.test_accuracy`` and printed.
        """
        self.model.eval()
        total, correct = 0, 0
        with torch.no_grad():
            for inputs, labels in tqdm(self.test_loader):
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                outputs = self.model(inputs)
                # Predicted class = index of the largest output along dim 1.
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        self.test_accuracy = 100 * correct / total

        print(f"Test Accuracy: {self.test_accuracy}%")

    def save_score(self):
        """Write the test accuracy to ``scores.json`` in the working directory."""
        scores = {"test_accuracy": self.test_accuracy}
        save_json(scores, Path("scores.json"))

    def log_into_mlflow(self):
        """Log params, test accuracy and the model itself to MLflow.

        The model is also registered under the name ``VGG16`` unless the
        tracking store is a local ``file`` store.
        """
        mlflow.set_registry_uri(self.config.mlflow_uri)
        tracking_uri_type_store = urlparse(mlflow.get_tracking_uri()).scheme

        with mlflow.start_run():
            mlflow.log_params(self.config.all_params)
            mlflow.log_metrics({"test_accuracy": self.test_accuracy})

            if tracking_uri_type_store != "file":
                mlflow.pytorch.log_model(
                    self.model, "model", registered_model_name="VGG16"
                )
            else:
                mlflow.pytorch.log_model(self.model, "model")

In [11]:
# Run the evaluation stage end to end. Exceptions propagate unchanged, so no
# try/except that merely re-raises is needed around these calls.
config = ConfigurationManager()
eval_config = config.get_eval_config()
evaluation = Evaluation(eval_config)
evaluation.get_trained_model()
evaluation.get_dataloader()
evaluation.test()
evaluation.log_into_mlflow()
evaluation.save_score()

[2024-05-15 15:40:35,945: INFO: common: YAML file: config/config.yaml loaded successfully!]
[2024-05-15 15:40:35,949: INFO: common: YAML file: params.yaml loaded successfully!]
[2024-05-15 15:40:35,951: INFO: common: Created directory at: artifacts]


100%|██████████| 20/20 [00:01<00:00, 11.93it/s]


Test Accuracy: 56.507936507936506%


Registered model 'VGG16' already exists. Creating a new version of this model...
2024/05/15 15:42:35 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: VGG16, version 3
Created version '3' of model 'VGG16'.


[2024-05-15 15:42:36,289: INFO: common: JSON file saved at scores.json]
