In [None]:
from comet_ml.api import API
from loguru import logger

# Comet workspace that the helper below queries.
WORKSPACE_NAME = "derick-amontagna"


def get_best_exp(project_name: str, metric_select: str = "best_score"):
    """Find the experiment with the highest value of a metric in a Comet project.

    Every experiment of ``project_name`` (inside ``WORKSPACE_NAME``) is scanned
    and the one whose ``metric_select`` entry is largest is kept. Its
    ``dropout`` and ``G_arch`` parameters are then read back.

    Args:
        project_name: Comet project to search.
        metric_select: Name of the metric used for ranking.

    Returns:
        Tuple ``(experiment_name, dropout, g_arch)``. ``dropout`` is a float
        and ``g_arch`` is the raw parameter value; either is ``None`` when the
        best experiment does not expose that parameter.

    Raises:
        ValueError: If no experiment in the project reports ``metric_select``.
    """
    api = API()
    experiments = api.get(project_name=project_name, workspace=WORKSPACE_NAME)
    best_experiment = None
    best_dropout = None
    best_g = None
    # Start from -inf (not 0) so that runs whose scores are all zero or
    # negative can still be selected.
    best_score = float("-inf")

    for exp in experiments:
        name = exp.get_name()
        for metric in exp.get_metrics():
            if metric["metricName"] != metric_select:
                continue
            score = float(metric["metricValue"])
            # Strict comparison: on ties the first experiment seen wins.
            if score > best_score:
                best_score = score
                best_experiment = name

    if best_experiment is None:
        # Fail here with a clear message instead of passing None to the API.
        raise ValueError(
            f"No experiment in project {project_name!r} reports metric "
            f"{metric_select!r}"
        )

    best_exp = api.get_experiment(WORKSPACE_NAME, project_name, best_experiment)
    # One pass over a single parameter summary (one API call instead of two).
    for parameter in best_exp.get_parameters_summary():
        if parameter["name"] == "dropout":
            best_dropout = float(parameter["valueCurrent"])
        elif parameter["name"] == "G_arch":
            best_g = parameter["valueCurrent"]

    logger.info("*" * 25)
    logger.info(
        f"Best exp_name: {best_experiment} || for best_score: {best_score} with dropout: {best_dropout} and {best_g}"
    )
    return best_experiment, best_dropout, best_g

In [None]:
from comet_ml.api import API
from loguru import logger

import pandas as pd

# Comet workspace that the helper below queries.
WORKSPACE_NAME = "derick-amontagna"


def get_best_exp_mean(project_name: str):
    """Rank a project's experiments by the mean ``best_score`` of their config.

    One row is built per experiment with its best ``best_score`` value and its
    ``seed``, ``lr`` and ``G_arch`` parameters. Rows are grouped by
    ``(lr, g_arch)`` and the group mean / standard deviation of ``best_score``
    are attached to every row of the group.

    Experiments that never logged ``best_score`` are dropped. Rows whose ``lr``
    or ``G_arch`` is missing do not survive the inner merge with the group
    statistics.

    Args:
        project_name: Comet project (inside ``WORKSPACE_NAME``) to summarise.

    Returns:
        ``pandas.DataFrame`` with columns ``experiment_id``, ``name``,
        ``best_score``, ``seed``, ``lr``, ``g_arch``, ``mean_best_score`` and
        ``std_best_score``, sorted by ``mean_best_score`` in descending order.
        The frame is empty (same columns) when no experiment has a score.
    """

    def extract_best_score(metrics_list):
        # Compare numerically: the raw values may be strings, and max() on
        # strings is lexicographic ("9.5" > "10.5").
        scores = [
            float(metric["metricValue"])
            for metric in metrics_list
            if metric["metricName"] == "best_score"
            and metric["metricValue"] is not None
        ]
        # NaN marks "metric never logged" instead of crashing on max([]).
        return max(scores) if scores else float("nan")

    def extract_parameters(parameters_list, name):
        # First matching parameter wins; None when the parameter is absent.
        for parameter in parameters_list:
            if parameter["name"] == name:
                return parameter["valueCurrent"]
        return None

    api = API()
    experiments = api.get(project_name=project_name, workspace=WORKSPACE_NAME)

    # Extract one flat record per experiment.
    records = []
    for exp in experiments:
        parameters = exp.get_parameters_summary()
        records.append(
            {
                "experiment_id": exp.id,
                "name": exp.get_name(),
                "best_score": extract_best_score(exp.get_metrics()),
                "seed": extract_parameters(parameters, "seed"),
                "lr": extract_parameters(parameters, "lr"),
                "g_arch": extract_parameters(parameters, "G_arch"),
            }
        )

    # Explicit columns keep the schema stable even when there are no records.
    columns = ["experiment_id", "name", "best_score", "seed", "lr", "g_arch"]
    df = pd.DataFrame(records, columns=columns)
    df["best_score"] = df["best_score"].astype(float)
    df = df.dropna(subset=["best_score"]).reset_index(drop=True)

    if df.empty:
        return df.reindex(columns=[*columns, "mean_best_score", "std_best_score"])

    # One groupby yields both statistics; std is the sample std (NaN for a
    # group with a single run).
    stats = (
        df.groupby(["lr", "g_arch"])["best_score"]
        .agg(mean_best_score="mean", std_best_score="std")
        .reset_index()
    )
    df_merged = pd.merge(df, stats, on=["lr", "g_arch"], how="inner")

    return df_merged.sort_values(by="mean_best_score", ascending=False)

In [None]:
import os

import torch
import torch.nn as nn

from src.pytorch_adapt.containers import Models
from src.pytorch_adapt.models import Discriminator
from common.networks import ARCHITECTURES, Classifier


def get_model(
    G_arch="resnet50", model_name=None, device=None, dropout=0.2, num_classes=2
):
    """Rebuild the G / C / D models and restore G and C from a checkpoint.

    The generator ``G`` is built from ``ARCHITECTURES[G_arch]`` with its final
    layer replaced by an identity. The classifier ``C`` is sized from the
    per-architecture tables below. Both are restored from
    ``checkpoints/<model_name>.pth`` (entries ``["models"]["G"]`` and
    ``["models"]["C"]``) with ``strict=True``. The discriminator ``D`` is
    created fresh and is not restored.

    Args:
        G_arch: Backbone name; must appear in both size tables below.
        model_name: Checkpoint file stem inside the ``checkpoints`` directory.
        device: Device the models are moved to; also used as ``map_location``
            when reading the checkpoint. ``None`` keeps torch's defaults.
        dropout: Dropout value forwarded to ``Classifier``.
        num_classes: Number of classes forwarded to ``Classifier``.

    Returns:
        ``Models`` container with keys ``"G"``, ``"C"`` and ``"D"``.

    Raises:
        KeyError: If ``G_arch`` has no known feature/hidden size. Raised before
            any model is built or any file is read.
    """
    feature_dims = {
        "resnet18": 512,
        "resnet34": 512,
        "resnet50": 2048,
        "resnet101": 2048,
        "vgg16": 25088,
    }
    hidden_sizes = {
        "resnet18": 256,
        "resnet34": 256,
        "resnet50": 512,
        "resnet101": 2048,
    }
    # Validate up front: some architectures handled below (vgg16, densenet*)
    # are missing from one or both tables, which previously surfaced as a bare
    # KeyError, for vgg16 only after the backbone was built and the checkpoint read.
    if G_arch not in feature_dims or G_arch not in hidden_sizes:
        supported = sorted(set(feature_dims) & set(hidden_sizes))
        raise KeyError(
            f"Unsupported G_arch {G_arch!r}: no feature/hidden size is defined "
            f"for it (supported: {supported})"
        )
    feature_dim = feature_dims[G_arch]
    hidden_size = hidden_sizes[G_arch]

    # Get the G
    # NOTE(review): the configured weights are loaded here and then fully
    # overwritten by the strict checkpoint load below.
    G = ARCHITECTURES[G_arch]["model"](weights=ARCHITECTURES[G_arch]["weights"])
    # Kept for when sizes for these architectures are added to the tables.
    if G_arch in ["vgg16", "densenet161", "densenet201"]:
        G.classifier = nn.Identity()
    else:
        G.fc = nn.Identity()

    # Read the checkpoint once and map its tensors onto the requested device,
    # so a checkpoint saved on GPU can also be restored on a CPU-only machine.
    checkpoint_path = os.path.join("checkpoints", f"{model_name}.pth")
    checkpoint_models = torch.load(checkpoint_path, map_location=device)["models"]

    G.load_state_dict(checkpoint_models["G"], strict=True)
    G = G.to(device)

    # Get the C
    C = Classifier(
        in_size=feature_dim,
        hidden_size=hidden_size,
        dropout=dropout,
        num_classes=num_classes,
    )
    C.load_state_dict(checkpoint_models["C"], strict=True)
    C = C.to(device)

    # Get the D
    D = Discriminator(in_size=feature_dim)
    D = D.to(device)
    return Models({"G": G, "C": C, "D": D})

In [None]:
import torch
from tqdm.auto import tqdm

import torch.nn.functional as F
from src.pytorch_adapt.utils.common_functions import batch_to_device
from src.pytorch_adapt.validators import AccuracyValidator, AUCValidator


def gen_data_score(
    model: torch.nn.Module,
    dataloaders: torch.utils.data.DataLoader,
    data_type: str = "target_val_with_labels",
    device: torch.device = None,
    exp=None,
):
    """Run G followed by C over one dataloader and collect the outputs.

    Args:
        model: Container indexed with ``"G"`` and ``"C"``; ``model.eval()`` is
            called on it before inference.
        dataloaders: Indexed by ``data_type`` to obtain the loader to iterate
            (so in practice a mapping of loaders, despite the annotation).
        data_type: Loader key. The text before the first ``"_"`` selects the
            batch keys ``"<side>_imgs"`` and ``"<side>_labels"``.
        device: Forwarded to ``batch_to_device`` for every batch.
        exp: Unused.

    Returns:
        Dict with ``"logits"`` and ``"preds"`` (softmax over the last dim),
        each concatenated along dim 0, plus ``"labels"`` when the batches
        contain a ``"<side>_labels"`` entry.
    """
    logger.info(f"Eval - {data_type}")
    model.eval()
    generator = model["G"]
    classifier = model["C"]

    side = data_type.split("_")[0]
    image_key = f"{side}_imgs"
    label_key = f"{side}_labels"

    collected_logits = []
    collected_preds = []
    collected_labels = []
    with torch.no_grad():
        for batch in tqdm(dataloaders[data_type]):
            batch = batch_to_device(batch, device)
            output = classifier(generator(batch[image_key]))
            # Some classifiers return a list; the first element is the logits.
            if isinstance(output, list):
                output = output[0]
            collected_logits.append(output)
            collected_preds.append(F.softmax(output, dim=-1))
            if label_key in batch:
                collected_labels.append(batch[label_key])

        data_score = {
            "logits": torch.cat(collected_logits, dim=0),
            "preds": torch.cat(collected_preds, dim=0),
        }
        # Labels are only present for supervised loaders.
        if collected_labels:
            data_score["labels"] = torch.cat(collected_labels, dim=0)
    return data_score

In [None]:
"""
Contains functionality for creating PyTorch DataLoaders
"""

import os
import random

import torch
from PIL import Image
from loguru import logger
from torchvision.transforms import v2 as transforms
from pytorch_adapt.datasets import (
    DataloaderCreator,
    SourceDataset,
    TargetDataset,
    CombinedSourceAndTargetDataset,
    ConcatDataset,
)

from common.CustomData.MRI_NII_2D import Dataset2D

# CPU count reported by the OS; os.cpu_count() can return None when it cannot
# be determined.
# NOTE(review): not referenced by the visible code (create_dataloaders_mri_2d
# defaults num_workers to 0) - confirm whether it is still needed.
NUM_WORKERS = os.cpu_count()


class GrayscaleToRGB:
    """Turn a single-channel image tensor into a three-channel one.

    The channel count is read from dimension 0 of the input.
    """

    def __call__(self, x):
        """Return ``x`` with three channels in dimension 0.

        Args:
            x: Tensor whose dimension 0 holds the channels.

        Returns:
            ``x`` itself when it already has 3 channels, otherwise the single
            channel repeated three times along dimension 0.

        Raises:
            ValueError: If dimension 0 is neither 1 nor 3.
        """
        channels = x.size(0)
        if channels == 3:
            return x
        if channels == 1:
            return torch.cat([x, x, x], dim=0)
        # ValueError is still caught by existing `except Exception` handlers,
        # and the message now reports the offending channel count.
        raise ValueError(
            "Image is not grayscale (or even RGB): expected 1 or 3 channels "
            f"in dim 0, got {channels}."
        )


class RandomRotFlip:
    """Random quarter-turn rotation followed by independent random flips."""

    def __call__(self, image):
        """Rotate and flip ``image`` over its last two dimensions.

        Draws, in this order: the number of quarter turns (0-3), the
        horizontal-flip decision and the vertical-flip decision.
        """
        # Random rotation (0, 90, 180 or 270 degrees).
        quarter_turns = torch.randint(0, 4, (1,)).item()
        rotated = torch.rot90(image, quarter_turns, dims=(-2, -1))

        flip_horizontal = torch.rand(1).item() > 0.5
        if flip_horizontal:
            rotated = torch.flip(rotated, dims=[-1])  # horizontal flip

        flip_vertical = torch.rand(1).item() > 0.5
        if flip_vertical:
            rotated = torch.flip(rotated, dims=[-2])  # vertical flip

        return rotated


class RandomRotate:
    """Rotate an image by a random integer angle using nearest interpolation."""

    def __init__(self, angle_range=(-20, 20)):
        """Store the inclusive ``(low, high)`` bounds for the random angle."""
        self.angle_range = angle_range

    def __call__(self, image):
        """Return ``image`` rotated by an angle drawn from ``angle_range``."""
        lower = self.angle_range[0]
        upper = self.angle_range[1]
        # Random angle; random.randint includes both bounds.
        drawn_angle = random.randint(lower, upper)
        nearest = transforms.InterpolationMode.NEAREST
        return transforms.functional.rotate(image, drawn_angle, interpolation=nearest)


# Preprocessing pipelines keyed by usage: "train" adds the random
# rotation/flip augmentations, "val_test" only converts the input.
data_transforms = {}

data_transforms["train"] = transforms.Compose(
    [
        transforms.ToTensor(),
        GrayscaleToRGB(),
        RandomRotFlip(),
        RandomRotate(angle_range=(-20, 20)),
        transforms.ToDtype(torch.float32),
        # MinMaxNormalize(),  (disabled)
    ]
)

data_transforms["val_test"] = transforms.Compose(
    [
        transforms.ToTensor(),
        GrayscaleToRGB(),
        transforms.ToDtype(torch.float32),
        # MinMaxNormalize(),  (disabled)
    ]
)


# Domain name -> dataset directory. Each scanner vendor has a 2D entry
# ("6_step_nifti_2d") and a "-3D" entry ("5_step_class_folders").
DOMAINS = {
    f"ADNI1-{vendor}{suffix}": os.path.join(
        "data", "ADNI1-T1-AD-CN", "Image", "Preprocess", step_dir, vendor
    )
    for suffix, step_dir in (("", "6_step_nifti_2d"), ("-3D", "5_step_class_folders"))
    for vendor in ("GE", "Philips", "Siemens")
}


def create_dataloaders_mri_2d(
    source: str,
    target: str,
    transform_train: transforms.Compose = data_transforms["train"],
    transform_val_test: transforms.Compose = data_transforms["val_test"],
    algorithm: str = "source-only",
    validator: str = "Accuracy",
    batch_size: int = 64,
    num_workers: int = 0,
    seed: int = None,
):
    """Build the source/target datasets and wrap them in dataloaders.

    Args:
        source: Key of ``DOMAINS`` used as the source domain.
        target: Key of ``DOMAINS`` used as the target domain.
        transform_train: Transform for the training copies of the data.
        transform_val_test: Transform for every evaluation dataset.
        algorithm: ``"source-only"`` trains on ``"src_train"`` alone; any other
            value trains on the combined source+target ``"train"`` dataset.
        validator: Unused by this function.
        batch_size: Forwarded to ``DataloaderCreator``.
        num_workers: Forwarded to ``DataloaderCreator``.
        seed: Forwarded to ``DataloaderCreator``.

    Returns:
        Tuple ``(dataloaders, target_dataset_size, train_name)`` where
        ``target_dataset_size`` is ``len`` of the ``"target_train"`` dataset
        and ``train_name`` is the single training loader name.
    """
    source_only = algorithm == "source-only"
    all_splits = ("train", "val", "test")

    logger.info("Loading Source and Target Datasets".center(70, "+"))
    data_output = {
        "src": {"train": {}, "val_test": {}},
        "target": {"train": {}, "val_test": {}},
    }
    # Splits that also get a copy built with the training transform.
    # NOTE(review): the source "val" training copy is built but never used below.
    train_transform_splits = {"src": ("train", "val"), "target": ("train",)}
    for side, domain_name in (("src", source), ("target", target)):
        for split in all_splits:
            data_output[side]["val_test"][split] = Dataset2D(
                domain=DOMAINS[domain_name], split=split, transform=transform_val_test
            )
            if split in train_transform_splits[side]:
                data_output[side]["train"][split] = Dataset2D(
                    domain=DOMAINS[domain_name], split=split, transform=transform_train
                )

    src_eval = data_output["src"]["val_test"]
    src_train_tf = data_output["src"]["train"]
    target_eval = data_output["target"]["val_test"]
    target_train_tf = data_output["target"]["train"]

    logger.info("Create Source and Target Datasets".center(70, "+"))
    dataset = {}
    # In source-only mode "src_train" is the training loader, so it uses the
    # training transform; otherwise it is only evaluated.
    dataset["src_train"] = SourceDataset(
        src_train_tf["train"] if source_only else src_eval["train"]
    )
    dataset["src_val"] = SourceDataset(src_eval["val"])
    dataset["src_test"] = SourceDataset(src_eval["test"])

    for split in all_splits:
        dataset[f"target_{split}"] = TargetDataset(target_eval[split])
    for split in all_splits:
        dataset[f"target_{split}_with_labels"] = TargetDataset(
            target_eval[split], domain=1, supervised=True
        )

    dataset["train"] = CombinedSourceAndTargetDataset(
        SourceDataset(src_train_tf["train"]),
        TargetDataset(target_train_tf["train"]),
    )

    # Loaders that are always evaluation-only, in the order handed to the creator.
    shared_val_names = [
        "src_val",
        "src_test",
        "target_train",
        "target_train_with_labels",
        "target_val",
        "target_val_with_labels",
        "target_test",
        "target_test_with_labels",
    ]
    if source_only:
        train_names = ["src_train"]
        val_names = ["train", *shared_val_names]
    else:
        train_names = ["train"]
        val_names = ["src_train", *shared_val_names]

    logger.info("Create Dataloader".center(70, "+"))
    dc = DataloaderCreator(
        batch_size=batch_size,
        num_workers=num_workers,
        train_names=train_names,
        val_names=val_names,
        seed=seed,
    )

    dataloaders = dc(**dataset)
    target_dataset_size = len(dataset["target_train"])
    logger.info("Finishing the Creation of Dataloaders".center(70, "+"))
    return dataloaders, target_dataset_size, train_names[0]

In [None]:
# Exp
# name, seed, g = get_best_exp(project_name="adni1-so-siemens")
source = "ADNI1-GE"
target = "ADNI1-Siemens"
project_name = "adni1-so-ge-real"
TOP_K = 3  # number of best-ranked runs to re-evaluate

# Init process
# Take the TOP_K rows with the highest group mean, then order them by their
# own best_score.
df = get_best_exp_mean(project_name=project_name)
df_3 = df.head(TOP_K).sort_values(by="best_score", ascending=False)

# The device does not depend on the run, so pick it once.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

results = []
# Iterate over the rows actually available: a project with fewer than TOP_K
# runs no longer raises IndexError.
for i in range(len(df_3)):
    # name / g / seed / model / dataloaders stay module-level on purpose:
    # later cells read the values left by the last iteration.
    name = df_3["name"].iloc[i]
    g = df_3["g_arch"].iloc[i]
    seed = df_3["seed"].iloc[i]
    result = {"name": name}

    # Model
    # NOTE(review): dropout is fixed to 0.5 here rather than read from the
    # experiment's parameters - confirm this matches the trained runs.
    model = get_model(G_arch=g, model_name=name, dropout=0.5, device=device)

    # Data
    # from common.data_setup import create_dataloaders_mri_2d
    dataloaders, target_dataset_size, train_name = create_dataloaders_mri_2d(
        source=source,
        target=target,
        algorithm="source-only",
        batch_size=128,
        num_workers=8,
        seed=int(seed),
    )

    # Gen Scores
    data_score_source = gen_data_score(model, dataloaders, "src_" + "test", device, None)
    data_score_target = gen_data_score(
        model, dataloaders, "target_" + "train" + "_with_labels", device, None
    )
    # Both score dicts are passed under the "src_val" key - presumably the key
    # these validators expect by default; TODO confirm.
    result["Acc_Source"] = AccuracyValidator()(src_val=data_score_source)
    result["Acc_Target"] = AccuracyValidator()(src_val=data_score_target)
    result["AUC_Source"] = AUCValidator()(src_val=data_score_source)
    result["AUC_Target"] = AUCValidator()(src_val=data_score_target)
    results.append(result)

#'ultimate_fixture_9510' - GE
# 'loud_shrimp_6199' - Philips
# 'regional_purlin_4276' - Siemens

In [None]:
# Data
# from common.data_setup import create_dataloaders_mri_2d
# Rebuild the GE -> Siemens dataloaders on their own.
# NOTE: `seed` is not set in this cell; it is the value left behind by the
# experiment loop in the earlier cell.
source, target = "ADNI1-GE", "ADNI1-Siemens"
dataloaders, target_dataset_size, train_name = create_dataloaders_mri_2d(
    source=source,
    target=target,
    seed=int(seed),
    algorithm="source-only",
    batch_size=128,
    num_workers=8,
)

In [None]:
# Gen Scores
# Source scores come from the source test loader; target scores come from the
# labelled target *train* loader. `model`, `dataloaders` and `device` must
# already exist from earlier cells.
data_score_source = gen_data_score(
    model=model,
    dataloaders=dataloaders,
    data_type="src_" + "test",
    device=device,
    exp=None,
)
data_score_target = gen_data_score(
    model=model,
    dataloaders=dataloaders,
    data_type="target_" + "train" + "_with_labels",
    device=device,
    exp=None,
)

In [None]:
# Accuracy computed from the source-domain scores.
AccuracyValidator()(src_val=data_score_source)

In [None]:
# Accuracy computed from the target-domain scores (still passed as "src_val").
AccuracyValidator()(src_val=data_score_target)

In [None]:
# AUC computed from the source-domain scores.
AUCValidator()(src_val=data_score_source)  # 89

In [None]:
# AUC computed from the target-domain scores (still passed as "src_val").
AUCValidator()(src_val=data_score_target)  # 79