In [1]:
import pandas as pd
import numpy as np
from torchvision.datasets import Caltech101, Caltech256

# Get label names
caltech101_labels = Caltech101(root="datasets/caltech101", download=False).categories
caltech256_labels = Caltech256(root="datasets/caltech256", download=False).categories

# Get targets
caltech101_targets = pd.read_csv("outputs/caltech101.csv")
caltech256_targets = pd.read_csv("outputs/caltech256.csv")
caltech101_model_caltech256 = pd.read_csv("outputs/caltech101_model_caltech256.csv")
caltech256_model_caltech101 = pd.read_csv("outputs/caltech256_model_caltech101.csv")

In [2]:
def form_correlation_matrix(
    targets: np.ndarray[int], predictions: np.ndarray[int]
) -> np.ndarray[int]:
    correlations = np.zeros((np.max(targets) + 1, np.max(predictions) + 1), dtype=int)

    for i in range(len(predictions)):
        correlations[targets[i], predictions[i]] += 1

    return correlations


correlations_caltech101_model_caltech256 = form_correlation_matrix(
    caltech256_targets["targets"].to_numpy(),
    caltech101_model_caltech256["predictions"].to_numpy(),
)
correlations_caltech256_model_caltech101 = form_correlation_matrix(
    caltech101_targets["targets"].to_numpy(),
    caltech256_model_caltech101["predictions"].to_numpy(),
)

print(correlations_caltech101_model_caltech256[1])


def most_common_foreign_prediction(
    correlations: np.ndarray[int],
) -> tuple[np.ndarray[int], np.ndarray[float]]:
    values = np.zeros(correlations.shape[0], dtype=int)
    probababilities = np.zeros(correlations.shape[0], dtype=float)
    for i in range(correlations.shape[0]):
        values[i] = np.argmax(correlations[i, :])
        probababilities[i] = correlations[i, values[i]] / np.sum(correlations[i, :])

    return values, probababilities


def most_common_foreign_prediction_labels(
    correlations: np.ndarray[int],
    target_labels: list[str],
    prediction_labels: list[str],
) -> list[tuple[str, str, float]]:
    mcfp = most_common_foreign_prediction(correlations)
    values = []
    for i in range(len(mcfp[0])):
        values.append(
            (
                target_labels[i],
                prediction_labels[mcfp[0][i]],
                mcfp[1][i],
            )
        )

    return values


mapping_caltech101_model_caltech256 = most_common_foreign_prediction_labels(
    correlations_caltech101_model_caltech256,
    caltech256_labels,
    caltech101_labels,
)
mapping_caltech256_model_caltech101 = most_common_foreign_prediction_labels(
    correlations_caltech256_model_caltech101,
    caltech101_labels,
    caltech256_labels,
)

[0 0 0 0 1 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0]


  probababilities[i] = correlations[i, values[i]] / np.sum(correlations[i, :])
