In [None]:
# Import necessary libraries
import os
import numpy as np
from PIL import Image
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models

from sklearn.metrics import accuracy_score

# Suppress warnings
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# Pick the compute device once: the GPU when CUDA is usable, the CPU otherwise
_device_name = 'cpu'
if torch.cuda.is_available():
    _device_name = 'cuda'
device = torch.device(_device_name)
print(f'Using device: {device}')

# ==========================
# Part 1: Train ResNet18 on CIFAR-10
# ==========================

# Function to train ResNet18 on CIFAR-10
def train_resnet18_on_cifar10(num_epochs=10):
    """Train a ResNet18 classifier on CIFAR-10 and save its weights.

    The training split is augmented with random crops and horizontal flips;
    after every epoch the model is scored on the CIFAR-10 test split. The
    final weights are written to 'resnet18_cifar10.pth'.

    Args:
        num_epochs: Number of passes over the training split; also used as
            the period of the cosine learning-rate schedule.

    Returns:
        The trained model, located on the module-level ``device``.
    """
    # Per-channel CIFAR-10 statistics
    mean = [0.4914, 0.4822, 0.4465]
    std = [0.2023, 0.1994, 0.2010]

    # Augmentation is applied to the training split only
    train_steps = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
    test_steps = [
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
    transform_train = transforms.Compose(train_steps)
    transform_test = transforms.Compose(test_steps)

    # Datasets (downloaded into ./data when missing)
    trainset = torchvision.datasets.CIFAR10(
        root='./data', train=True, download=True, transform=transform_train)
    testset = torchvision.datasets.CIFAR10(
        root='./data', train=False, download=True, transform=transform_test)

    # Batch iterators
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=128, shuffle=True, num_workers=2)
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=100, shuffle=False, num_workers=2)

    # Network with a 10-way classification head
    net = models.resnet18(num_classes=10).to(device)

    # Objective, optimizer and learning-rate schedule
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)

    for epoch in range(num_epochs):
        # ---- training pass ----
        net.train()
        loss_sum = 0.0
        seen = 0
        hits = 0
        progress = tqdm(trainloader, desc=f'Epoch {epoch+1}/{num_epochs}')
        for inputs, targets in progress:
            inputs = inputs.to(device)
            targets = targets.to(device)

            optimizer.zero_grad()
            logits = net(inputs)
            loss = criterion(logits, targets)
            loss.backward()
            optimizer.step()

            # The criterion returns a batch mean, so weight it by batch size
            loss_sum += loss.item() * inputs.size(0)
            predicted = logits.max(1)[1]
            seen += targets.size(0)
            hits += (predicted == targets).sum().item()

        scheduler.step()
        epoch_loss = loss_sum / seen
        epoch_acc = 100. * hits / seen
        print(f'Training Loss: {epoch_loss:.4f}, Training Accuracy: {epoch_acc:.2f}%')

        # ---- evaluation pass on the test split ----
        net.eval()
        seen = 0
        hits = 0
        with torch.no_grad():
            for inputs, targets in testloader:
                inputs = inputs.to(device)
                targets = targets.to(device)
                predicted = net(inputs).max(1)[1]
                seen += targets.size(0)
                hits += (predicted == targets).sum().item()
        test_acc = 100. * hits / seen
        print(f'Validation Accuracy: {test_acc:.2f}%')

    # Persist the weights so later runs can skip training
    torch.save(net.state_dict(), 'resnet18_cifar10.pth')
    print('Model trained and saved as resnet18_cifar10.pth')
    return net

# ==========================
# Part 2: Feature Extraction and Prototype Learning Functions
# ==========================

# Function to get the feature extractor model
def get_feature_extractor():
    """Build a frozen-in-eval-mode ResNet18 backbone from the saved checkpoint.

    The weights are read from 'resnet18_cifar10.pth' and the classification
    head is replaced with an identity layer, so calling the returned module
    yields the input of the original final layer instead of class logits.

    Returns:
        The headless network, in eval mode, on the module-level ``device``.
    """
    backbone = models.resnet18(num_classes=10)
    state_dict = torch.load('resnet18_cifar10.pth', map_location=device)
    backbone.load_state_dict(state_dict)
    # Swap the fully connected head for a pass-through layer
    backbone.fc = nn.Identity()
    backbone.eval()
    backbone = backbone.to(device)
    return backbone

# Per-channel CIFAR-10 statistics used when normalizing inputs
mean = [0.4914, 0.4822, 0.4465]
std = [0.2023, 0.1994, 0.2010]

# Label vocabulary: a name's position in the list is its integer class id
class_names = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]
num_classes = len(class_names)
class_to_idx = dict(zip(class_names, range(num_classes)))

# Function to load images and labels from .tar.pth files
def load_images_from_pth(file_path, labeled=True):
    """Load images (and optionally labels) from a ``torch.save``-d dictionary.

    The file must contain a mapping with a 'data' entry and, when
    ``labeled`` is true, a 'targets' entry.

    Args:
        file_path: Path of the serialized dictionary.
        labeled: When true, also return the labels stored under 'targets'.

    Returns:
        ``(images, labels)`` when ``labeled`` is true, otherwise ``images``.
        String labels are converted to integer ids through ``class_to_idx``;
        any other label type is returned as stored.

    Raises:
        KeyError: If a required entry is missing, or a string label is not a
            known class name.
    """
    # Map storages to the CPU so that a file written on a GPU machine still
    # loads on a CPU-only host; callers move tensors to their device later.
    data = torch.load(file_path, map_location='cpu')
    images = data['data']
    if not labeled:
        return images

    labels = data['targets']
    # Guard the peek at the first element so an empty split does not raise
    if len(labels) > 0 and isinstance(labels[0], str):
        labels = [class_to_idx[label] for label in labels]
    return images, labels

# Function to extract features from images
def extract_features(images, model):
    """Run every image through ``model`` and stack the outputs.

    Each image is normalized with the module-level ``mean``/``std`` and
    forwarded on its own (batch of one) with gradients disabled.

    Args:
        images: Iterable of images. Tensors are used directly; NumPy arrays
            are converted to PIL images first; anything else is handed to
            ``transforms.ToTensor`` as is.
            NOTE(review): tensor inputs are presumably channel-first
            (C, H, W), since they are normalized without any permutation --
            confirm against the data files.
        model: Callable network producing one feature vector per image.

    Returns:
        NumPy array with one row of features per input image.
    """
    # Build the transforms once instead of once per image
    normalize = transforms.Normalize(mean=mean, std=std)
    pil_transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])
    features = []
    with torch.no_grad():
        for img in tqdm(images, desc='Extracting features'):
            if isinstance(img, torch.Tensor):
                img = img.to(device)
                if not img.is_floating_point():
                    # Integer pixels are always rescaled. Deciding by
                    # ``max() > 1`` alone would skip all-black (or nearly
                    # black) images and then fail inside Normalize, which
                    # only accepts floating-point tensors.
                    img = img.float() / 255.0
                elif img.max() > 1.0:
                    # Float tensor that still looks like it is in 0..255
                    img = img / 255.0
                img = normalize(img).unsqueeze(0)
            else:
                if isinstance(img, np.ndarray):
                    img = Image.fromarray(img)
                img = pil_transform(img).unsqueeze(0).to(device)
            feat = model(img).cpu().numpy().squeeze()
            features.append(feat)
    return np.array(features)

# Function to compute prototypes for each class
def compute_prototypes(features, labels, num_classes):
    """Average the feature vectors of each class into one prototype.

    Args:
        features: 2-D array, one feature vector per row.
        labels: Integer class id for each row of ``features``.
        num_classes: Number of prototype rows to produce.

    Returns:
        Array of shape ``(num_classes, features.shape[1])``. Classes that
        never occur in ``labels`` keep an all-zero row.
    """
    feature_dim = features.shape[1]
    sums = np.zeros((num_classes, feature_dim))
    counts = np.zeros(num_classes)

    # Accumulate per-class sums and sample counts
    for label, feat in zip(labels, features):
        sums[label] += feat
        counts[label] += 1

    # Turn sums into means, skipping classes without samples
    seen = counts > 0
    sums[seen] /= counts[seen][:, np.newaxis]
    return sums

# Function to predict labels using prototypes and calculate confidences
def predict_labels(features, prototypes):
    """Assign each feature vector to its nearest prototype.

    Args:
        features: Array of shape ``(n_samples, dim)``.
        prototypes: Array of shape ``(n_classes, dim)``.

    Returns:
        ``(predicted_labels, confidences)``: the index of the closest
        prototype (Euclidean distance) for every sample, and a confidence in
        ``[1 / n_classes, 1]`` for that choice.
    """
    distances = np.linalg.norm(features[:, np.newaxis] - prototypes, axis=2)
    predicted_labels = np.argmin(distances, axis=1)

    # Confidence is the softmax probability of the nearest prototype over the
    # negative distances. A plain exp(-distance) depends on the absolute
    # scale of the feature space and is practically zero for distances of
    # more than a few units, so a threshold such as 0.5 would reject every
    # sample. Subtracting the nearest distance first keeps the exponentials
    # in (0, 1] and makes the winning term exactly 1.
    nearest = np.min(distances, axis=1, keepdims=True)
    confidences = 1.0 / np.exp(nearest - distances).sum(axis=1)
    return predicted_labels, confidences

# Function to update prototypes with new data using confidence thresholding
def update_prototypes(old_prototypes, new_features, new_labels, confidences, alpha=0.7, confidence_threshold=0.5):
    """Blend existing prototypes with the means of confidently labeled samples.

    Args:
        old_prototypes: Array with one prototype per row.
        new_features: Array with one feature vector per new sample.
        new_labels: Class id assigned to each new sample.
        confidences: Confidence of each assignment.
        alpha: Weight kept by the old prototype; ``1 - alpha`` goes to the
            mean of the newly selected samples.
        confidence_threshold: Samples below this confidence are ignored.

    Returns:
        A new array of prototypes; ``old_prototypes`` is not modified.
        Classes without any selected sample keep their old prototype.
    """
    blended = np.copy(old_prototypes)
    confident = confidences >= confidence_threshold
    for c in range(old_prototypes.shape[0]):
        selected = np.where((new_labels == c) & confident)[0]
        if len(selected) == 0:
            # Nothing trustworthy for this class: the copied row stays as is
            continue
        class_mean = new_features[selected].mean(axis=0)
        blended[c] = alpha * old_prototypes[c] + (1 - alpha) * class_mean
    return blended

# Helper shared by the Task 1 and Task 2 evaluation functions
def _load_eval_features(eval_dataset_num, cache):
    """Return ``(features, labels)`` of one held-out dataset, computing them once.

    The first request for a dataset loads '{n}_eval_data.tar.pth' and embeds
    it with the module-level ``feature_extractor``; later requests are served
    from ``cache``. The features do not depend on the prototypes being
    evaluated, so recomputing them for every model is pure overhead.

    Args:
        eval_dataset_num: 1-based number of the held-out dataset.
        cache: Dict mapping dataset number to ``(features, labels)``; filled
            in place.
    """
    if eval_dataset_num not in cache:
        eval_file_path = f'{eval_dataset_num}_eval_data.tar.pth'
        eval_images, eval_labels = load_images_from_pth(eval_file_path, labeled=True)
        eval_features = extract_features(eval_images, feature_extractor)
        cache[eval_dataset_num] = (eval_features, eval_labels)
    return cache[eval_dataset_num]


# Function to evaluate models on held-out datasets for Task 1
def evaluate_models_task1(models, num_classes):
    """Score model f_i on held-out datasets D'1..D'i for every i.

    Relies on the module-level ``feature_extractor`` being defined.

    Args:
        models: Sequence of prototype arrays; entry ``k`` is model f(k+1).
        num_classes: Unused; kept for interface compatibility.

    Returns:
        Square array whose entry ``[m, d]`` is the accuracy (0..1) of model
        f(m+1) on dataset D'(d+1). Entries with ``d > m`` are left at zero.
    """
    num_models = len(models)
    accuracy_matrix = np.zeros((num_models, num_models))
    feature_cache = {}  # each held-out set is embedded only once
    for model_idx in range(num_models):
        prototypes = models[model_idx]
        for dataset_idx in range(model_idx + 1):
            eval_dataset_num = dataset_idx + 1
            print(f'\nEvaluating model f{model_idx + 1} on dataset D\'{eval_dataset_num}')
            eval_features, eval_labels = _load_eval_features(eval_dataset_num, feature_cache)
            eval_predicted_labels, _ = predict_labels(eval_features, prototypes)
            accuracy = accuracy_score(eval_labels, eval_predicted_labels)
            accuracy_matrix[model_idx, dataset_idx] = accuracy
            print(f'Accuracy: {accuracy * 100:.2f}%')
    return accuracy_matrix


# Function to evaluate models on held-out datasets for Task 2
def evaluate_models_task2(models, num_classes):
    """Score models f11, f12, ... on every held-out dataset seen so far.

    Relies on the module-level ``feature_extractor`` being defined.

    Args:
        models: Sequence of prototype arrays; entry ``k`` is model f(k+11).
        num_classes: Unused; kept for interface compatibility.

    Returns:
        Array of shape ``(len(models), 20)`` whose entry ``[m, d]`` is the
        accuracy (0..1) of model f(m+11) on dataset D'(d+1). Combinations
        that are not evaluated remain NaN.
    """
    num_models = len(models)  # models f11 to f20
    accuracy_matrix = np.full((num_models, 20), np.nan)  # Datasets D1 to D20
    feature_cache = {}  # each held-out set is embedded only once
    for model_idx in range(num_models):
        prototypes = models[model_idx]
        model_num = model_idx + 11  # Since models are f11 to f20
        for dataset_idx in range(model_num):  # Evaluate on datasets D1 to D_{model_num}
            eval_dataset_num = dataset_idx + 1
            print(f'\nEvaluating model f{model_num} on dataset D\'{eval_dataset_num}')
            eval_features, eval_labels = _load_eval_features(eval_dataset_num, feature_cache)
            eval_predicted_labels, _ = predict_labels(eval_features, prototypes)
            accuracy = accuracy_score(eval_labels, eval_predicted_labels)
            accuracy_matrix[model_idx, dataset_idx] = accuracy
            print(f'Accuracy: {accuracy * 100:.2f}%')
    return accuracy_matrix

# ==========================
# Main Script
# ==========================

if __name__ == '__main__':
    # Part 1: make sure a trained CIFAR-10 checkpoint is available
    if os.path.exists('resnet18_cifar10.pth'):
        print('Pre-trained ResNet18 model found.')
    else:
        print('Training ResNet18 on CIFAR-10...')
        trained_model = train_resnet18_on_cifar10(num_epochs=10)  # Adjust the number of epochs as needed

    # Part 2: feature extraction and prototype learning.
    # The evaluation functions read `feature_extractor` as a module-level name.
    feature_extractor = get_feature_extractor()

    # One prototype array per model, f1 first.
    # NOTE: from here on `models` no longer refers to the torchvision.models
    # alias imported at the top of the file.
    models = []

    # Task 1: datasets D1 to D10
    for i in range(1, 11):
        print(f'\nProcessing dataset D{i}')
        train_file_path = f'{i}_train_data.tar.pth'
        if i != 1:
            # D2 to D10 carry no labels: pseudo-label them with the latest
            # model and fold the confident samples into new prototypes
            images = load_images_from_pth(train_file_path, labeled=False)
            features = extract_features(images, feature_extractor)
            previous_prototypes = models[-1]
            predicted_labels, confidences = predict_labels(features, previous_prototypes)
            new_prototypes = update_prototypes(
                previous_prototypes,
                features,
                predicted_labels,
                confidences,
                alpha=0.7,
                confidence_threshold=0.5,
            )
            models.append(new_prototypes)
        else:
            # D1 is the only labeled dataset: prototypes are plain class means
            images, labels = load_images_from_pth(train_file_path, labeled=True)
            features = extract_features(images, feature_extractor)
            prototypes = compute_prototypes(features, labels, num_classes)
            models.append(prototypes)
        print(f'Model f{i} updated.')

    # Task 1 evaluation on the held-out datasets D'1 to D'10
    print('\nEvaluating models on held-out datasets for Task 1...')
    accuracy_matrix_task1 = evaluate_models_task1(models[:10], num_classes)

    # Task 1 report: row i lists model f(i+1) on D'1 .. D'(i+1)
    print('\nAccuracy Matrix for Task 1:')
    for row_idx, row in enumerate(accuracy_matrix_task1):
        cells = [f'{value*100:.2f}%' for value in row[:row_idx + 1]]
        print(f'Model f{row_idx+1}: ' + '\t'.join(cells))

    # Task 2: datasets D11 to D20, continuing from f10
    print('\nStarting Task 2: Processing datasets D11 to D20')
    for i in range(11, 21):
        print(f'\nProcessing dataset D{i}')
        train_file_path = f'{i}_train_data.tar.pth'
        images = load_images_from_pth(train_file_path, labeled=False)
        features = extract_features(images, feature_extractor)
        # Pseudo-label with the most recent model, then blend
        previous_prototypes = models[-1]
        predicted_labels, confidences = predict_labels(features, previous_prototypes)
        # Task 2 uses its own alpha and confidence_threshold
        new_prototypes = update_prototypes(
            previous_prototypes,
            features,
            predicted_labels,
            confidences,
            alpha=0.5,
            confidence_threshold=0.7,
        )
        models.append(new_prototypes)
        print(f'Model f{i} updated.')

    # Task 2 evaluation on the held-out datasets D'1 to D'20
    print('\nEvaluating models on held-out datasets for Task 2...')
    accuracy_matrix_task2 = evaluate_models_task2(models[10:], num_classes)  # Models f11 to f20

    # Task 2 report: NaN cells (never evaluated) are shown as '---'
    print('\nAccuracy Matrix for Task 2:')
    for offset in range(10):  # Models f11 to f20
        model_num = offset + 11
        cells = [
            '---' if np.isnan(value) else f'{value*100:.2f}%'
            for value in accuracy_matrix_task2[offset, :model_num]
        ]
        print(f'Model f{model_num}: ' + '\t'.join(cells))



Using device: cpu
Pre-trained ResNet18 model found.

Processing dataset D1


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 75.75it/s]


Model f1 updated.

Processing dataset D2


Extracting features: 100%|██████████| 2500/2500 [00:30<00:00, 80.89it/s]


Model f2 updated.

Processing dataset D3


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 76.73it/s]


Model f3 updated.

Processing dataset D4


Extracting features: 100%|██████████| 2500/2500 [00:31<00:00, 80.18it/s]


Model f4 updated.

Processing dataset D5


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 76.65it/s]


Model f5 updated.

Processing dataset D6


Extracting features: 100%|██████████| 2500/2500 [00:31<00:00, 79.35it/s]


Model f6 updated.

Processing dataset D7


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 76.70it/s]


Model f7 updated.

Processing dataset D8


Extracting features: 100%|██████████| 2500/2500 [00:30<00:00, 82.20it/s]


Model f8 updated.

Processing dataset D9


Extracting features: 100%|██████████| 2500/2500 [00:30<00:00, 80.95it/s]


Model f9 updated.

Processing dataset D10


Extracting features: 100%|██████████| 2500/2500 [00:30<00:00, 81.40it/s]


Model f10 updated.

Evaluating models on held-out datasets for Task 1...

Evaluating model f1 on dataset D'1


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 74.87it/s]


Accuracy: 79.20%

Evaluating model f2 on dataset D'1


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 75.80it/s]


Accuracy: 79.20%

Evaluating model f2 on dataset D'2


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 73.57it/s]


Accuracy: 80.32%

Evaluating model f3 on dataset D'1


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 73.17it/s]


Accuracy: 79.20%

Evaluating model f3 on dataset D'2


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 75.12it/s]


Accuracy: 80.32%

Evaluating model f3 on dataset D'3


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 76.91it/s]


Accuracy: 78.32%

Evaluating model f4 on dataset D'1


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 75.68it/s]


Accuracy: 79.20%

Evaluating model f4 on dataset D'2


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 71.85it/s]


Accuracy: 80.32%

Evaluating model f4 on dataset D'3


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 72.51it/s]


Accuracy: 78.32%

Evaluating model f4 on dataset D'4


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 69.54it/s]


Accuracy: 79.60%

Evaluating model f5 on dataset D'1


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 75.56it/s]


Accuracy: 79.20%

Evaluating model f5 on dataset D'2


Extracting features: 100%|██████████| 2500/2500 [00:45<00:00, 54.80it/s]


Accuracy: 80.32%

Evaluating model f5 on dataset D'3


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 72.00it/s]


Accuracy: 78.32%

Evaluating model f5 on dataset D'4


Extracting features: 100%|██████████| 2500/2500 [00:36<00:00, 68.16it/s]


Accuracy: 79.60%

Evaluating model f5 on dataset D'5


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 73.22it/s]


Accuracy: 79.04%

Evaluating model f6 on dataset D'1


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 69.82it/s]


Accuracy: 79.20%

Evaluating model f6 on dataset D'2


Extracting features: 100%|██████████| 2500/2500 [00:36<00:00, 68.65it/s]


Accuracy: 80.32%

Evaluating model f6 on dataset D'3


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 72.31it/s]


Accuracy: 78.32%

Evaluating model f6 on dataset D'4


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 70.05it/s]


Accuracy: 79.60%

Evaluating model f6 on dataset D'5


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 73.48it/s]


Accuracy: 79.04%

Evaluating model f6 on dataset D'6


Extracting features: 100%|██████████| 2500/2500 [00:36<00:00, 67.81it/s]


Accuracy: 80.68%

Evaluating model f7 on dataset D'1


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 70.38it/s]


Accuracy: 79.20%

Evaluating model f7 on dataset D'2


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 75.31it/s]


Accuracy: 80.32%

Evaluating model f7 on dataset D'3


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 75.94it/s]


Accuracy: 78.32%

Evaluating model f7 on dataset D'4


Extracting features: 100%|██████████| 2500/2500 [00:36<00:00, 69.21it/s]


Accuracy: 79.60%

Evaluating model f7 on dataset D'5


Extracting features: 100%|██████████| 2500/2500 [00:36<00:00, 67.70it/s]


Accuracy: 79.04%

Evaluating model f7 on dataset D'6


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 75.96it/s]


Accuracy: 80.68%

Evaluating model f7 on dataset D'7


Extracting features: 100%|██████████| 2500/2500 [00:36<00:00, 68.96it/s]


Accuracy: 78.52%

Evaluating model f8 on dataset D'1


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 69.72it/s]


Accuracy: 79.20%

Evaluating model f8 on dataset D'2


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 71.51it/s]


Accuracy: 80.32%

Evaluating model f8 on dataset D'3


Extracting features: 100%|██████████| 2500/2500 [00:36<00:00, 68.02it/s]


Accuracy: 78.32%

Evaluating model f8 on dataset D'4


Extracting features: 100%|██████████| 2500/2500 [00:36<00:00, 68.92it/s]


Accuracy: 79.60%

Evaluating model f8 on dataset D'5


Extracting features: 100%|██████████| 2500/2500 [00:38<00:00, 64.62it/s]


Accuracy: 79.04%

Evaluating model f8 on dataset D'6


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 70.77it/s]


Accuracy: 80.68%

Evaluating model f8 on dataset D'7


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 70.46it/s]


Accuracy: 78.52%

Evaluating model f8 on dataset D'8


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 70.57it/s]


Accuracy: 79.16%

Evaluating model f9 on dataset D'1


Extracting features: 100%|██████████| 2500/2500 [00:36<00:00, 68.40it/s]


Accuracy: 79.20%

Evaluating model f9 on dataset D'2


Extracting features: 100%|██████████| 2500/2500 [00:36<00:00, 68.83it/s]


Accuracy: 80.32%

Evaluating model f9 on dataset D'3


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 72.22it/s]


Accuracy: 78.32%

Evaluating model f9 on dataset D'4


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 73.97it/s]


Accuracy: 79.60%

Evaluating model f9 on dataset D'5


Extracting features: 100%|██████████| 2500/2500 [00:36<00:00, 68.59it/s]


Accuracy: 79.04%

Evaluating model f9 on dataset D'6


Extracting features: 100%|██████████| 2500/2500 [00:36<00:00, 69.37it/s]


Accuracy: 80.68%

Evaluating model f9 on dataset D'7


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 74.66it/s]


Accuracy: 78.52%

Evaluating model f9 on dataset D'8


Extracting features: 100%|██████████| 2500/2500 [00:37<00:00, 66.61it/s]


Accuracy: 79.16%

Evaluating model f9 on dataset D'9


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 75.97it/s]


Accuracy: 80.00%

Evaluating model f10 on dataset D'1


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 74.45it/s]


Accuracy: 79.20%

Evaluating model f10 on dataset D'2


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 70.18it/s]


Accuracy: 80.32%

Evaluating model f10 on dataset D'3


Extracting features: 100%|██████████| 2500/2500 [00:36<00:00, 68.06it/s]


Accuracy: 78.32%

Evaluating model f10 on dataset D'4


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 73.61it/s]


Accuracy: 79.60%

Evaluating model f10 on dataset D'5


Extracting features: 100%|██████████| 2500/2500 [00:36<00:00, 69.14it/s]


Accuracy: 79.04%

Evaluating model f10 on dataset D'6


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 70.64it/s]


Accuracy: 80.68%

Evaluating model f10 on dataset D'7


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 69.53it/s]


Accuracy: 78.52%

Evaluating model f10 on dataset D'8


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 75.01it/s]


Accuracy: 79.16%

Evaluating model f10 on dataset D'9


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 71.93it/s]


Accuracy: 80.00%

Evaluating model f10 on dataset D'10


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 72.65it/s]


Accuracy: 80.24%

Accuracy Matrix for Task 1:
Model f1: 79.20%
Model f2: 79.20%	80.32%
Model f3: 79.20%	80.32%	78.32%
Model f4: 79.20%	80.32%	78.32%	79.60%
Model f5: 79.20%	80.32%	78.32%	79.60%	79.04%
Model f6: 79.20%	80.32%	78.32%	79.60%	79.04%	80.68%
Model f7: 79.20%	80.32%	78.32%	79.60%	79.04%	80.68%	78.52%
Model f8: 79.20%	80.32%	78.32%	79.60%	79.04%	80.68%	78.52%	79.16%
Model f9: 79.20%	80.32%	78.32%	79.60%	79.04%	80.68%	78.52%	79.16%	80.00%
Model f10: 79.20%	80.32%	78.32%	79.60%	79.04%	80.68%	78.52%	79.16%	80.00%	80.24%

Starting Task 2: Processing datasets D11 to D20

Processing dataset D11


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 73.68it/s]


Model f11 updated.

Processing dataset D12


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 76.80it/s]


Model f12 updated.

Processing dataset D13


Extracting features: 100%|██████████| 2500/2500 [00:31<00:00, 78.49it/s]


Model f13 updated.

Processing dataset D14


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 74.98it/s]


Model f14 updated.

Processing dataset D15


Extracting features: 100%|██████████| 2500/2500 [00:31<00:00, 78.28it/s]


Model f15 updated.

Processing dataset D16


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 75.05it/s]


Model f16 updated.

Processing dataset D17


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 76.55it/s]


Model f17 updated.

Processing dataset D18


Extracting features: 100%|██████████| 2500/2500 [00:31<00:00, 79.55it/s]


Model f18 updated.

Processing dataset D19


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 77.72it/s]


Model f19 updated.

Processing dataset D20


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 77.62it/s]


Model f20 updated.

Evaluating models on held-out datasets for Task 2...

Evaluating model f11 on dataset D'1


Extracting features: 100%|██████████| 2500/2500 [00:31<00:00, 80.38it/s]


Accuracy: 79.20%

Evaluating model f11 on dataset D'2


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 77.82it/s]


Accuracy: 80.32%

Evaluating model f11 on dataset D'3


Extracting features: 100%|██████████| 2500/2500 [00:31<00:00, 78.41it/s]


Accuracy: 78.32%

Evaluating model f11 on dataset D'4


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 75.16it/s]


Accuracy: 79.60%

Evaluating model f11 on dataset D'5


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 76.43it/s]


Accuracy: 79.04%

Evaluating model f11 on dataset D'6


Extracting features: 100%|██████████| 2500/2500 [00:31<00:00, 78.18it/s]


Accuracy: 80.68%

Evaluating model f11 on dataset D'7


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 77.58it/s]


Accuracy: 78.52%

Evaluating model f11 on dataset D'8


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 75.09it/s]


Accuracy: 79.16%

Evaluating model f11 on dataset D'9


Extracting features: 100%|██████████| 2500/2500 [00:30<00:00, 80.65it/s]


Accuracy: 80.00%

Evaluating model f11 on dataset D'10


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 73.15it/s]


Accuracy: 80.24%

Evaluating model f11 on dataset D'11


Extracting features: 100%|██████████| 2500/2500 [00:30<00:00, 82.34it/s]


Accuracy: 65.68%

Evaluating model f12 on dataset D'1


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 75.74it/s]


Accuracy: 79.20%

Evaluating model f12 on dataset D'2


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 75.58it/s]


Accuracy: 80.32%

Evaluating model f12 on dataset D'3


Extracting features: 100%|██████████| 2500/2500 [00:31<00:00, 79.23it/s]


Accuracy: 78.32%

Evaluating model f12 on dataset D'4


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 74.62it/s]


Accuracy: 79.60%

Evaluating model f12 on dataset D'5


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 77.23it/s]


Accuracy: 79.04%

Evaluating model f12 on dataset D'6


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 76.50it/s]


Accuracy: 80.68%

Evaluating model f12 on dataset D'7


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 76.18it/s]


Accuracy: 78.52%

Evaluating model f12 on dataset D'8


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 77.27it/s]


Accuracy: 79.16%

Evaluating model f12 on dataset D'9


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 70.14it/s]


Accuracy: 80.00%

Evaluating model f12 on dataset D'10


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 73.29it/s]


Accuracy: 80.24%

Evaluating model f12 on dataset D'11


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 73.39it/s]


Accuracy: 65.68%

Evaluating model f12 on dataset D'12


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 69.93it/s]


Accuracy: 70.68%

Evaluating model f13 on dataset D'1


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 71.53it/s]


Accuracy: 79.20%

Evaluating model f13 on dataset D'2


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 70.49it/s]


Accuracy: 80.32%

Evaluating model f13 on dataset D'3


Extracting features: 100%|██████████| 2500/2500 [00:36<00:00, 68.83it/s]


Accuracy: 78.32%

Evaluating model f13 on dataset D'4


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 77.15it/s]


Accuracy: 79.60%

Evaluating model f13 on dataset D'5


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 70.51it/s]


Accuracy: 79.04%

Evaluating model f13 on dataset D'6


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 71.32it/s]


Accuracy: 80.68%

Evaluating model f13 on dataset D'7


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 72.84it/s]


Accuracy: 78.52%

Evaluating model f13 on dataset D'8


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 70.75it/s]


Accuracy: 79.16%

Evaluating model f13 on dataset D'9


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 73.08it/s]


Accuracy: 80.00%

Evaluating model f13 on dataset D'10


Extracting features: 100%|██████████| 2500/2500 [00:37<00:00, 67.32it/s]


Accuracy: 80.24%

Evaluating model f13 on dataset D'11


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 73.29it/s]


Accuracy: 65.68%

Evaluating model f13 on dataset D'12


Extracting features: 100%|██████████| 2500/2500 [00:36<00:00, 69.34it/s]


Accuracy: 70.68%

Evaluating model f13 on dataset D'13


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 72.21it/s]


Accuracy: 70.40%

Evaluating model f14 on dataset D'1


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 71.68it/s]


Accuracy: 79.20%

Evaluating model f14 on dataset D'2


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 72.33it/s]


Accuracy: 80.32%

Evaluating model f14 on dataset D'3


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 70.32it/s]


Accuracy: 78.32%

Evaluating model f14 on dataset D'4


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 73.77it/s]


Accuracy: 79.60%

Evaluating model f14 on dataset D'5


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 73.50it/s]


Accuracy: 79.04%

Evaluating model f14 on dataset D'6


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 77.86it/s]


Accuracy: 80.68%

Evaluating model f14 on dataset D'7


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 73.74it/s]


Accuracy: 78.52%

Evaluating model f14 on dataset D'8


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 72.97it/s]


Accuracy: 79.16%

Evaluating model f14 on dataset D'9


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 76.82it/s]


Accuracy: 80.00%

Evaluating model f14 on dataset D'10


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 76.39it/s]


Accuracy: 80.24%

Evaluating model f14 on dataset D'11


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 77.53it/s]


Accuracy: 65.68%

Evaluating model f14 on dataset D'12


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 72.06it/s]


Accuracy: 70.68%

Evaluating model f14 on dataset D'13


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 75.21it/s]


Accuracy: 70.40%

Evaluating model f14 on dataset D'14


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 75.33it/s]


Accuracy: 60.20%

Evaluating model f15 on dataset D'1


Extracting features: 100%|██████████| 2500/2500 [00:31<00:00, 78.36it/s]


Accuracy: 79.20%

Evaluating model f15 on dataset D'2


Extracting features: 100%|██████████| 2500/2500 [00:31<00:00, 78.58it/s]


Accuracy: 80.32%

Evaluating model f15 on dataset D'3


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 76.30it/s]


Accuracy: 78.32%

Evaluating model f15 on dataset D'4


Extracting features: 100%|██████████| 2500/2500 [00:31<00:00, 79.06it/s]


Accuracy: 79.60%

Evaluating model f15 on dataset D'5


Extracting features: 100%|██████████| 2500/2500 [00:31<00:00, 79.94it/s]


Accuracy: 79.04%

Evaluating model f15 on dataset D'6


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 76.57it/s]


Accuracy: 80.68%

Evaluating model f15 on dataset D'7


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 77.84it/s]


Accuracy: 78.52%

Evaluating model f15 on dataset D'8


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 76.75it/s]


Accuracy: 79.16%

Evaluating model f15 on dataset D'9


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 76.13it/s]


Accuracy: 80.00%

Evaluating model f15 on dataset D'10


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 75.79it/s]


Accuracy: 80.24%

Evaluating model f15 on dataset D'11


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 75.61it/s]


Accuracy: 65.68%

Evaluating model f15 on dataset D'12


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 78.06it/s]


Accuracy: 70.68%

Evaluating model f15 on dataset D'13


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 70.73it/s]


Accuracy: 70.40%

Evaluating model f15 on dataset D'14


Extracting features: 100%|██████████| 2500/2500 [00:38<00:00, 65.77it/s]


Accuracy: 60.20%

Evaluating model f15 on dataset D'15


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 71.54it/s]


Accuracy: 75.36%

Evaluating model f16 on dataset D'1


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 73.40it/s]


Accuracy: 79.20%

Evaluating model f16 on dataset D'2


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 72.14it/s]


Accuracy: 80.32%

Evaluating model f16 on dataset D'3


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 72.88it/s]


Accuracy: 78.32%

Evaluating model f16 on dataset D'4


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 69.66it/s]


Accuracy: 79.60%

Evaluating model f16 on dataset D'5


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 73.66it/s]


Accuracy: 79.04%

Evaluating model f16 on dataset D'6


Extracting features: 100%|██████████| 2500/2500 [00:36<00:00, 68.97it/s]


Accuracy: 80.68%

Evaluating model f16 on dataset D'7


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 71.73it/s]


Accuracy: 78.52%

Evaluating model f16 on dataset D'8


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 71.77it/s]


Accuracy: 79.16%

Evaluating model f16 on dataset D'9


Extracting features: 100%|██████████| 2500/2500 [00:36<00:00, 69.12it/s]


Accuracy: 80.00%

Evaluating model f16 on dataset D'10


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 74.22it/s]


Accuracy: 80.24%

Evaluating model f16 on dataset D'11


Extracting features: 100%|██████████| 2500/2500 [00:37<00:00, 66.95it/s]


Accuracy: 65.68%

Evaluating model f16 on dataset D'12


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 73.79it/s]


Accuracy: 70.68%

Evaluating model f16 on dataset D'13


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 70.73it/s]


Accuracy: 70.40%

Evaluating model f16 on dataset D'14


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 74.64it/s]


Accuracy: 60.20%

Evaluating model f16 on dataset D'15


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 75.98it/s]


Accuracy: 75.36%

Evaluating model f16 on dataset D'16


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 75.72it/s]


Accuracy: 64.56%

Evaluating model f17 on dataset D'1


Extracting features: 100%|██████████| 2500/2500 [00:31<00:00, 78.18it/s]


Accuracy: 79.20%

Evaluating model f17 on dataset D'2


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 75.56it/s]


Accuracy: 80.32%

Evaluating model f17 on dataset D'3


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 74.88it/s]


Accuracy: 78.32%

Evaluating model f17 on dataset D'4


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 76.03it/s]


Accuracy: 79.60%

Evaluating model f17 on dataset D'5


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 75.49it/s]


Accuracy: 79.04%

Evaluating model f17 on dataset D'6


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 77.12it/s]


Accuracy: 80.68%

Evaluating model f17 on dataset D'7


Extracting features: 100%|██████████| 2500/2500 [00:31<00:00, 79.06it/s]


Accuracy: 78.52%

Evaluating model f17 on dataset D'8


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 72.72it/s]


Accuracy: 79.16%

Evaluating model f17 on dataset D'9


Extracting features: 100%|██████████| 2500/2500 [00:31<00:00, 78.37it/s]


Accuracy: 80.00%

Evaluating model f17 on dataset D'10


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 75.86it/s]


Accuracy: 80.24%

Evaluating model f17 on dataset D'11


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 76.88it/s]


Accuracy: 65.68%

Evaluating model f17 on dataset D'12


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 75.80it/s]


Accuracy: 70.68%

Evaluating model f17 on dataset D'13


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 76.85it/s]


Accuracy: 70.40%

Evaluating model f17 on dataset D'14


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 77.67it/s]


Accuracy: 60.20%

Evaluating model f17 on dataset D'15


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 72.61it/s]


Accuracy: 75.36%

Evaluating model f17 on dataset D'16


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 76.25it/s]


Accuracy: 64.56%

Evaluating model f17 on dataset D'17


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 75.94it/s]


Accuracy: 62.88%

Evaluating model f18 on dataset D'1


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 70.16it/s]


Accuracy: 79.20%

Evaluating model f18 on dataset D'2


Extracting features: 100%|██████████| 2500/2500 [00:36<00:00, 67.76it/s]


Accuracy: 80.32%

Evaluating model f18 on dataset D'3


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 71.05it/s]


Accuracy: 78.32%

Evaluating model f18 on dataset D'4


Extracting features: 100%|██████████| 2500/2500 [00:36<00:00, 68.36it/s]


Accuracy: 79.60%

Evaluating model f18 on dataset D'5


Extracting features: 100%|██████████| 2500/2500 [00:36<00:00, 69.15it/s]


Accuracy: 79.04%

Evaluating model f18 on dataset D'6


Extracting features: 100%|██████████| 2500/2500 [00:36<00:00, 68.89it/s]


Accuracy: 80.68%

Evaluating model f18 on dataset D'7


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 74.18it/s]


Accuracy: 78.52%

Evaluating model f18 on dataset D'8


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 72.52it/s]


Accuracy: 79.16%

Evaluating model f18 on dataset D'9


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 74.20it/s]


Accuracy: 80.00%

Evaluating model f18 on dataset D'10


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 69.68it/s]


Accuracy: 80.24%

Evaluating model f18 on dataset D'11


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 72.71it/s]


Accuracy: 65.68%

Evaluating model f18 on dataset D'12


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 69.57it/s]


Accuracy: 70.68%

Evaluating model f18 on dataset D'13


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 71.18it/s]


Accuracy: 70.40%

Evaluating model f18 on dataset D'14


Extracting features: 100%|██████████| 2500/2500 [00:37<00:00, 66.61it/s]


Accuracy: 60.20%

Evaluating model f18 on dataset D'15


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 74.26it/s]


Accuracy: 75.36%

Evaluating model f18 on dataset D'16


Extracting features: 100%|██████████| 2500/2500 [00:36<00:00, 68.87it/s]


Accuracy: 64.56%

Evaluating model f18 on dataset D'17


Extracting features: 100%|██████████| 2500/2500 [00:36<00:00, 69.15it/s]


Accuracy: 62.88%

Evaluating model f18 on dataset D'18


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 73.23it/s]


Accuracy: 67.60%

Evaluating model f19 on dataset D'1


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 71.21it/s]


Accuracy: 79.20%

Evaluating model f19 on dataset D'2


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 69.97it/s]


Accuracy: 80.32%

Evaluating model f19 on dataset D'3


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 70.00it/s]


Accuracy: 78.32%

Evaluating model f19 on dataset D'4


Extracting features: 100%|██████████| 2500/2500 [00:36<00:00, 69.01it/s]


Accuracy: 79.60%

Evaluating model f19 on dataset D'5


Extracting features: 100%|██████████| 2500/2500 [00:36<00:00, 68.36it/s]


Accuracy: 79.04%

Evaluating model f19 on dataset D'6


Extracting features: 100%|██████████| 2500/2500 [00:37<00:00, 66.37it/s]


Accuracy: 80.68%

Evaluating model f19 on dataset D'7


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 70.03it/s]


Accuracy: 78.52%

Evaluating model f19 on dataset D'8


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 72.13it/s]


Accuracy: 79.16%

Evaluating model f19 on dataset D'9


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 72.99it/s]


Accuracy: 80.00%

Evaluating model f19 on dataset D'10


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 69.51it/s]


Accuracy: 80.24%

Evaluating model f19 on dataset D'11


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 72.67it/s]


Accuracy: 65.68%

Evaluating model f19 on dataset D'12


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 71.94it/s]


Accuracy: 70.68%

Evaluating model f19 on dataset D'13


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 72.80it/s]


Accuracy: 70.40%

Evaluating model f19 on dataset D'14


Extracting features: 100%|██████████| 2500/2500 [00:37<00:00, 67.05it/s]


Accuracy: 60.20%

Evaluating model f19 on dataset D'15


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 72.30it/s]


Accuracy: 75.36%

Evaluating model f19 on dataset D'16


Extracting features: 100%|██████████| 2500/2500 [00:36<00:00, 69.19it/s]


Accuracy: 64.56%

Evaluating model f19 on dataset D'17


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 72.79it/s]


Accuracy: 62.88%

Evaluating model f19 on dataset D'18


Extracting features: 100%|██████████| 2500/2500 [00:35<00:00, 70.91it/s]


Accuracy: 67.60%

Evaluating model f19 on dataset D'19


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 75.05it/s]


Accuracy: 73.60%

Evaluating model f20 on dataset D'1


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 74.22it/s]


Accuracy: 79.20%

Evaluating model f20 on dataset D'2


Extracting features: 100%|██████████| 2500/2500 [00:31<00:00, 78.96it/s]


Accuracy: 80.32%

Evaluating model f20 on dataset D'3


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 77.48it/s]


Accuracy: 78.32%

Evaluating model f20 on dataset D'4


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 74.51it/s]


Accuracy: 79.60%

Evaluating model f20 on dataset D'5


Extracting features: 100%|██████████| 2500/2500 [00:31<00:00, 80.20it/s]


Accuracy: 79.04%

Evaluating model f20 on dataset D'6


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 76.19it/s]


Accuracy: 80.68%

Evaluating model f20 on dataset D'7


Extracting features: 100%|██████████| 2500/2500 [00:34<00:00, 72.54it/s]


Accuracy: 78.52%

Evaluating model f20 on dataset D'8


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 76.93it/s]


Accuracy: 79.16%

Evaluating model f20 on dataset D'9


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 75.13it/s]


Accuracy: 80.00%

Evaluating model f20 on dataset D'10


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 76.07it/s]


Accuracy: 80.24%

Evaluating model f20 on dataset D'11


Extracting features: 100%|██████████| 2500/2500 [00:31<00:00, 78.40it/s]


Accuracy: 65.68%

Evaluating model f20 on dataset D'12


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 76.12it/s]


Accuracy: 70.68%

Evaluating model f20 on dataset D'13


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 76.29it/s]


Accuracy: 70.40%

Evaluating model f20 on dataset D'14


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 77.31it/s]


Accuracy: 60.20%

Evaluating model f20 on dataset D'15


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 75.94it/s]


Accuracy: 75.36%

Evaluating model f20 on dataset D'16


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 76.75it/s]


Accuracy: 64.56%

Evaluating model f20 on dataset D'17


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 73.55it/s]


Accuracy: 62.88%

Evaluating model f20 on dataset D'18


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 78.02it/s]


Accuracy: 67.60%

Evaluating model f20 on dataset D'19


Extracting features: 100%|██████████| 2500/2500 [00:33<00:00, 75.43it/s]


Accuracy: 73.60%

Evaluating model f20 on dataset D'20


Extracting features: 100%|██████████| 2500/2500 [00:32<00:00, 77.19it/s]


Accuracy: 71.00%

Accuracy Matrix for Task 2:
Model f11: 79.20%	80.32%	78.32%	79.60%	79.04%	80.68%	78.52%	79.16%	80.00%	80.24%	65.68%
Model f12: 79.20%	80.32%	78.32%	79.60%	79.04%	80.68%	78.52%	79.16%	80.00%	80.24%	65.68%	70.68%
Model f13: 79.20%	80.32%	78.32%	79.60%	79.04%	80.68%	78.52%	79.16%	80.00%	80.24%	65.68%	70.68%	70.40%
Model f14: 79.20%	80.32%	78.32%	79.60%	79.04%	80.68%	78.52%	79.16%	80.00%	80.24%	65.68%	70.68%	70.40%	60.20%
Model f15: 79.20%	80.32%	78.32%	79.60%	79.04%	80.68%	78.52%	79.16%	80.00%	80.24%	65.68%	70.68%	70.40%	60.20%	75.36%
Model f16: 79.20%	80.32%	78.32%	79.60%	79.04%	80.68%	78.52%	79.16%	80.00%	80.24%	65.68%	70.68%	70.40%	60.20%	75.36%	64.56%
Model f17: 79.20%	80.32%	78.32%	79.60%	79.04%	80.68%	78.52%	79.16%	80.00%	80.24%	65.68%	70.68%	70.40%	60.20%	75.36%	64.56%	62.88%
Model f18: 79.20%	80.32%	78.32%	79.60%	79.04%	80.68%	78.52%	79.16%	80.00%	80.24%	65.68%	70.68%	70.40%	60.20%	75.36%	64.56%	62.88%	67.60%
Model f19: 79.20%	80.32%	78.32%	79.60%	79.04%	80.68%	7