<a href="https://colab.research.google.com/github/itsmeeeeeee/MML/blob/main/MMF_Ensemble.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


#  Ensemble Method on (MFA) Multimodal Fusion (ohne Self-Attention) for Multimodal Sentiment Analysis

Group: 5

In [None]:
! pip install torch torchvision
! pip install transformers pandas numpy



In [1]:
from torchvision import models, transforms
from PIL import Image, ImageFile

import os
import numpy as np
import torch
import pandas as pd
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import KFold
import torch.nn.functional as F
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.model_selection import train_test_split

#import torch.nn.functional as F

In [2]:
#Mount Google Drive
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [3]:
# Import necessary libraries

import torch.nn.functional as F
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.model_selection import train_test_split


**Data extraction is made with Resnet and Bert by Features_extraction file:** https://colab.research.google.com/drive/144qkv0HiAqRXuwlfGNi9M-DxNGmBffYV?usp=drive_link

### **Read the data**

In [4]:
import pandas as pd
import numpy as np
import torch

# Load labels from CSV
file_path_labels = "/content/drive/MyDrive/MultimodalNLP/projekt/data/labels.csv"
labeled_data = pd.read_csv(file_path_labels)
labels = labeled_data['overall_sentiment'].values

# Define your existing mapping
label_mapping = {
    "very_negative": 0,
    "negative": 0,
    "positive": 1,
    "very_positive": 1
}

# Get valid indices for labels that exist in the mapping
valid_indices = [i for i, label in enumerate(labels) if label in label_mapping]
filtered_labels = [labels[i] for i in valid_indices]

# Apply the mapping to convert filtered text labels to numeric labels
numeric_labels = np.array([label_mapping[label] for label in filtered_labels])

# Convert labels to a torch tensor
numeric_labels = torch.tensor(numeric_labels, dtype=torch.long)
print("numeric labels:", numeric_labels.shape)

# Load image features
file_path_im = "/content/drive/MyDrive/MultimodalNLP/projekt/features_data/image_features_restnet.npy"
image_features = np.load(file_path_im)

# Load text features
file_path_emb = "/content/drive/MyDrive/MultimodalNLP/projekt/features_data/text_features_bert.npy"
text_features = np.load(file_path_emb)

# Filter image and text features using valid indices
filtered_image_features = torch.tensor(image_features[valid_indices], dtype=torch.float32)
filtered_text_features = torch.tensor(text_features[valid_indices], dtype=torch.float32)

print("Filtered Image features:", filtered_image_features.shape)
print("Filtered Text features:", filtered_text_features.shape)


numeric labels: torch.Size([4791])
Filtered Image features: torch.Size([4791, 1000])
Filtered Text features: torch.Size([4791, 768])


### **Split the data**

In [5]:
from sklearn.model_selection import train_test_split


train_text_bert, test_text_bert, train_image_restnet, test_image_restnet, train_labels, test_labels = train_test_split(filtered_text_features,
    filtered_image_features, numeric_labels, test_size=0.2, random_state=42)  #20%



In [6]:
len(train_text_bert)

3832

In [7]:
from torch.utils.data import DataLoader, TensorDataset


In [8]:

# Convert in Tendors


train_dataset_bert_restnet = TensorDataset(train_text_bert, train_image_restnet, train_labels)

test_dataset_bert_restnet = TensorDataset(test_text_bert, test_image_restnet, test_labels)

test_loader = DataLoader(test_dataset_bert_restnet, batch_size=64, shuffle=False)




## **Build the Models**

### **LSTM**

In [9]:

class LSTMClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, num_classes):
        super(LSTMClassifier, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_dim * 2, num_classes)  # For bidirectional LSTM

    def forward(self, x):
        # Initialize hidden and cell states
        batch_size = x.size(0)
        num_directions = 2 if self.lstm.bidirectional else 1
        h0 = torch.zeros(self.lstm.num_layers * num_directions, batch_size, self.lstm.hidden_size).to(x.device)
        c0 = torch.zeros(self.lstm.num_layers * num_directions, batch_size, self.lstm.hidden_size).to(x.device)

        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])  # Use the last timestep
        return out

class MultimodalFusionLSTMClassifier(nn.Module):
    def __init__(self, text_dim, image_dim, hidden_dim, lstm_hidden_dim, num_classes):
        super(MultimodalFusionLSTMClassifier, self).__init__()
        self.text_model = nn.Linear(text_dim, hidden_dim)
        self.image_model = nn.Linear(image_dim, hidden_dim)
        # LSTM Classifier directly uses combined features
        self.classifier = LSTMClassifier(hidden_dim * 2, lstm_hidden_dim, num_layers=2, num_classes=num_classes)

    def forward(self, text_features, image_features):
        # Process text and image features through linear layers
        text_features = self.text_model(text_features)
        image_features = self.image_model(image_features)

        # Combine text and image features
        combined_features = torch.cat([text_features, image_features], dim=1)
        combined_features = combined_features.unsqueeze(1)  # Adding sequence dimension for LSTM

        # Classification
        output = self.classifier(combined_features)
        return output


In [10]:
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import KFold

def evaluate_model(model, loader, device):
    """Evaluate the model on given data loader and return the average loss."""
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for text_features, image_features, labels in loader:
            text_features, image_features, labels = text_features.to(device), image_features.to(device), labels.to(device)
            outputs = model(text_features, image_features)
            loss = nn.CrossEntropyLoss()(outputs, labels)
            total_loss += loss.item()
    return total_loss / len(loader)

def train_and_save_best_models_lstm(train_dataset, num_folds=5, num_epochs=10):
    kf = KFold(n_splits=num_folds)
    best_models = []

    for fold, (train_idx, val_idx) in enumerate(kf.split(train_dataset)):
        train_subset = Subset(train_dataset, train_idx)
        val_subset = Subset(train_dataset, val_idx)
        train_loader = DataLoader(train_subset, batch_size=64, shuffle=True)
        dev_loader = DataLoader(val_subset, batch_size=64, shuffle=True)

        model = MultimodalFusionLSTMClassifier(text_dim=768, image_dim=1000, hidden_dim=128, lstm_hidden_dim=256, num_classes=2).to(device)
        optimizer = optim.Adam(model.parameters(), lr=0.001)

        best_val_loss = float('inf')
        best_model_state = None

        for epoch in range(num_epochs):
            model.train()
            for text_features, image_features, labels in train_loader:
                text_features, image_features, labels = text_features.to(device), image_features.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(text_features, image_features)
                loss = nn.CrossEntropyLoss()(outputs, labels)
                loss.backward()
                optimizer.step()

        # Evaluate on the development set
        dev_loss = evaluate_model(model, dev_loader, device)
        if dev_loss < best_val_loss:
            best_val_loss = dev_loss
            best_model_state = model



        best_models.append(best_model_state)
        torch.save(best_model_state, f'best_model_fold_{fold}.pth')
        print(f"Fold {fold}: Best Validation Loss: {best_val_loss:.4f}")

    return best_models


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
best_models_lstm = train_and_save_best_models_lstm(train_dataset_bert_restnet,num_folds=5, num_epochs=10)


Fold 0: Best Validation Loss: 0.4248
Fold 1: Best Validation Loss: 0.3901
Fold 2: Best Validation Loss: 0.3967
Fold 3: Best Validation Loss: 0.4229
Fold 4: Best Validation Loss: 0.4233


###**RNN**

In [12]:

class Classifier_RNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, num_classes):
        super(Classifier_RNN, self).__init__()
        self.rnn = nn.RNN(input_dim, hidden_dim, num_layers, batch_first=True, bidirectional=False)
        self.fc = nn.Linear(hidden_dim, num_classes)  # Unidirectional RNN

    def forward(self, x):
        # Initialize the hidden state
        batch_size = x.size(0)
        h0 = torch.zeros(self.rnn.num_layers, batch_size, self.rnn.hidden_size).to(x.device)
        # Process inputs through the RNN
        out, _ = self.rnn(x, h0)
        # Use the output from the last timestep
        out = self.fc(out[:, -1, :])
        return out

class MultimodalFusionRNNClassifier(nn.Module):
    def __init__(self, text_dim, image_dim, hidden_dim, rnn_hidden_dim, num_classes):
        super(MultimodalFusionRNNClassifier, self).__init__()
        self.text_model = nn.Linear(text_dim, hidden_dim)
        self.image_model = nn.Linear(image_dim, hidden_dim)
        # RNN Classifier
        self.classifier = Classifier_RNN(hidden_dim * 2, rnn_hidden_dim, num_layers=2, num_classes=num_classes)

    def forward(self, text_features, image_features):
        # Process text and image features directly through their respective models
        text_features = self.text_model(text_features)
        image_features = self.image_model(image_features)
        # Combine the text and image features
        combined_features = torch.cat([text_features, image_features], dim=1)
        combined_features = combined_features.unsqueeze(1)  # Adding a sequence dimension for RNN
        # Classification
        output = self.classifier(combined_features)
        return output


In [13]:
import copy
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import KFold

def evaluate_model(model, loader, device):
    """Evaluate the model on given data loader and return the average loss."""
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for text_features, image_features, labels in loader:
            text_features, image_features, labels = text_features.to(device), image_features.to(device), labels.to(device)
            outputs = model(text_features, image_features)
            loss = nn.CrossEntropyLoss()(outputs, labels)
            total_loss += loss.item()
    return total_loss / len(loader)

def train_and_save_best_models_rnn(train_dataset, num_folds=5, num_epochs=10):
    kf = KFold(n_splits=num_folds)
    best_models = []

    for fold, (train_idx, val_idx) in enumerate(kf.split(train_dataset)):
        train_subset = Subset(train_dataset, train_idx)
        val_subset = Subset(train_dataset, val_idx)
        train_loader = DataLoader(train_subset, batch_size=64, shuffle=True)
        dev_loader = DataLoader(val_subset, batch_size=64, shuffle=True)

        model = MultimodalFusionRNNClassifier(text_dim=768, image_dim=1000, hidden_dim=128, rnn_hidden_dim=128, num_classes=2).to(device)
        optimizer = optim.Adam(model.parameters(), lr=0.001)

        best_val_loss = float('inf')
        best_model_state = None

        for epoch in range(num_epochs):
            model.train()
            for text_features, image_features, labels in train_loader:
                text_features, image_features, labels = text_features.to(device), image_features.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(text_features, image_features)
                loss = nn.CrossEntropyLoss()(outputs, labels)
                loss.backward()
                optimizer.step()

        # Evaluate on the development set
        dev_loss = evaluate_model(model, dev_loader, device)
        if dev_loss < best_val_loss:
            best_val_loss = dev_loss
            best_model_state = copy.deepcopy(model)



        best_models.append(best_model_state)
        torch.save(best_model_state, f'best_model_fold_{fold}.pth')
        print(f"Fold {fold}: Best Validation Loss: {best_val_loss:.4f}")

    return best_models


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
best_models_rrn = train_and_save_best_models_rnn(train_dataset_bert_restnet,num_folds=5, num_epochs=10)

Fold 0: Best Validation Loss: 0.4207
Fold 1: Best Validation Loss: 0.4062
Fold 2: Best Validation Loss: 0.3981
Fold 3: Best Validation Loss: 0.4333
Fold 4: Best Validation Loss: 0.4149


###**CNN**

In [14]:


class CNNClassifier(nn.Module):
    def __init__(self, input_channels, num_channels, num_classes, hidden_dim):
        super(CNNClassifier, self).__init__()
        kernel_size = 3
        padding = 1
        stride = 1
        pool_kernel_size = 2
        pool_stride = 2

        # Calculate the size after each layer
        size_after_conv = (hidden_dim + 2 * padding - (kernel_size - 1) - 1) // stride + 1
        size_after_pool = size_after_conv // pool_stride

        size_after_conv2 = (size_after_pool + 2 * padding - (kernel_size - 1) - 1) // stride + 1
        final_size = size_after_conv2 // pool_stride

        self.conv1 = nn.Conv1d(input_channels, num_channels, kernel_size, padding=padding)
        self.conv2 = nn.Conv1d(num_channels, num_channels * 2, kernel_size, padding=padding)
        self.pool = nn.MaxPool1d(pool_kernel_size, stride=pool_stride)

        # Fully connected layer input size calculation
        self.fc_input_size = num_channels * 2 * final_size
        self.fc = nn.Linear(self.fc_input_size, num_classes)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

class MultimodalFusionCNNClassifier(nn.Module):
    def __init__(self, text_dim, image_dim, hidden_dim, num_classes):
        super(MultimodalFusionCNNClassifier, self).__init__()
        self.text_model = nn.Linear(text_dim, hidden_dim)
        self.image_model = nn.Linear(image_dim, hidden_dim)
        self.classifier = CNNClassifier(2, hidden_dim, num_classes, hidden_dim)
        self.hidden_dim = hidden_dim  # Ensure hidden_dim is stored as an instance variable

    def forward(self, text_features, image_features):
        text_features = self.text_model(text_features)
        image_features = self.image_model(image_features)
        # Combine text and image features
        combined_features = torch.cat([text_features, image_features], dim=1)
        combined_features = combined_features.view(-1, 2, self.hidden_dim)  # Use self.hidden_dim correctly
        # Classification
        output = self.classifier(combined_features)
        return output



In [15]:
import copy
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import KFold

def evaluate_model(model, loader, device):
    """Evaluate the model on given data loader and return the average loss."""
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for text_features, image_features, labels in loader:
            text_features, image_features, labels = text_features.to(device), image_features.to(device), labels.to(device)
            outputs = model(text_features, image_features)
            loss = nn.CrossEntropyLoss()(outputs, labels)
            total_loss += loss.item()
    return total_loss / len(loader)

def train_and_save_best_models_cnn(train_dataset, num_folds=5, num_epochs=10):
    kf = KFold(n_splits=num_folds)
    best_models = []

    for fold, (train_idx, val_idx) in enumerate(kf.split(train_dataset)):
        train_subset = Subset(train_dataset, train_idx)
        val_subset = Subset(train_dataset, val_idx)
        train_loader = DataLoader(train_subset, batch_size=64, shuffle=True)
        dev_loader = DataLoader(val_subset, batch_size=64, shuffle=True)

        model = MultimodalFusionCNNClassifier(text_dim=768, image_dim=1000, hidden_dim=128, num_classes=2).to(device)
        optimizer = optim.Adam(model.parameters(), lr=0.001)

        best_val_loss = float('inf')
        best_model_state = None

        for epoch in range(num_epochs):
            model.train()
            for text_features, image_features, labels in train_loader:
                text_features, image_features, labels = text_features.to(device), image_features.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(text_features, image_features)
                loss = nn.CrossEntropyLoss()(outputs, labels)
                loss.backward()
                optimizer.step()

        # Evaluate on the development set
        dev_loss = evaluate_model(model, dev_loader, device)
        if dev_loss < best_val_loss:
            best_val_loss = dev_loss
            best_model_state = copy.deepcopy(model)



        best_models.append(best_model_state)
        torch.save(best_model_state, f'best_model_fold_{fold}.pth')
        print(f"Fold {fold}: Best Validation Loss: {best_val_loss:.4f}")

    return best_models


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
best_models_crn = train_and_save_best_models_cnn(train_dataset_bert_restnet,num_folds=5, num_epochs=10)

Fold 0: Best Validation Loss: 0.4878
Fold 1: Best Validation Loss: 0.4358
Fold 2: Best Validation Loss: 0.4179
Fold 3: Best Validation Loss: 0.5207
Fold 4: Best Validation Loss: 0.5260


### **Simple model with softmax**

In [16]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleFusionClassifier(nn.Module):
    def __init__(self, text_dim, image_dim, hidden_dim, num_classes):
        super(SimpleFusionClassifier, self).__init__()
        self.text_model = nn.Linear(text_dim, hidden_dim)
        self.image_model = nn.Linear(image_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim * 2, num_classes)

    def forward(self, text_features, image_features):
        # Process text and image features through their respective linear transformations
        text_features = self.text_model(text_features)
        image_features = self.image_model(image_features)

        # Concatenate features from both modalities
        combined_features = torch.cat([text_features, image_features], dim=1)

        # Pass the combined features through the final fully connected layer
        # The outputs are logits and should not be passed through softmax here when using CrossEntropyLoss
        outputs = self.fc(combined_features)

        # It is important not to apply softmax here because CrossEntropyLoss does it internally
        return outputs


In [17]:

def train_and_evaluate(train_dataset, num_folds=5, num_epochs=10):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    kf = KFold(n_splits=num_folds)
    best_models = []

    for fold, (train_idx, val_idx) in enumerate(kf.split(train_dataset)):
        train_subset = Subset(train_dataset, train_idx)
        val_subset = Subset(train_dataset, val_idx)
        train_loader = DataLoader(train_subset, batch_size=64, shuffle=True)
        dev_loader = DataLoader(val_subset, batch_size=64, shuffle=True)

        model = SimpleFusionClassifier(text_dim=768, image_dim=1000, hidden_dim=128, num_classes=2).to(device)
        optimizer = optim.Adam(model.parameters(), lr=0.001)

        best_val_loss = float('inf')

        for epoch in range(num_epochs):
            model.train()
            for text_features, image_features, labels in train_loader:
                text_features, image_features, labels = text_features.to(device), image_features.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(text_features, image_features)
                loss = nn.CrossEntropyLoss()(outputs, labels)
                loss.backward()
                optimizer.step()

            dev_loss = evaluate_model(model, dev_loader, device)
            if dev_loss < best_val_loss:
                best_val_loss = dev_loss
                best_model_state = model



        best_models.append(best_model_state)
        torch.save(best_model_state, f'best_model_fold_{fold}.pth')
        print(f"Fold {fold}: Best Validation Loss: {best_val_loss:.4f}")

    return best_models

def evaluate_model(model, loader, device):
    model.eval()
    total_loss = 0
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        for text_features, image_features, labels in loader:
            text_features, image_features, labels = text_features.to(device), image_features.to(device), labels.to(device)
            outputs = model(text_features, image_features)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
    return total_loss / len(loader)


simpel_model=train_and_evaluate(train_dataset_bert_restnet,num_folds=5, num_epochs=10)






Fold 0: Best Validation Loss: 0.4161
Fold 1: Best Validation Loss: 0.3901
Fold 2: Best Validation Loss: 0.3943
Fold 3: Best Validation Loss: 0.4132
Fold 4: Best Validation Loss: 0.4093


### **Multi Layer Perzeptron**


In [18]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class MLPClassifier(nn.Module):
    def __init__(self, text_dim, image_dim, hidden_dim, num_classes):
        super(MLPClassifier, self).__init__()
        # Define linear layers to process text and image inputs
        self.text_model = nn.Linear(text_dim, hidden_dim)
        self.image_model = nn.Linear(image_dim, hidden_dim)

        # Define MLP layers to process combined features
        self.fc1 = nn.Linear(hidden_dim * 2, hidden_dim * 4)  # First MLP layer
        self.fc2 = nn.Linear(hidden_dim * 4, hidden_dim * 2)  # Second MLP layer
        self.fc3 = nn.Linear(hidden_dim * 2, num_classes)     # Output layer

    def forward(self, text_features, image_features):
        # Process text and image features through linear layers
        text_features = self.text_model(text_features)
        image_features = self.image_model(image_features)

        # Combine the features from both modalities
        combined_features = torch.cat([text_features, image_features], dim=1)

        # Process combined features through MLP layers
        combined_features = F.relu(self.fc1(combined_features))
        combined_features = F.relu(self.fc2(combined_features))
        output = self.fc3(combined_features)
        return output


In [19]:


def train_and_evaluate(train_dataset, num_folds=5, num_epochs=10):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    kf = KFold(n_splits=num_folds)
    best_models = []

    for fold, (train_idx, val_idx) in enumerate(kf.split(train_dataset)):
        train_subset = Subset(train_dataset, train_idx)
        val_subset = Subset(train_dataset, val_idx)
        train_loader = DataLoader(train_subset, batch_size=64, shuffle=True)
        dev_loader = DataLoader(val_subset, batch_size=64, shuffle=True)

        model = MLPClassifier(text_dim=768, image_dim=1000, hidden_dim=128, num_classes=2).to(device)
        optimizer = optim.Adam(model.parameters(), lr=0.001)

        best_val_loss = float('inf')

        for epoch in range(num_epochs):
            model.train()
            for text_features, image_features, labels in train_loader:
                text_features, image_features, labels = text_features.to(device), image_features.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(text_features, image_features)
                loss = nn.CrossEntropyLoss()(outputs, labels)
                loss.backward()
                optimizer.step()

            dev_loss = evaluate_model(model, dev_loader, device)
            if dev_loss < best_val_loss:
                best_val_loss = dev_loss
                best_model_state = copy.deepcopy(model)

        best_models.append(best_model_state)
        torch.save(best_model_state, f'best_model_fold_{fold}.pth')
        print(f"Fold {fold}: Best Validation Loss: {best_val_loss:.4f}")

    return best_models

def evaluate_model(model, loader, device):
    model.eval()
    total_loss = 0
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        for text_features, image_features, labels in loader:
            text_features, image_features, labels = text_features.to(device), image_features.to(device), labels.to(device)
            outputs = model(text_features, image_features)
            loss = criterion(outputs, labels)
            total_loss += loss.item()

    return total_loss / len(loader)


best_models_mlp=train_and_evaluate(train_dataset_bert_restnet,num_folds=5, num_epochs=10)

Fold 0: Best Validation Loss: 0.4121
Fold 1: Best Validation Loss: 0.3871
Fold 2: Best Validation Loss: 0.3915
Fold 3: Best Validation Loss: 0.4135
Fold 4: Best Validation Loss: 0.4082


## Logits and Majority voting

In [20]:


import torch

def get_logits_from_models(models, test_loader):
    all_model_logits = []  # Diese Liste speichert die Logits von jedem Modell

    for model in models:
        model.eval()  # Setzt das Modell in den Evaluierungsmodus
        model_logits = []  # Eine Liste, um Logits f端r das aktuelle Modell zu speichern

        with torch.no_grad():  # Deaktiviert die Gradientenberechnung
            for text_feature, image_feature, _ in test_loader:
                # Erhalte die Logits f端r die aktuellen Features
                logits = model(text_feature, image_feature)

                model_logits.append(logits)  # F端ge die Logits zur Liste hinzu



        model_logits_tensor = torch.cat(model_logits , dim=0)
        all_model_logits.append(model_logits_tensor)

    return all_model_logits  # Gibt eine Liste von Tensoren zur端ck, jeweils ein Tensor pro Modell




def get_label_from_logits(logits):
    """Convert averaged logits to labels."""
    return torch.argmax(logits, dim=1)

def average_logits(logits_list):
    """Average logits across different models."""
    stacked_logits = torch.stack(logits_list, dim=0)
    return torch.mean(stacked_logits, dim=0)

def majority_vote(labels_list):
    """Perform a majority vote across different architectures."""
    # Assuming labels_list is a list of tensors, one per architecture
    labels_array = torch.stack(labels_list, dim=0)
    labels_mode, _ = torch.mode(labels_array, dim=0)  # Get the mode along the first dim
    return labels_mode


def calculate_accuracy(predictions, labels):
    correct = (predictions == labels).sum().item()
    total = len(labels)
    accuracy = correct / total
    return accuracy



## Ensemble evaluation

In [21]:

models=[best_models_rrn,simpel_model,best_models_lstm,best_models_mlp,best_models_crn]


all_logits=[]

for model in models:
  all_logits.append(get_logits_from_models(model, test_loader))



final_labels_for_each_arch=[]

for logits_list in all_logits:
    get_label=get_label_from_logits(average_logits(logits_list))
    final_labels_for_each_arch.append(get_label)

final_vote_labels = majority_vote(final_labels_for_each_arch)

print(calculate_accuracy(final_vote_labels,test_labels))

0.8873826903023984


In [22]:
from sklearn.metrics import f1_score

score = f1_score(test_labels, final_vote_labels, average='macro')
print("macro F1 Score: ", score)

macro F1 Score:  0.47016574585635357
