In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE
import torch

class cfg:
    """Namespace of run-wide settings read by the cells below."""

    # Fold number that the fold!=/fold== queries in this cell hold out as the test split.
    FOLD = 0
    # Seed values; presumably intended for reproducibility — TODO confirm usage.
    SEEDS = [42]
    batch_size = 64
    # Prefer the GPU when PyTorch reports one, otherwise fall back to the CPU.
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Dataset 1: API-call sequences, one row per sample.
data = pd.read_csv('dynamic_api_call_sequence_per_malware_100_0_306.csv')

# 'hash' identifies a sample and is not used as a feature; rows with any missing value are discarded.
data1 = data.drop(columns=['hash'])
data1 = data1.dropna(how='any')
print(data1.shape)

# Keep at most 4000 rows per 'malware' class. The draw is seeded from cfg.SEEDS so the
# subset, the fold assignment and the CSV written below are identical on every run.
data_dict = {}
for label, group in data1.groupby('malware'):
    data_dict[label] = group.sample(min(group.shape[0], 4000), random_state=cfg.SEEDS[0])
data2 = pd.concat(data_dict.values())
# reset_index() keeps the previous row labels in an 'index' column; it is dropped again below.
data2 = data2.reset_index()

# Tag every row with the stratified fold in which it is held out.
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=5)
for fold, (_, val_idx) in enumerate(skf.split(data2, data2.malware)):
    data2.loc[val_idx, 'fold'] = fold
data2.fold = data2.fold.astype(int)
data2.to_csv('malware-analysis-1000.csv')

# Fold cfg.FOLD is the test split, every other fold is training data.
X_train = data2.query(f'fold!={cfg.FOLD}').drop(columns=['index', 'malware', 'fold'])
X_test = data2.query(f'fold=={cfg.FOLD}').drop(columns=['index', 'malware', 'fold'])

Y_train = data2.query(f'fold!={cfg.FOLD}').malware
Y1_test = data2.query(f'fold=={cfg.FOLD}').malware

# Checking the sizes of the datasets
print(f"Training set size: {X_train.shape[0]}")
print(f"Test set size: {X_test.shape[0]}")

# Oversample the training split only, so the test split keeps its original class balance.
smote = SMOTE(random_state=42)
X_train_res, y1_train_res = smote.fit_resample(X_train, Y_train)

# Check the distribution of classes after resampling
print(pd.Series(y1_train_res).value_counts())

# Standardise features: statistics are fitted on the resampled training data and reused for the test data.
scaler = StandardScaler()
X1_train_res = scaler.fit_transform(X_train_res)
X1_test = scaler.transform(X_test)

In [None]:

from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import pandas as pd

# Load dataset
df = pd.read_csv('MalBehavD-V1-dataset.csv')

# Build one space-delimited API-call string per row from every column after the first two.
# Missing cells are skipped first: astype(str) would otherwise turn them into a literal
# "nan" token that ends up in the tokenizer vocabulary.
df['api_calls'] = df.iloc[:, 2:].apply(lambda row: ' '.join(row.dropna().astype(str)), axis=1)

# Tokenizing API call sequences
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['api_calls'])
X_tokenized = tokenizer.texts_to_sequences(df['api_calls'])

# Pad / truncate to the longest observed sequence, capped at 100 tokens.
max_len = min(max(df['api_calls'].apply(lambda x: len(x.split()))), 100)
X_padded = pad_sequences(X_tokenized, maxlen=max_len, padding='post', truncating='post')

# One-Hot Encoding the 'labels' column
encoder = OneHotEncoder(sparse_output=False)
y_one_hot = encoder.fit_transform(df[['labels']])

# Min-Max Normalization for numerical columns
# NOTE(review): this selection also matches 'labels' when it is stored as int64/float64,
# and the scaler is fitted before the train/test split below — confirm both are intended.
numeric_columns = df.select_dtypes(include=['float64', 'int64']).columns
df[numeric_columns] = MinMaxScaler().fit_transform(df[numeric_columns])
print("Preprocessing completed successfully!")

# Prepare data for splitting into train/test sets
X = df.drop(columns=['labels'])
y = df['labels']

# Split into 70% training and 30% testing
X2_train, X2_test, y2_train, y2_test = train_test_split(X, y, test_size=0.30, random_state=42)

# Check the size of the splits
print(f"Training set size: {X2_train.shape[0]}")
print(f"Testing set size: {X2_test.shape[0]}")

In [None]:
import csv
import pandas as pd
import numpy as np

from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

csv_path = "malware_API_dataset.csv"
data = []

# Rows have a variable number of fields, so the file is parsed with csv.reader instead of
# pandas. newline="" is what the csv module requires so that embedded newlines are handled
# by the reader itself.
# NOTE(review): if the file starts with a header row it is ingested as a sample with
# label 1 — confirm the file has no header.
with open(csv_path, "r", encoding="utf-8", newline="") as file:
    reader = csv.reader(file)
    for row in reader:
        # Blank lines come back as [] and would make row[0] raise IndexError; skip any
        # row that does not carry at least the class and the hash.
        if len(row) < 2:
            continue

        # row[0] = malware_class
        # row[1] = sha256 (hash)
        # row[2:] = actual API call strings, e.g. ["NtOpenFile", "ReadFile", "CloseHandle"]
        malware_class = row[0]
        sha256        = row[1]
        api_calls     = row[2:]

        # Convert "malware_class" to 0 or 1
        # (If it contains "not-a-virus" => 0, else 1)
        label = 0 if "not-a-virus" in malware_class else 1

        # Join all API calls into a single space-delimited string
        api_calls_str = " ".join(api_calls)

        data.append([label, sha256, api_calls_str])

# Create a DataFrame
df_processed = pd.DataFrame(data, columns=["malware_class", "sha256", "api_calls"])

# Drop 'sha256' (an identifier, not a feature); reassigning keeps the cell re-runnable.
df_processed = df_processed.drop(columns=["sha256"])
print("Sample rows:\n", df_processed.head())

# MinMax Scale any numeric cols other than the label
numeric_cols = df_processed.select_dtypes(include=[np.number]).columns
numeric_cols = [col for col in numeric_cols if col != "malware_class"]

if len(numeric_cols) > 0:
    scaler = MinMaxScaler()
    df_processed[numeric_cols] = scaler.fit_transform(df_processed[numeric_cols])

# Tokenization & Padding for the 'api_calls' col
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df_processed["api_calls"])
X3_sequences = tokenizer.texts_to_sequences(df_processed["api_calls"])

# Pad the sequences
MAX_LEN = 300
X3_padded = pad_sequences(X3_sequences, maxlen=MAX_LEN, padding='post')

# One-Hot Encode the Label
encoder = OneHotEncoder(sparse_output=False)
y3_onehot = encoder.fit_transform(df_processed[["malware_class"]])

print("X3_padded shape =", X3_padded.shape)
print("y3_onehot shape =", y3_onehot.shape)
y3 = df_processed["malware_class"].values
print("y3 shape =", y3.shape)

In [None]:
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Combine API Sequences from Dataset 1 & 2
MAX_LEN = 300

# Build the per-row token string from the feature columns only. Besides the bookkeeping
# columns, the two columns this cell itself adds ('api_calls', 'malware_label') are
# excluded, so re-running the cell neither duplicates the text nor writes the label into it.
non_feature_cols = ['index', 'malware', 'fold', 'api_calls', 'malware_label']
data2['api_calls'] = data2.drop(columns=non_feature_cols, errors='ignore').astype(str).agg(' '.join, axis=1)
data2['malware_label'] = data2['malware'].astype('category').cat.codes

# One tokenizer fitted on both corpora, so both datasets share a single vocabulary.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data2['api_calls'].tolist() + df['api_calls'].tolist())

X1 = pad_sequences(tokenizer.texts_to_sequences(data2['api_calls']), maxlen=MAX_LEN, padding='post')
y1 = data2['malware_label'].values

X2 = pad_sequences(tokenizer.texts_to_sequences(df['api_calls']), maxlen=MAX_LEN, padding='post')
y2 = df['labels'].values

# Combine datasets
X_combined = np.vstack([X1, X2])
y_combined = np.hstack([y1, y2])

# Split into training and testing datasets
X_train, X_test, y_train, y_test = train_test_split(X_combined, y_combined, test_size=0.3, random_state=42)

# PyTorch Dataset & Dataloader
class SequenceDataset(Dataset):
    """Map-style dataset pairing token-id sequences with one target value each.

    Args:
        X: array-like or tensor of token ids; stored as a ``torch.long`` tensor.
        y: array-like or tensor with one label per row of ``X``; stored as a
            ``torch.float`` tensor with a trailing axis of size 1.

    ``torch.as_tensor`` is used so that inputs which already are tensors are accepted
    without the copy-construct ``UserWarning`` that ``torch.tensor(tensor)`` emits.
    Input that already has the requested dtype is reused without a copy.
    """

    def __init__(self, X, y):
        self.X = torch.as_tensor(X, dtype=torch.long)
        # unsqueeze(1) adds the trailing axis, giving one column per sample.
        self.y = torch.as_tensor(y, dtype=torch.float).unsqueeze(1)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# Mini-batch size shared by both loaders; only the training loader reshuffles its data.
batch_size = 64
train_loader = DataLoader(
    dataset=SequenceDataset(X_train, y_train),
    batch_size=batch_size,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=SequenceDataset(X_test, y_test),
    batch_size=batch_size,
)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
#import torch.utils.checkpoint as checkpoint
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.metrics import precision_recall_fscore_support, roc_auc_score, accuracy_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import math

# Hyperparameters
MAX_LEN = 300             # sequence length passed to pad_sequences and used for generated samples
batch_size = 64           # mini-batch size of every DataLoader
embedding_dim = 128       # width of the token embeddings
hidden_dim = 256          # GRU hidden size
learning_rate = 4e-4      # Adam learning rate for both networks
epochs = 20               # adversarial epochs
accumulation_steps = 4    # optimizers step once every this many batches

# Run on the GPU when one is visible to PyTorch, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Gumbel-Softmax
def sample_gumbel_softmax(logits, tau=1.0):
    """Perturb `logits` with Gumbel noise and apply a temperature-scaled softmax.

    Args:
        logits: tensor of unnormalised scores; the softmax is taken over the last axis.
        tau: temperature the perturbed logits are divided by.

    Returns:
        Tensor shaped like `logits` whose last axis sums to 1.
    """
    eps = 1e-9  # keeps both logarithms away from log(0)
    uniform = torch.rand_like(logits)
    gumbel = -torch.log(-torch.log(uniform + eps) + eps)
    perturbed = logits + gumbel
    return torch.softmax(perturbed / tau, dim=-1)

# Generator with Dropout Regularization
class Generator(nn.Module):
    """Token-level sequence model: embedding -> 2-layer GRU -> dropout -> linear projection.

    Args:
        input_dim: number of rows in the embedding table.
        embedding_dim: width of each embedding vector.
        hidden_dim: GRU hidden size.
        output_dim: size of the per-position output vector.
        dropout: dropout probability applied to the GRU outputs.
    """

    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim, dropout=0.3):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=input_dim, embedding_dim=embedding_dim)
        self.rnn = nn.GRU(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            num_layers=2,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        """Return one `output_dim`-sized vector for every position of `x`."""
        hidden_states, _ = self.rnn(self.embedding(x))
        return self.fc(self.dropout(hidden_states))


# Discriminator with GRU and Multihead Attention
class Discriminator(nn.Module):
    """Sequence scorer: embedding -> 2-layer bidirectional GRU -> 4-head self-attention -> linear head.

    `forward` returns a single un-normalised score per sequence, read from the
    attention output at the final time step.
    """

    def __init__(self, input_dim, embedding_dim, hidden_dim, dropout=0.3):
        super().__init__()
        # A bidirectional GRU concatenates both directions, doubling the feature width.
        bi_dim = hidden_dim * 2
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        self.rnn = nn.GRU(embedding_dim, hidden_dim, num_layers=2, bidirectional=True, batch_first=True)
        self.attn = nn.MultiheadAttention(embed_dim=bi_dim, num_heads=4, batch_first=True)
        self.fc = nn.Linear(bi_dim, 1)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        states, _ = self.rnn(self.embedding(x))
        # Self-attention: the GRU states serve as query, key and value.
        attended, _ = self.attn(states, states, states)
        last_step = attended[:, -1, :]
        return self.fc(self.dropout(last_step))

def init_weights(m):
    """Re-initialise one module in place; meant to be passed to `Module.apply`.

    `nn.Linear` and `nn.GRU` weights get Xavier-uniform values and their biases are
    set to 0.01. Every other module type is left untouched.
    """
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight.data)
        if m.bias is not None:
            nn.init.constant_(m.bias.data, 0.01)
        return

    if not isinstance(m, nn.GRU):
        return

    # GRU parameter names contain either 'weight' or 'bias'; dispatch on that.
    for param_name, tensor in m.named_parameters():
        if 'weight' in param_name:
            nn.init.xavier_uniform_(tensor.data)
        elif 'bias' in param_name:
            nn.init.constant_(tensor.data, 0.01)

# Gradient Penalty Function (WGAN-GP style)
def gradient_penalty(discriminator, real_data, fake_data):
    """Gradient penalty (WGAN-GP style) computed in the discriminator's embedding space.

    Token ids are integers, so interpolating them and casting back to ``long`` cuts the
    autograd graph: the gradient with respect to the interpolation is then ``None`` and
    the penalty collapses to a constant. Instead, the real and fake sequences are
    embedded, the *embeddings* are interpolated, and the interpolation is fed through
    the rest of the discriminator via a temporary forward hook on its embedding layer.

    Args:
        discriminator: module exposing an ``embedding`` sub-module whose output feeds
            the rest of its forward pass.
        real_data: integer token ids, first axis is the batch.
        fake_data: integer token ids with the same shape as ``real_data``.

    Returns:
        Scalar tensor ``mean((||grad|| - 1) ** 2)`` that is differentiable with respect
        to the discriminator parameters.

    Raises:
        AttributeError: if ``discriminator`` has no ``embedding`` attribute.
    """
    embedding = getattr(discriminator, "embedding", None)
    if embedding is None:
        raise AttributeError("gradient_penalty requires the discriminator to expose an `embedding` layer")

    batch_size = real_data.size(0)

    # The interpolation points are treated as plain inputs, so no graph is kept for them.
    with torch.no_grad():
        real_emb = embedding(real_data)
        fake_emb = embedding(fake_data)
    epsilon = torch.rand(batch_size, 1, 1, device=real_emb.device, dtype=real_emb.dtype)
    interpolated = (epsilon * real_emb + (1 - epsilon) * fake_emb).requires_grad_(True)

    # Returning a value from a forward hook replaces the module output, so the call below
    # evaluates the discriminator on `interpolated` instead of on embedded token ids.
    handle = embedding.register_forward_hook(lambda module, inputs, output: interpolated)
    try:
        # create_graph=True needs a double backward through the recurrent layer; cuDNN is
        # switched off for this pass so the natively differentiable kernels are used.
        with torch.backends.cudnn.flags(enabled=False):
            d_interpolated = discriminator(real_data)
            gradients = torch.autograd.grad(
                outputs=d_interpolated,
                inputs=interpolated,
                grad_outputs=torch.ones_like(d_interpolated),
                create_graph=True,
                retain_graph=True,
            )[0]
    finally:
        handle.remove()

    gradients = gradients.reshape(batch_size, -1)
    # The small constant keeps the square root differentiable at a zero gradient.
    gradient_norm = (gradients.pow(2).sum(dim=1) + 1e-12).sqrt()
    return ((gradient_norm - 1) ** 2).mean()

# Feature Matching Loss
def feature_matching_loss(real_features, fake_features):
    """Return the mean squared element-wise difference between the two tensors."""
    difference = real_features - fake_features
    return torch.mean(difference ** 2)

# Dataset Class
class SequenceDataset(Dataset):
    """Map-style dataset pairing token-id sequences with one target value each.

    Args:
        X: array-like or tensor of token ids; stored as a ``torch.long`` tensor.
        y: array-like or tensor with one label per row of ``X``; stored as a
            ``torch.float`` tensor with a trailing axis of size 1.

    ``torch.as_tensor`` is used so that inputs which already are tensors are accepted
    without the copy-construct ``UserWarning`` that ``torch.tensor(tensor)`` emits.
    Input that already has the requested dtype is reused without a copy.
    """

    def __init__(self, X, y):
        self.X = torch.as_tensor(X, dtype=torch.long)
        # unsqueeze(1) adds the trailing axis, giving one column per sample.
        self.y = torch.as_tensor(y, dtype=torch.float).unsqueeze(1)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

#Pretrain Generator
def pretrain_generator(generator, train_loader, gen_optimizer, vocab_size, device=None, epochs=5):
    """Pretrain `generator` as a next-token predictor with cross-entropy loss.

    Every sequence is split into inputs (all tokens but the last) and targets (the same
    sequence shifted left by one position).

    Args:
        generator: model mapping token ids to per-position logits of size `vocab_size`.
        train_loader: iterable of ``(sequences, labels)`` batches; the labels are ignored.
        gen_optimizer: optimizer over the generator parameters.
        vocab_size: size of the last axis of the generator output.
        device: device the batches are moved to. ``None`` (the default) uses the device
            the generator's parameters live on, so a CPU-only run no longer fails with
            "Torch not compiled with CUDA enabled".
        epochs: number of passes over `train_loader`.

    Returns:
        None. The average loss of each epoch is printed.
    """
    if device is None:
        first_param = next(generator.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    print("📘 Starting Generator Pretraining...")
    generator.train()
    criterion = nn.CrossEntropyLoss()
    num_batches = len(train_loader)

    for epoch in range(epochs):
        total_loss = 0.0
        for sequences, _ in train_loader:
            sequences = sequences.to(device)
            inputs = sequences[:, :-1]  # All tokens except the last
            targets = sequences[:, 1:]  # All tokens except the first (shifted)

            gen_optimizer.zero_grad()
            # Flatten batch and time so each position is one classification example.
            logits = generator(inputs).reshape(-1, vocab_size)
            targets = targets.reshape(-1)

            loss = criterion(logits, targets)
            loss.backward()
            gen_optimizer.step()

            total_loss += loss.item()

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        avg_loss = total_loss / max(num_batches, 1)
        print(f"Pretrain Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}")


# Training Function (with Gradient Penalty and Feature Matching)
def _discriminate_embeddings(discriminator, embedded, token_shape):
    """Run `discriminator` on pre-computed embeddings instead of on token ids."""
    placeholder = torch.zeros(token_shape, dtype=torch.long, device=embedded.device)
    # A forward hook that returns a value replaces the embedding layer's output.
    handle = discriminator.embedding.register_forward_hook(lambda module, inputs, output: embedded)
    try:
        return discriminator(placeholder)
    finally:
        handle.remove()


def train_adversarial(generator, discriminator, gen_optimizer, disc_optimizer, criterion, train_loader, val_loader, device, tau, vocab_size, lambda_gp=10, lambda_feat=5):
    """Run one adversarial epoch and report the average discriminator losses.

    Compared with a plain per-batch loop, three things are handled explicitly:

    * Gradient accumulation: gradients are cleared only after an optimizer step, so all
      `accumulation_steps` batches of a window contribute (losses are divided by
      `accumulation_steps`), and the last, possibly shorter, window is flushed.
    * Generator gradients: the generator's hard token choice is passed to the
      discriminator as a straight-through one-hot (hard values forward, Gumbel-softmax
      gradient backward) multiplied with the discriminator's embedding matrix. Feeding
      `argmax` indices would leave the generator without any gradient.
    * The discriminator parameters are frozen during the generator phase, so the
      generator loss never writes into the gradients the discriminator steps on.

    Args:
        generator, discriminator: the two networks; `discriminator` must expose an
            `embedding` layer whose width matches what the rest of its forward expects.
        gen_optimizer, disc_optimizer: their optimizers.
        criterion: loss applied to discriminator outputs and targets.
        train_loader, val_loader: iterables of ``(sequences, labels)`` batches.
        device: device the batches are moved to.
        tau: Gumbel-softmax temperature.
        vocab_size: number of tokens the generator chooses from.
        lambda_gp: weight of the gradient penalty in the discriminator loss.
        lambda_feat: weight of the feature-matching term in the generator loss.

    Returns:
        ``(mean discriminator training loss, mean validation loss)``; an empty loader
        contributes 0.0 instead of raising ZeroDivisionError.
    """
    generator.train()
    discriminator.train()
    total_train_loss = 0.0
    total_val_loss = 0.0
    num_batches = len(train_loader)

    disc_optimizer.zero_grad()
    gen_optimizer.zero_grad()

    for i, (real_sequences, real_labels) in enumerate(train_loader):
        real_sequences, real_labels = real_sequences.to(device), real_labels.to(device)
        n_samples, seq_len = real_sequences.size(0), real_sequences.size(1)
        # Step at the end of every accumulation window and after the final batch.
        step_now = (i + 1) % accumulation_steps == 0 or (i + 1) == num_batches

        # ---- Discriminator training ----
        real_output = discriminator(real_sequences)
        real_loss = criterion(real_output, real_labels)

        noise = torch.randint(0, vocab_size, (n_samples, seq_len), device=device)
        # The sampled ids are constants for the discriminator, so no generator graph is needed.
        with torch.no_grad():
            fake_probs = sample_gumbel_softmax(generator(noise), tau=tau)
            fake_indices = torch.multinomial(fake_probs.view(-1, vocab_size), 1).view(n_samples, seq_len)
        fake_output = discriminator(fake_indices)
        # NOTE(review): generated samples share target 0 with whatever class the real
        # label 0 denotes — confirm that this is intended.
        fake_loss = criterion(fake_output, torch.zeros_like(real_labels))

        # Calculate Gradient Penalty
        gp_loss = gradient_penalty(discriminator, real_sequences, fake_indices)

        disc_loss = real_loss + fake_loss + lambda_gp * gp_loss
        (disc_loss / accumulation_steps).backward()
        if step_now:
            disc_optimizer.step()
            disc_optimizer.zero_grad()

        # ---- Generator training ----
        frozen = [p for p in discriminator.parameters() if p.requires_grad]
        for p in frozen:
            p.requires_grad_(False)
        try:
            fake_probs = sample_gumbel_softmax(generator(noise), tau=tau)
            hard = torch.zeros_like(fake_probs).scatter_(-1, fake_probs.argmax(dim=-1, keepdim=True), 1.0)
            # Straight-through estimator: forward value is the one-hot choice, backward
            # gradient is that of the soft probabilities.
            straight_through = hard + fake_probs - fake_probs.detach()
            fake_embedded = straight_through @ discriminator.embedding.weight
            fake_output_gen = _discriminate_embeddings(discriminator, fake_embedded, (n_samples, seq_len))
            gen_loss = criterion(fake_output_gen, torch.ones_like(real_labels))

            # Feature matching on the discriminator outputs; the real side is a constant target.
            with torch.no_grad():
                real_features = discriminator(real_sequences)
            feat_loss = feature_matching_loss(real_features, fake_output_gen)

            gen_loss = gen_loss + lambda_feat * feat_loss
            (gen_loss / accumulation_steps).backward()
        finally:
            for p in frozen:
                p.requires_grad_(True)

        if step_now:
            gen_optimizer.step()
            gen_optimizer.zero_grad()

        total_train_loss += disc_loss.item()

    # Validation Loss
    discriminator.eval()
    with torch.no_grad():
        for real_sequences, real_labels in val_loader:
            real_sequences, real_labels = real_sequences.to(device), real_labels.to(device)
            val_output = discriminator(real_sequences)
            val_loss = criterion(val_output, real_labels)
            total_val_loss += val_loss.item()

    return total_train_loss / max(num_batches, 1), total_val_loss / max(len(val_loader), 1)


# Evaluation Function with Threshold Adjustment
def evaluate_model(discriminator, test_loader, device, threshold=0.4):
    """Evaluate `discriminator` as a binary classifier on `test_loader`.

    The discriminator output is treated as a logit: it is passed through a sigmoid and
    a sample is predicted positive when its probability exceeds `threshold`. Rounding
    the raw logits instead would produce values outside {0, 1} and ignore `threshold`.

    Args:
        discriminator: model returning one logit per sequence.
        test_loader: iterable of ``(sequences, labels)`` batches with 0/1 labels.
        device: device the sequences are moved to.
        threshold: probability above which a sample is predicted positive.

    Returns:
        ``(precision, recall, f1, auc, accuracy)``; precision, recall and F1 are
        weighted averages, AUC is computed from the probabilities.
    """
    discriminator.eval()
    y_true, y_prob = [], []

    with torch.no_grad():
        for sequences, labels in test_loader:
            sequences = sequences.to(device)
            probs = torch.sigmoid(discriminator(sequences))
            # ravel() flattens the trailing singleton axis so the metrics get 1-D input.
            y_true.extend(labels.cpu().numpy().ravel())
            y_prob.extend(probs.cpu().numpy().ravel())

    y_true = np.asarray(y_true).astype(int)
    y_prob = np.asarray(y_prob)
    y_hat = (y_prob > threshold).astype(int)

    precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_hat, average='weighted', zero_division=0)
    auc = roc_auc_score(y_true, y_prob)
    accuracy = accuracy_score(y_true, y_hat)
    return precision, recall, f1, auc, accuracy

# =======================================
# 🧪 Dataset Preparation
# Normalize binary labels
data2['malware_label'] = data2['malware'].astype('category').cat.codes
data2['malware_label'] = data2['malware_label'].apply(lambda x: 0 if x == 0 else 1)
df['labels'] = df['labels'].apply(lambda x: 0 if x == 0 else 1)

# Build API call sequences from the feature columns only. 'malware_label' (created just
# above) and any 'api_calls' column left by an earlier run must be excluded, otherwise
# the label itself becomes a token of the input text.
derived_cols = ['index', 'malware', 'fold', 'api_calls', 'malware_label']
data2['api_calls'] = data2.drop(columns=derived_cols, errors='ignore').astype(str).agg(' '.join, axis=1)

# Tokenization and Padding
# NOTE(review): the tokenizer is fitted on datasets 1 and 2 only, while part of dataset 3
# is added to the training data below — confirm dataset-3-only tokens may be left out
# of the vocabulary.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data2['api_calls'].tolist() + df['api_calls'].tolist())
X1 = pad_sequences(tokenizer.texts_to_sequences(data2['api_calls']), maxlen=MAX_LEN, padding='post')
X2 = pad_sequences(tokenizer.texts_to_sequences(df['api_calls']), maxlen=MAX_LEN, padding='post')
y1 = data2['malware_label'].values
y2 = df['labels'].values
# .copy() makes the filtered frame independent, so the column assignment below does not
# write into a slice of the original frame.
df_processed = df_processed[df_processed["malware_class"].notna()].copy()
df_processed['api_calls'] = df_processed.iloc[:, 1:].apply(lambda row: ' '.join(row.astype(str)), axis=1)
X3_tokenized = tokenizer.texts_to_sequences(df_processed['api_calls'])
X3_padded = pad_sequences(X3_tokenized, maxlen=MAX_LEN, padding='post')
y3 = df_processed['malware_class'].values

# 20% of dataset 3 joins the training pool; the remaining 80% is kept aside.
X3_train, X3_test, y3_train, y3_test = train_test_split(X3_padded, y3, test_size=0.8, random_state=42, stratify=y3)
X_combined = np.vstack([X1, X2, X3_train])
y_combined = np.hstack([y1, y2, y3_train])

# Split data: 70% train, 15% validation, 15% test
X_train, X_temp, y_train, y_temp = train_test_split(X_combined, y_combined, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Dataloaders
train_loader = DataLoader(SequenceDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
val_loader = DataLoader(SequenceDataset(X_val, y_val), batch_size=batch_size)
test_loader = DataLoader(SequenceDataset(X_test, y_test), batch_size=batch_size)

# Model Init
# +1 because the tokenizer's word indices start at 1; index 0 is the padding value.
vocab_size = len(tokenizer.word_index) + 1
generator = Generator(vocab_size, embedding_dim, hidden_dim, vocab_size).to(device)
discriminator = Discriminator(vocab_size, embedding_dim, hidden_dim).to(device)
generator.apply(init_weights)
discriminator.apply(init_weights)
gen_optimizer = optim.Adam(generator.parameters(), lr=learning_rate)
disc_optimizer = optim.Adam(discriminator.parameters(), lr=learning_rate)
# Halve each learning rate every 5 scheduler steps.
scheduler_gen = torch.optim.lr_scheduler.StepLR(gen_optimizer, step_size=5, gamma=0.5)
scheduler_disc = torch.optim.lr_scheduler.StepLR(disc_optimizer, step_size=5, gamma=0.5)
criterion = nn.BCEWithLogitsLoss()

# Pretraining
# The device is passed explicitly so the batches land where the model lives; relying on
# a 'cuda' default fails with "Torch not compiled with CUDA enabled" on CPU-only machines.
# NOTE(review): the cross-validation loop further down builds fresh models and
# optimizers per fold — confirm the generator pretrained here is meant to be used.
print("🔧 Pretraining Generator...")
pretrain_generator(generator, train_loader, gen_optimizer, vocab_size, device=device)
train_loss = []
val_loss = []

from sklearn.model_selection import StratifiedKFold

# Cross-validation settings
num_folds = 5
skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=42)

# Store metrics
fold_metrics = []

for fold, (train_idx, test_idx) in enumerate(skf.split(X_combined, y_combined)):
    print(f"\n================ Fold {fold + 1} / {num_folds} ================")

    X_train, X_test = X_combined[train_idx], X_combined[test_idx]
    y_train, y_test = y_combined[train_idx], y_combined[test_idx]

    # Split validation from train
    X_train_split, X_val, y_train_split, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=fold)

    # Dataloaders for this fold
    train_loader = DataLoader(SequenceDataset(X_train_split, y_train_split), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(SequenceDataset(X_val, y_val), batch_size=batch_size)
    test_loader = DataLoader(SequenceDataset(X_test, y_test), batch_size=batch_size)

    # Initialize models and optimizers fresh for each fold, with the same weight
    # initialisation that is applied when the models are first built.
    generator = Generator(vocab_size, embedding_dim, hidden_dim, vocab_size).to(device)
    discriminator = Discriminator(vocab_size, embedding_dim, hidden_dim).to(device)
    generator.apply(init_weights)
    discriminator.apply(init_weights)
    gen_optimizer = optim.Adam(generator.parameters(), lr=learning_rate)
    disc_optimizer = optim.Adam(discriminator.parameters(), lr=learning_rate)
    # A scheduler only changes the optimizer it was built with, so each fold needs its
    # own pair; schedulers created before the loop would keep decaying stale optimizers
    # and leave this fold's learning rates constant.
    scheduler_gen = torch.optim.lr_scheduler.StepLR(gen_optimizer, step_size=5, gamma=0.5)
    scheduler_disc = torch.optim.lr_scheduler.StepLR(disc_optimizer, step_size=5, gamma=0.5)
    train_losses = []
    val_losses = []
    # Adversarial Training per fold
    for epoch in range(epochs):
        # Anneal the Gumbel-softmax temperature from 1.5 down to a floor of 0.5.
        tau = max(0.5, 1.5 * math.exp(-0.1 * epoch))
        train_loss, val_loss = train_adversarial(generator, discriminator, gen_optimizer, disc_optimizer, criterion, train_loader, val_loader, device, tau, vocab_size)

        train_losses.append(train_loss)
        val_losses.append(val_loss)
        scheduler_gen.step()
        scheduler_disc.step()
        print(f"Epoch {epoch+1}/{epochs}, tau={tau:.4f}, Train Loss={train_loss:.4f}, Val Loss={val_loss:.4f}")


    # Final Evaluation on test set of this fold
    precision, recall, f1, auc, accuracy = evaluate_model(discriminator, test_loader, device, threshold=0.4)
    print(f"\n📊 Fold {fold+1} Evaluation: Precision={precision:.4f}, Recall={recall:.4f}, F1={f1:.4f}, AUC={auc:.4f}, Accuracy={accuracy:.4f}")
    plt.plot(range(epochs), train_losses, label='Training Loss')
    plt.plot(range(epochs), val_losses, label='Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title('Training and Validation Loss')
    plt.legend()
    plt.show()
    # Save metrics for averaging later
    fold_metrics.append({
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'auc': auc,
        'accuracy': accuracy
    })

# ====================
# 📈 Overall Metrics
# ====================

print("\n================ Cross-Validation Summary ================")
# One list of per-fold values for each metric, in the order the folds were evaluated.
all_precisions, all_recalls, all_f1s, all_aucs, all_accuracies = (
    [fold_result[metric_key] for fold_result in fold_metrics]
    for metric_key in ('precision', 'recall', 'f1', 'auc', 'accuracy')
)

# The captions carry their own padding so the averages line up in one column.
for caption, per_fold_values in (
    ("Avg Precision: ", all_precisions),
    ("Avg Recall:    ", all_recalls),
    ("Avg F1 Score:  ", all_f1s),
    ("Avg AUC:       ", all_aucs),
    ("Avg Accuracy:  ", all_accuracies),
):
    print(f"{caption}{np.mean(per_fold_values):.4f}")

def plot_confusion_matrix(y_true, y_pred):
    """Compute the confusion matrix of `y_pred` against `y_true` and show it in blue tones.

    The axes are labelled with the classes 0 and 1.
    """
    matrix = confusion_matrix(y_true, y_pred)
    ConfusionMatrixDisplay(confusion_matrix=matrix, display_labels=[0, 1]).plot(cmap=plt.cm.Blues)
    plt.show()

# Collect labels and thresholded predictions over `test_loader`, then plot them.
y_true, y_pred = [], []
with torch.no_grad():
    for sequences, labels in test_loader:
        sequences = sequences.to(device)
        labels = labels.to(device)
        # Logits -> probabilities; 0.4 is used as the cut-off for the positive class.
        probs = torch.sigmoid(discriminator(sequences))
        pred_labels = probs.gt(0.4).float()

        y_true += list(labels.cpu().numpy())
        y_pred += list(pred_labels.cpu().numpy())

plot_confusion_matrix(np.array(y_true), np.array(y_pred))

🔧 Pretraining Generator...
📘 Starting Generator Pretraining...


AssertionError: Torch not compiled with CUDA enabled

In [None]:
# Dataset 3 DataLoader for testing
X3_tensor = torch.tensor(X3_test, dtype=torch.long)
y3_tensor = torch.tensor(y3_test, dtype=torch.float)
# SequenceDataset converts its inputs itself, so it receives the raw arrays; passing the
# tensors above would only make it wrap them a second time.
dataset3_loader = DataLoader(SequenceDataset(X3_test, y3_test), batch_size=batch_size)

# Final Evaluation on Dataset 3 (unseen test set)
precision3, recall3, f1_3, auc3, accuracy = evaluate_model(discriminator, dataset3_loader, device)
print(f"Evaluation Metrics on Dataset 3 - Precision: {precision3:.4f}, Recall: {recall3:.4f}, F1: {f1_3:.4f}, AUC: {auc3:.4f}, Accuracy: {accuracy:.4f}")

# Confusion matrix for the same Dataset 3 split that the metrics above describe
# (iterating `test_loader` here would redraw the last cross-validation fold instead).
y_true = []
y_pred = []
discriminator.eval()
with torch.no_grad():
    for sequences, labels in dataset3_loader:
        sequences, labels = sequences.to(device), labels.to(device)
        preds = discriminator(sequences)
        probs = torch.sigmoid(preds)  # Convert logits to probabilities
        pred_labels = (probs > 0.4).float()  # Adjust threshold to improve recall

        y_true.extend(labels.cpu().numpy())
        y_pred.extend(pred_labels.cpu().numpy())

plot_confusion_matrix(np.array(y_true), np.array(y_pred))

In [None]:
# TODO: lower the decision threshold