
**SVM**





In [None]:
# Import necessary packages
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler, MultiLabelBinarizer
from sklearn.metrics import f1_score, precision_score, roc_auc_score, jaccard_score, hamming_loss
import numpy as np


# Function to get the level of a label
def get_label_level(label):
    """Return the 1-based depth of a '/'-separated label path (no '/' means level 1)."""
    return len(label.split('/'))

# Function to calculate hops between two labels
def calculate_hops(label1, label2):
    """Return the number of tree edges between two '/'-separated label paths.

    The deepest common ancestor is the longest run of leading path components
    the two labels share; a label counts as its own ancestor, so identical
    labels are 0 hops apart and a parent/child pair is 1 hop apart.

    Returns -1 when the labels share no leading component (different
    top-level categories), matching the previous "no common ancestor" signal.
    """
    parts1 = label1.split('/')
    parts2 = label2.split('/')

    # Count shared leading components; stopping at the first mismatch gives
    # the depth of the deepest (not the shallowest) common ancestor.
    common_ancestor_level = 0
    for part1, part2 in zip(parts1, parts2):
        if part1 != part2:
            break
        common_ancestor_level += 1

    # If there's no common ancestor, return -1 to indicate this case
    if common_ancestor_level == 0:
        return -1

    # Each label is (its level - common level) edges below the common ancestor
    return len(parts1) + len(parts2) - 2 * common_ancestor_level
# Step 1: Load the dataset
df = pd.read_csv('/content/selected_features_bugs_final (1).csv')

# Step 2: Preprocess the labels
# Each 'Label' cell is split on '@' into a list of labels, then binarized
# into one indicator column per distinct label (column order = mlb.classes_).
df['Label'] = df['Label'].apply(lambda x: x.split('@'))
mlb = MultiLabelBinarizer()
Y = mlb.fit_transform(df['Label'])

# The features (assuming the features are in the other columns)
X = df.drop(columns=['Label'])

# Step 3: Split once, then standardize
# The split does not depend on the label column (fixed random_state, no
# stratification), so splitting the full label matrix once selects the same
# rows as re-splitting inside the per-label loop. Fitting the scaler on the
# training rows only keeps test-set statistics out of training.
X_train, X_test, Y_train_all, Y_test_all = train_test_split(X, Y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_scaled = scaler.transform(X)  # full scaled matrix, kept in case another cell refers to it

# Step 4: Train an SVM for each label
svm_classifiers = []  # fitted classifiers only (labels with a single training class have none)
Y_pred_all = []
Y_prob_all = []

for i, label in enumerate(mlb.classes_):
    Y_train = Y_train_all[:, i]
    Y_test = Y_test_all[:, i]

    # An SVM cannot be fitted when the training rows hold a single class.
    # Emit that constant for every test row instead of dropping the label, so
    # column i of the stacked outputs always corresponds to mlb.classes_[i].
    if len(np.unique(Y_train)) < 2:
        print(f"Label '{label}' has a single class in the training set; using constant predictions.")
        constant = Y_train[0]
        Y_pred_all.append(np.full(len(Y_test), constant))
        Y_prob_all.append(np.full(len(Y_test), float(constant)))
        continue

    # Initialize SVM with probability estimates
    svm = SVC(probability=True, class_weight='balanced')

    # Train SVM
    svm.fit(X_train, Y_train)

    # Store the classifier
    svm_classifiers.append(svm)

    # Predict on the test set
    Y_pred = svm.predict(X_test)
    Y_prob = svm.predict_proba(X_test)[:, 1]  # Probability for ROC AUC

    # Store predictions for this label
    Y_pred_all.append(Y_pred)
    Y_prob_all.append(Y_prob)

# Stack the per-label predictions into (n_test, n_labels) matrices;
# Y_test_all already has that shape from the single split above.
Y_pred_all = np.column_stack(Y_pred_all)
Y_prob_all = np.column_stack(Y_prob_all)

# Convert binary predictions back to label format
def binary_to_labels(Y_binary, classes):
    """Map each row of an indicator matrix to the class names whose entry equals 1."""
    return [
        [classes[col] for col, flag in enumerate(row) if flag == 1]
        for row in Y_binary
    ]

# Turn the stacked test-set matrices (ground truth, then SVM predictions)
# back into per-instance lists of label names
true_labels_list, pred_labels_list = (
    binary_to_labels(matrix, mlb.classes_) for matrix in (Y_test_all, Y_pred_all)
)

# Hierarchical Labels
# The taxonomy written as top-level category -> sub-category -> leaf names.
# Dict insertion order is what fixes the order of hierarchical_labels.
_label_tree = {
    "data": {
        "structure": ["column", "row", "field"],
        "database": ["hbase", "mssql", "oracle"],
        "integrity": ["changed", "missing", "wrong"],
        "manipulation": ["adjust", "filter", "import-export", "save-delete", "sort"],
        "format": [],
        "type": [],
    },
    "reliability": {
        "performance": ["latency"],
        "security": [],
        "code-issues": [],
        "error-handling": ["exceptions", "unexpected-errors", "untriggered-errors"],
        "failures": ["process", "server"],
    },
    "operation": {
        "fcr": [],
        "ignis": [],
        "staging": [],
        "validation": [],
        "designstudio": ["pipeline", "schema", "product"],
    },
    "interface": {
        "button": ["clickbehavior", "enable-disable"],
        "display": ["incorrect", "missing"],
        "layout": ["box", "grid"],
        "navigation": ["menu", "search"],
    },
}


def _flatten_label_tree(tree):
    """Yield every label path of the tree, each parent before its children."""
    for top, subcategories in tree.items():
        yield top
        for sub, leaves in subcategories.items():
            yield f"{top}/{sub}"
            for leaf in leaves:
                yield f"{top}/{sub}/{leaf}"


hierarchical_labels = list(_flatten_label_tree(_label_tree))

# Create a mapping of labels to their ancestors
# (proper prefixes of the path, shallowest first; the label itself is excluded)
hierarchy = {}
for label in hierarchical_labels:
    parts = label.split('/')
    hierarchy[label] = ['/'.join(parts[:depth]) for depth in range(1, len(parts))]

# Calculate hierarchical precision and recall
def extend_labels(labels):
    """Return the labels plus every ancestor listed for them in ``hierarchy``, as a set.

    Labels without an entry in ``hierarchy`` contribute only themselves.
    """
    ancestor_lists = (hierarchy.get(name, []) for name in labels)
    return set(labels).union(*ancestor_lists)

def calculate_hierarchical_metrics(y_true, y_pred):
    """Return (hP, hR) pooled over all instances.

    Every true and predicted label list is first extended with its ancestors
    via ``extend_labels``. hP is the total overlap size divided by the total
    size of the extended predictions; hR divides by the total size of the
    extended true sets. A ratio whose denominator is 0 is reported as 0.
    """
    overlap_total = 0
    pred_total = 0
    true_total = 0

    for gold, guess in zip(y_true, y_pred):
        gold_ext = extend_labels(gold)
        guess_ext = extend_labels(guess)

        overlap_total += len(gold_ext & guess_ext)
        pred_total += len(guess_ext)
        true_total += len(gold_ext)

    if pred_total > 0:
        hP = overlap_total / pred_total
    else:
        hP = 0

    if true_total > 0:
        hR = overlap_total / true_total
    else:
        hR = 0

    return hP, hR

In [None]:
from sklearn.metrics import average_precision_score

# Calculate hierarchical F1 Score based on hP and hR
def calculate_hierarchical_f1(hP, hR):
    """Return the harmonic mean of hP and hR, or 0 when their sum is not positive."""
    total = hP + hR
    if total > 0:
        return (2 * hP * hR) / total
    return 0

# Calculate Hierarchical Precision, Recall, and F1-score
hP, hR = calculate_hierarchical_metrics(true_labels_list, pred_labels_list)
hF1 = calculate_hierarchical_f1(hP, hR)

print(f"Hierarchical Precision (hP): {hP:.4f}")
print(f"Hierarchical Recall (hR): {hR:.4f}")
print(f"Hierarchical F1 Score (hF1): {hF1:.4f}")

# Standard (Flat) Metrics
# recall_score is not among the sklearn.metrics names imported by the SVM cells
from sklearn.metrics import recall_score

# Flatten the true and predicted binary label arrays for micro/macro calculations
Y_test_flat = Y_test_all.ravel()
Y_pred_flat = Y_pred_all.ravel()
Y_prob_flat = Y_prob_all.ravel()

# Micro and Macro Precision, Recall, F1, and Jaccard Score
micro_f1 = f1_score(Y_test_all, Y_pred_all, average='micro')
macro_f1 = f1_score(Y_test_all, Y_pred_all, average='macro')
micro_precision = precision_score(Y_test_all, Y_pred_all, average='micro')
macro_precision = precision_score(Y_test_all, Y_pred_all, average='macro')
# Recall must come from recall_score; using precision_score here would just
# report the precision values a second time under the "Recall" headings.
micro_recall = recall_score(Y_test_all, Y_pred_all, average='micro')
macro_recall = recall_score(Y_test_all, Y_pred_all, average='macro')
micro_jaccard = jaccard_score(Y_test_all, Y_pred_all, average='micro')
macro_jaccard = jaccard_score(Y_test_all, Y_pred_all, average='macro')

# AUPRC and ROCAUC (only applicable for binary or multilabel case)
# A ValueError from either scorer is reported and both values are left as None.
try:
    micro_auprc = average_precision_score(Y_test_all, Y_prob_all, average='micro')
    micro_rocauc = roc_auc_score(Y_test_all, Y_prob_all, average='micro')
except ValueError as e:
    print("Error calculating AUPRC/ROCAUC:", e)
    micro_auprc = None
    micro_rocauc = None

# Hamming Loss
hamming_loss_value = hamming_loss(Y_test_all, Y_pred_all)

# Display Results
print(f"Micro F1 Score: {micro_f1:.4f}")
print(f"Macro F1 Score: {macro_f1:.4f}")
print(f"Micro Precision: {micro_precision:.4f}")
print(f"Macro Precision: {macro_precision:.4f}")
print(f"Micro Recall: {micro_recall:.4f}")
print(f"Macro Recall: {macro_recall:.4f}")
print(f"Micro Jaccard Score: {micro_jaccard:.4f}")
print(f"Macro Jaccard Score: {macro_jaccard:.4f}")
if micro_auprc is not None:
    print(f"Micro AUPRC: {micro_auprc:.4f}")
if micro_rocauc is not None:
    print(f"Micro ROCAUC: {micro_rocauc:.4f}")
print(f"Hamming Loss: {hamming_loss_value:.4f}")


In [None]:
# Function to calculate hierarchical metrics for each instance
def calculate_hierarchical_metrics_instance(true_labels, pred_labels):
    """Return (hP, hR) for a single instance.

    Both label lists are extended with ``extend_labels``; the overlap size is
    divided by the extended prediction size (hP) and by the extended true size
    (hR). An empty extended set yields 0 for the corresponding ratio.
    """
    gold = extend_labels(true_labels)
    guess = extend_labels(pred_labels)

    shared = len(gold & guess)

    hP_instance = shared / len(guess) if guess else 0
    hR_instance = shared / len(gold) if gold else 0
    return hP_instance, hR_instance

# Macro-Averaged Hierarchical Precision, Recall, and F1-score
# Per-instance (hP, hR) pairs are computed first, then averaged over instances.
num_instances = len(true_labels_list)
instance_scores = [
    calculate_hierarchical_metrics_instance(gold, guess)
    for gold, guess in zip(true_labels_list, pred_labels_list)
]

macro_hP = 0
macro_hR = 0
for hP_instance, hR_instance in instance_scores:
    macro_hP += hP_instance
    macro_hR += hR_instance

# Calculate macro-averaged precision and recall
macro_hP /= num_instances
macro_hR /= num_instances
# The macro F1 is taken from the averaged precision and recall
macro_hF1 = calculate_hierarchical_f1(macro_hP, macro_hR)

# Micro-Averaged Hierarchical Precision, Recall, and F1-score (pooled over all instances)
hP, hR = calculate_hierarchical_metrics(true_labels_list, pred_labels_list)
hF1 = calculate_hierarchical_f1(hP, hR)

# Display Macro and Micro Hierarchical Metrics
print(f"Macro-Averaged Hierarchical Precision (hP): {macro_hP:.4f}")
print(f"Macro-Averaged Hierarchical Recall (hR): {macro_hR:.4f}")
print(f"Macro-Averaged Hierarchical F1 Score (hF1): {macro_hF1:.4f}")

print(f"Micro-Averaged Hierarchical Precision (hP): {hP:.4f}")
print(f"Micro-Averaged Hierarchical Recall (hR): {hR:.4f}")
print(f"Micro-Averaged Hierarchical F1 Score (hF1): {hF1:.4f}")


In [None]:
from sklearn.metrics import f1_score

# Flat F1 (macro and micro) restricted to the labels of each hierarchy level
f1_macro_by_level = {}
f1_micro_by_level = {}

for level in range(1, 4):  # Levels 1 to 3, excluding the root
    # Labels of this level and the matrix columns that hold them
    level_labels = [name for name in mlb.classes_ if get_label_level(name) == level]
    wanted = set(level_labels)
    level_indices = [col for col, name in enumerate(mlb.classes_) if name in wanted]

    # Column slices of the predictions and ground truth for the current level
    Y_pred_level = Y_pred_all[:, level_indices]
    Y_true_level = Y_test_all[:, level_indices]

    # Same call for both averaging modes; only the destination dict differs
    for average, store in (('macro', f1_macro_by_level), ('micro', f1_micro_by_level)):
        store[level] = f1_score(Y_true_level, Y_pred_level, average=average, zero_division=0)

# Print results for F1 macro and micro by level
print("F1 Scores by Level:")
for level in sorted(f1_macro_by_level):
    print(f"  Level {level}: Macro F1 = {f1_macro_by_level[level]:.4f}, Micro F1 = {f1_micro_by_level[level]:.4f}")


In [None]:
import networkx as nx
import numpy as np

# Define the hierarchical graph
G = nx.DiGraph()
G.add_node("root")

# Construct the hierarchy graph with parent-child edges:
# a label without '/' hangs off "root", any other label off its path minus the last component
for label in hierarchical_labels:
    parent = label.rsplit("/", 1)[0] if "/" in label else "root"
    G.add_edge(parent, label)

# Function to get the level of a label
def get_label_level(label):
    """Return one more than the number of '/' separators in the label."""
    separators = label.count('/')
    return separators + 1

# Function to calculate hops between two labels
def calculate_hops(label1, label2):
    """Return the shortest-path length between two labels in ``G``, ignoring edge direction.

    Returns NaN when no path exists or when either label is not a node of
    ``G``, so callers that filter NaN values skip such pairs instead of crashing.
    """
    # A read-only undirected view avoids copying the whole graph on every call.
    undirected = G.to_undirected(as_view=True)
    try:
        return nx.shortest_path_length(undirected, source=label1, target=label2)
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        return np.nan

# Function to extend labels with their ancestors in the graph
def extend_labels(labels):
    """Return the labels plus all of their ancestors in ``G``, as a set.

    The artificial "root" node is left out: it is an ancestor of every node in
    the graph, so keeping it would add one guaranteed match to every
    true/predicted overlap. Labels that are not nodes of ``G`` contribute only
    themselves, which matches the ``hierarchy``-dict based definition.
    """
    extended_labels = set(labels)
    for label in labels:
        if label in G:  # nx.ancestors raises for nodes missing from the graph
            extended_labels.update(nx.ancestors(G, label))
    extended_labels.discard("root")
    return extended_labels

# Calculate hops per instance for level 3 nodes
# NOTE(review): every true level-3 label is paired with every *different*
# predicted level-3 label, including predicted labels that are themselves
# correct -- confirm this is the intended pairing.
hops_per_instance = []

for true_labels, pred_labels in zip(true_labels_list, pred_labels_list):
    true_level3 = [lbl for lbl in true_labels if get_label_level(lbl) == 3]
    pred_level3 = [lbl for lbl in pred_labels if get_label_level(lbl) == 3]

    hops = []
    for tp in true_level3:
        for fp in pred_level3:
            if tp == fp:
                continue
            distance = calculate_hops(tp, fp)
            if not np.isnan(distance):  # Filter out NaN values
                hops.append(distance)

    if hops:
        # Store the mean hops for each instance
        hops_per_instance.append(np.mean(hops))

# Calculate mean hops and margin of error (1.96 * std / sqrt(n)); all NaN without data
if hops_per_instance:
    mean_hops = np.mean(hops_per_instance)
    std_dev_hops = np.std(hops_per_instance)
    margin_of_error = 1.96 * std_dev_hops / np.sqrt(len(hops_per_instance))
else:
    mean_hops = np.nan
    std_dev_hops = np.nan
    margin_of_error = np.nan

print(f"Mean hops: {mean_hops:.2f}, Margin of error: ±{margin_of_error:.2f}")


 **FFNN**

In [None]:
import torch
import torch.nn as nn
import torch.utils.data
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, hamming_loss, f1_score, jaccard_score, average_precision_score, precision_recall_curve, roc_auc_score, auc
import matplotlib.pyplot as plt



# Your CSV file name from the uploaded files
csv_file = "/content/selected_features_bugs_final (1).csv"

# Load CSV data
data = pd.read_csv(csv_file)

# Features are every column except 'Label'; each 'Label' cell is split on '@'
X = data.drop(columns=['Label']).values
y = data['Label'].apply(lambda x: x.split('@')).values

# Binarize multi-labels
mlb = MultiLabelBinarizer()
y_binarized = mlb.fit_transform(y)

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y_binarized, test_size=0.2, random_state=42)

# Preprocessing: both transformers are fitted on the raw training features;
# at transform time imputation runs first, then scaling
scaler = StandardScaler()
scaler.fit(X_train)
imputer = SimpleImputer(strategy='mean')
imputer.fit(X_train)

X_train, X_test = (
    torch.tensor(scaler.transform(imputer.transform(split)), dtype=torch.float32)
    for split in (X_train, X_test)
)
y_train, y_test = (
    torch.tensor(split, dtype=torch.float32) for split in (y_train, y_test)
)

# Define dataset and data loaders
class CustomDataset(torch.utils.data.Dataset):
    """Dataset that pairs a feature container with a label container, indexed in lockstep."""

    def __init__(self, features, labels):
        self.features, self.labels = features, labels

    def __len__(self):
        # The size is taken from the features alone; labels are not checked against it
        return len(self.features)

    def __getitem__(self, idx):
        sample = self.features[idx]
        target = self.labels[idx]
        return sample, target

train_dataset = CustomDataset(features=X_train, labels=y_train)
test_dataset = CustomDataset(features=X_test, labels=y_test)

# Mini-batches of 4; only the training loader shuffles
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=4,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=4,
    shuffle=False,
)

# Define the model
class FlatFFNNModel(nn.Module):
    """Feed-forward network with a sigmoid output over ``output_dim`` units.

    Args:
        input_dim: number of input features.
        hidden_dim: width of every hidden layer.
        output_dim: number of output units.
        hyperparams: dict read for 'num_layers' (total number of linear
            layers), 'dropout' (dropout probability after each hidden layer)
            and 'non_lin' ('relu' selects ReLU, anything else Tanh).

    The last linear layer always maps to ``output_dim``, including when
    'num_layers' is 1 (a single input -> output layer).
    """

    def __init__(self, input_dim, hidden_dim, output_dim, hyperparams):
        super().__init__()
        self.nb_layers = hyperparams['num_layers']

        # Layer widths: input, (num_layers - 1) hidden widths, output.
        # Deriving the layers from this list makes the final layer end at
        # output_dim even when it is also the first layer.
        if self.nb_layers >= 1:
            widths = [input_dim] + [hidden_dim] * (self.nb_layers - 1) + [output_dim]
        else:
            widths = []  # no layers: forward() returns its input unchanged
        self.fc = nn.ModuleList(
            [nn.Linear(fan_in, fan_out) for fan_in, fan_out in zip(widths, widths[1:])]
        )

        self.drop = nn.Dropout(hyperparams['dropout'])
        self.sigmoid = nn.Sigmoid()
        self.f = nn.ReLU() if hyperparams['non_lin'] == 'relu' else nn.Tanh()

    def forward(self, x):
        """Apply non-linearity + dropout after each hidden layer and sigmoid after the last."""
        last = self.nb_layers - 1
        for i, layer in enumerate(self.fc):
            if i == last:
                x = self.sigmoid(layer(x))
            else:
                x = self.drop(self.f(layer(x)))
        return x

# Define model hyperparameters
hyperparams = {'batch_size': 4, 'num_layers': 3, 'dropout': 0.7, 'non_lin': 'relu', 'hidden_dim': 1000, 'lr': 1e-5, 'weight_decay': 1e-5}
input_dim, output_dim = X_train.shape[1], y_train.shape[1]

# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = FlatFFNNModel(input_dim, hyperparams['hidden_dim'], output_dim, hyperparams)
model = model.to(device)
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=hyperparams['lr'],
    weight_decay=hyperparams['weight_decay'],
)
# Binary cross-entropy on the model's sigmoid outputs
criterion = nn.BCELoss()

# Training loop
for epoch in range(100):  # Adjust number of epochs as needed
    model.train()
    epoch_loss = 0.0
    for features, labels in train_loader:
        features = features.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss = criterion(model(features), labels)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
    # Report the mean batch loss of the epoch
    print(f'Epoch [{epoch+1}/100], Loss: {epoch_loss / len(train_loader):.4f}')

# Evaluation
model.eval()
prob_batches = []
with torch.no_grad():
    for features, labels in test_loader:
        outputs = model(features.to(device))
        prob_batches.append(outputs.cpu())

# Probabilities for the whole test set; hard predictions use a 0.5 threshold
y_prob = torch.cat(prob_batches).numpy()
y_pred = y_prob > 0.5
y_test = y_test.numpy()

# Evaluation metrics
metrics = {
    "Accuracy": accuracy_score(y_test, y_pred),
    "Hamming Loss": hamming_loss(y_test, y_pred),
    "F1 Score (micro)": f1_score(y_test, y_pred, average='micro'),
    "Jaccard Score (samples)": jaccard_score(y_test, y_pred, average='samples'),
    "Average Precision": average_precision_score(y_test, y_prob, average='micro'),
    "ROC AUC Score": roc_auc_score(y_test, y_prob, average='micro'),
}
for metric in metrics:
    print(f"{metric}: {metrics[metric]:.4f}")

# Plot Precision-Recall Curve over all (instance, label) entries flattened together
precision, recall, _ = precision_recall_curve(y_test.ravel(), y_prob.ravel())
plt.figure(figsize=(10, 6))
plt.plot(recall, precision, label='Precision-Recall Curve', linestyle='--', marker='.')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve')
plt.legend()
plt.show()

# --- Hierarchical metrics implementation ---

# Define the hierarchical labels
# Written as (top-level category, [(sub-category, [leaf, ...]), ...]);
# the nesting order below is the order of hierarchical_labels.
_taxonomy = [
    ("data", [
        ("structure", ["column", "row", "field"]),
        ("database", ["hbase", "mssql", "oracle"]),
        ("integrity", ["changed", "missing", "wrong"]),
        ("manipulation", ["adjust", "filter", "import-export", "save-delete", "sort"]),
        ("format", []),
        ("type", []),
    ]),
    ("reliability", [
        ("performance", ["latency"]),
        ("security", []),
        ("code-issues", []),
        ("error-handling", ["exceptions", "unexpected-errors", "untriggered-errors"]),
        ("failures", ["process", "server"]),
    ]),
    ("operation", [
        ("fcr", []),
        ("ignis", []),
        ("staging", []),
        ("validation", []),
        ("designstudio", ["pipeline", "schema", "product"]),
    ]),
    ("interface", [
        ("button", ["clickbehavior", "enable-disable"]),
        ("display", ["incorrect", "missing"]),
        ("layout", ["box", "grid"]),
        ("navigation", ["menu", "search"]),
    ]),
]

hierarchical_labels = []
for _top, _subcategories in _taxonomy:
    hierarchical_labels.append(_top)
    for _sub, _leaves in _subcategories:
        hierarchical_labels.append(f"{_top}/{_sub}")
        hierarchical_labels.extend(f"{_top}/{_sub}/{_leaf}" for _leaf in _leaves)


# Define hierarchy mapping for hierarchical metrics and hops calculation:
# label -> its proper path prefixes, shallowest first
hierarchy = {}
for label in hierarchical_labels:
    parts = label.split('/')
    hierarchy[label] = ['/'.join(parts[:depth]) for depth in range(1, len(parts))]

# Helper function to determine label level based on the number of slashes
def get_label_level(label):
    """Return the label's level: 1 for a label without '/', plus 1 per '/' it contains."""
    return 1 + label.count('/')

# Extend labels to include their ancestors for hierarchical metrics
def extend_labels(labels):
    """Return a set with the labels and every ancestor ``hierarchy`` lists for them."""
    with_ancestors = set(labels)
    for name in labels:
        with_ancestors |= set(hierarchy.get(name, []))
    return with_ancestors

# Convert binary matrices to label lists
def binary_to_labels(Y_binary, classes):
    """Return, for each row, the class names at the positions where the row equals 1."""
    def row_to_names(row):
        return [classes[position] for position, value in enumerate(row) if value == 1]

    return [row_to_names(row) for row in Y_binary]

# Calculate hierarchical precision, recall, and F-measure
def calculate_hierarchical_metrics(y_true, y_pred):
    """Return (hP, hR) pooled over all instances.

    Each true and predicted label list is extended with its ancestors via
    ``extend_labels``. hP is the summed overlap size divided by the summed
    size of the extended predictions; hR divides by the summed size of the
    extended true sets.

    Empty sets contribute 0 to the denominators (no phantom element is added
    for them), and a ratio whose denominator is 0 is reported as 0 instead of
    raising ZeroDivisionError.
    """
    hP_numerator = 0
    hP_denominator = 0
    hR_numerator = 0
    hR_denominator = 0

    for true_labels, pred_labels in zip(y_true, y_pred):
        extended_true = extend_labels(true_labels)
        extended_pred = extend_labels(pred_labels)

        intersection = len(extended_true.intersection(extended_pred))
        hP_numerator += intersection
        hP_denominator += len(extended_pred)
        hR_numerator += intersection
        hR_denominator += len(extended_true)

    hP = hP_numerator / hP_denominator if hP_denominator > 0 else 0
    hR = hR_numerator / hR_denominator if hR_denominator > 0 else 0
    return hP, hR

def calculate_hF(hP, hR, beta=1):
    """Return the F-beta combination of hP and hR, or 0 when hP + hR equals 0.

    Computed as (beta^2 + 1) * hP * hR / (beta^2 * hP + hR).
    """
    if hP + hR == 0:
        return 0
    beta_sq = beta ** 2
    weighted_product = (beta_sq + 1) * hP * hR
    return weighted_product / (beta_sq * hP + hR)



# Build the label lists from the FFNN's own test split and predictions.
# Without this, true_labels_list / pred_labels_list would still hold whatever
# an earlier cell left behind, and the numbers printed below would not
# describe this model.
true_labels_list = binary_to_labels(y_test, mlb.classes_)
pred_labels_list = binary_to_labels(y_pred, mlb.classes_)

hP, hR = calculate_hierarchical_metrics(true_labels_list, pred_labels_list)
hF = calculate_hF(hP, hR)

# Output the hierarchical results
print(f"Hierarchical Precision (hP): {hP:.4f}")
print(f"Hierarchical Recall (hR): {hR:.4f}")
print(f"Hierarchical F-measure (hF): {hF:.4f}")


In [None]:
import networkx as nx
import numpy as np

# Define the hierarchical graph
G = nx.DiGraph()
G.add_node("root")

# Construct the hierarchy graph with parent-child edges
for label in hierarchical_labels:
    head, slash, _ = label.rpartition("/")
    # No '/' in the label: it is a direct child of root
    parent = head if slash else "root"
    G.add_edge(parent, label)

# Function to get the level of a label
def get_label_level(label):
    """Return the number of '/'-separated components in the label."""
    components = label.split('/')
    return len(components)

# Function to calculate hops between two labels
def calculate_hops(label1, label2):
    """Return the shortest-path length between two labels in ``G``, ignoring edge direction.

    Returns NaN when no path exists or when either label is not a node of
    ``G``, so callers that filter NaN values skip such pairs instead of crashing.
    """
    # A read-only undirected view avoids copying the whole graph on every call.
    undirected = G.to_undirected(as_view=True)
    try:
        return nx.shortest_path_length(undirected, source=label1, target=label2)
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        return np.nan

# Function to extend labels with their ancestors in the graph
def extend_labels(labels):
    """Return the labels plus all of their ancestors in ``G``, as a set.

    The artificial "root" node is left out: it is an ancestor of every node in
    the graph, so keeping it would add one guaranteed match to every
    true/predicted overlap. Labels that are not nodes of ``G`` contribute only
    themselves, which matches the ``hierarchy``-dict based definition.
    """
    extended_labels = set(labels)
    for label in labels:
        if label in G:  # nx.ancestors raises for nodes missing from the graph
            extended_labels.update(nx.ancestors(G, label))
    extended_labels.discard("root")
    return extended_labels

# Calculate hops per instance for level 3 nodes
# NOTE(review): each true level-3 label is paired with each *different*
# predicted level-3 label, whether or not that prediction is itself correct
# -- confirm this is the intended pairing.
hops_per_instance = []

for true_labels, pred_labels in zip(true_labels_list, pred_labels_list):
    true_level3 = [lbl for lbl in true_labels if get_label_level(lbl) == 3]
    pred_level3 = [lbl for lbl in pred_labels if get_label_level(lbl) == 3]

    hops = []
    for tp in true_level3:
        candidates = [fp for fp in pred_level3 if fp != tp]
        for fp in candidates:
            hop_count = calculate_hops(tp, fp)
            if np.isnan(hop_count):
                continue  # Filter out NaN values
            hops.append(hop_count)

    if not hops:
        continue
    # Store the mean hops for each instance
    hops_per_instance.append(np.mean(hops))

# Calculate mean hops and margin of error (1.96 * std / sqrt(n)); NaN when nothing was collected
has_data = bool(hops_per_instance)
mean_hops = np.mean(hops_per_instance) if has_data else np.nan
std_dev_hops = np.std(hops_per_instance) if has_data else np.nan
margin_of_error = 1.96 * std_dev_hops / np.sqrt(len(hops_per_instance)) if has_data else np.nan

print(f"Mean hops: {mean_hops:.2f}, Margin of error: ±{margin_of_error:.2f}")


Mean hops: 5.27, Margin of error: ±0.05


In [None]:
from sklearn.metrics import f1_score
from collections import defaultdict

# Helper to get labels by level
def get_labels_by_level(y_binary, classes, level):
    """Return, per row of ``y_binary``, the active labels whose level equals ``level``."""
    return [
        [name for name in row_labels if get_label_level(name) == level]
        for row_labels in binary_to_labels(y_binary, classes)
    ]



# F1 scores (macro and micro) by level
def f1_by_level(y_true, y_pred, classes):
    """Return {'macro': {level: f1}, 'micro': {level: f1}} for levels 1 to 3.

    For each level, only the labels of that level are kept from the true and
    predicted rows; they are re-binarized over that level's classes before
    scoring.
    """
    f1_scores = {'macro': {}, 'micro': {}}
    for level in range(1, 4):
        true_at_level = get_labels_by_level(y_true, classes, level)
        pred_at_level = get_labels_by_level(y_pred, classes, level)

        # Binarize per level for F1 calculation
        level_classes = [c for c in classes if get_label_level(c) == level]
        binarizer = MultiLabelBinarizer(classes=level_classes)
        y_true_bin = binarizer.fit_transform(true_at_level)
        y_pred_bin = binarizer.transform(pred_at_level)

        for average in ('macro', 'micro'):
            f1_scores[average][level] = f1_score(y_true_bin, y_pred_bin, average=average)
    return f1_scores



# Per-level F1 for y_test against y_pred, using the classes of the fitted binarizer
f1_scores_by_level = f1_by_level(y_true=y_test, y_pred=y_pred, classes=mlb.classes_)
print("F1 Scores by Level:", f1_scores_by_level)



In [None]:
# Calculate hierarchical F1 Score based on hP and hR
def calculate_hierarchical_f1(hP, hR):
    """Return 2 * hP * hR / (hP + hR), or 0 unless hP + hR is greater than 0."""
    if not (hP + hR) > 0:
        return 0
    return (2 * hP * hR) / (hP + hR)

# Function to calculate hierarchical metrics for each instance
def calculate_hierarchical_metrics_instance(true_labels, pred_labels):
    """Return (hP, hR) for one instance after extending both label lists with ``extend_labels``.

    A ratio is 0 when the extended set in its denominator is empty.
    """
    extended_true = extend_labels(true_labels)
    extended_pred = extend_labels(pred_labels)
    overlap = len(extended_true & extended_pred)

    def ratio(denominator_set):
        size = len(denominator_set)
        return overlap / size if size > 0 else 0

    return ratio(extended_pred), ratio(extended_true)

# Macro-Averaged Hierarchical Precision, Recall, and F1-score
macro_hP = 0
macro_hR = 0
num_instances = len(true_labels_list)

# Accumulate the per-instance precision and recall
for gold, guess in zip(true_labels_list, pred_labels_list):
    per_instance = calculate_hierarchical_metrics_instance(gold, guess)
    macro_hP += per_instance[0]
    macro_hR += per_instance[1]

# Calculate macro-averaged precision and recall
macro_hP = macro_hP / num_instances
macro_hR = macro_hR / num_instances
# The macro F1 is derived from the averaged precision and recall
macro_hF1 = calculate_hierarchical_f1(macro_hP, macro_hR)

# Micro-Averaged Hierarchical Precision, Recall, and F1-score (pooled over all instances)
hP, hR = calculate_hierarchical_metrics(true_labels_list, pred_labels_list)
hF1 = calculate_hierarchical_f1(hP, hR)

# Display Macro and Micro Hierarchical Metrics
print(f"Macro-Averaged Hierarchical Precision (hP): {macro_hP:.4f}")
print(f"Macro-Averaged Hierarchical Recall (hR): {macro_hR:.4f}")
print(f"Macro-Averaged Hierarchical F1 Score (hF1): {macro_hF1:.4f}")

print(f"Micro-Averaged Hierarchical Precision (hP): {hP:.4f}")
print(f"Micro-Averaged Hierarchical Recall (hR): {hR:.4f}")
print(f"Micro-Averaged Hierarchical F1 Score (hF1): {hF1:.4f}")


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Convert binary matrices to label lists for FFNN output
true_labels_list, pred_labels_list = (
    binary_to_labels(matrix, mlb.classes_) for matrix in (y_test, y_pred)
)


# Calculate micro and macro precision, recall, and F1 scores
_flat_scores = {
    averaging: (
        precision_score(y_test, y_pred, average=averaging),
        recall_score(y_test, y_pred, average=averaging),
        f1_score(y_test, y_pred, average=averaging),
    )
    for averaging in ('micro', 'macro')
}
micro_precision, micro_recall, micro_f1 = _flat_scores['micro']
macro_precision, macro_recall, macro_f1 = _flat_scores['macro']

# Output micro and macro metrics
print("\nMicro Metrics:")
print(f"Precision: {micro_precision:.4f}")
print(f"Recall: {micro_recall:.4f}")
print(f"F1 Score: {micro_f1:.4f}")

print("\nMacro Metrics:")
print(f"Precision: {macro_precision:.4f}")
print(f"Recall: {macro_recall:.4f}")
print(f"F1 Score: {macro_f1:.4f}")
