## Run case 1 and case 2 with entire samples

In [20]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC, OneClassSVM
from sklearn.ensemble import IsolationForest
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

In [21]:
pip install openpyxl



In [22]:
def load_chip_data_xlsx(folder_path):
    all_data = []
    labels = []
    for i in range(1, 34):  # Chip1 to Chip33
        file_path = os.path.join(folder_path, f"Chip{i}.xlsx")
        df = pd.read_excel(file_path, header=None)
        # Row 1 and 25 are Trojan-Free (label 0), Rows 2–24 are Trojan-Inserted (label 1)
        tf_rows = df.iloc[[0, 24]].values
        ti_rows = df.iloc[1:24].values
        all_data.extend(tf_rows)
        labels.extend([0] * len(tf_rows))
        all_data.extend(ti_rows)
        labels.extend([1] * len(ti_rows))
    return np.array(all_data), np.array(labels)

In [23]:
def run_case1(X, y, classifier_type='rf'):
    results = []
    for _ in range(20):
        X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=24, stratify=y)
        if classifier_type == 'rf':
            clf = RandomForestClassifier(n_estimators=100)
        elif classifier_type == 'svm':
            clf = SVC(kernel='rbf')
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        results.append(acc)
    return np.mean(results)

In [24]:
def run_case2(X, y, classifier_type='ocsvm'):
    tf_data = X[y == 0]
    ti_data = X[y == 1]
    results = []
    for _ in range(20):
        X_train, X_test_tf = train_test_split(tf_data, train_size=24)
        X_test = np.vstack([X_test_tf, ti_data])
        y_true = np.array([0] * len(X_test_tf) + [1] * len(ti_data))
        if classifier_type == 'ocsvm':
            clf = OneClassSVM(gamma='auto')
        elif classifier_type == 'iforest':
            clf = IsolationForest()
        clf.fit(X_train)
        y_pred = clf.predict(X_test)
        y_pred = np.where(y_pred == 1, 0, 1)  # Map 1→TF, -1→TI
        acc = accuracy_score(y_true, y_pred)
        results.append(acc)
    return np.mean(results)

In [25]:
folder = "ROFreq"
X, y = load_chip_data_xlsx(folder)

print("Case 1 - Random Forest Accuracy:", run_case1(X, y, 'rf'))
print("Case 1 - SVM Accuracy:", run_case1(X, y, 'svm'))

print("Case 2 - One-Class SVM Accuracy:", run_case2(X, y, 'ocsvm'))
print("Case 2 - Isolation Forest Accuracy:", run_case2(X, y, 'iforest'))

Case 1 - Random Forest Accuracy: 0.9149188514357054
Case 1 - SVM Accuracy: 0.919725343320849
Case 2 - One-Class SVM Accuracy: 0.9562421972534331
Case 2 - Isolation Forest Accuracy: 0.9352684144818977


## 6, 12, 24, all remaining samples are used for evaluation

In [26]:
def group_by_trojan_type(X, y):
    trojan_groups = {i: [] for i in range(23)}  # Trojan types 0–22
    tf_data = []
    for i in range(0, len(X), 25):  # Each chip has 25 rows
        tf_data.append(X[i])        # Row 0 = TF
        tf_data.append(X[i+24])     # Row 24 = TF
        for j in range(23):         # Rows 1–23 = TI
            trojan_groups[j].append(X[i+1+j])
    return np.array(tf_data), trojan_groups

In [27]:
def evaluate_case1(tf_data, trojan_groups, sample_size, classifier_type='rf'):
    tf_count = sample_size // 2
    ti_count = sample_size // 2
    results = []
    tprs = []  # List to store TPRs
    fprs = []  # List to store FPRs

    for _ in range(20):
        tf_train = tf_data[np.random.choice(len(tf_data), tf_count, replace=False)]
        ti_train = []
        ti_eval = []
        for j in range(23):
            samples = np.array(trojan_groups[j])
            selected = samples[np.random.choice(len(samples), ti_count, replace=False)]
            ti_train.extend(selected)
            ti_eval.extend([s for s in samples if s.tolist() not in selected.tolist()])
        X_train = np.vstack([tf_train, ti_train])
        y_train = np.array([0]*tf_count + [1]*len(ti_train))
        X_test = np.vstack([tf_data, ti_eval])
        y_test = np.array([0]*len(tf_data) + [1]*len(ti_eval))
        clf = RandomForestClassifier() if classifier_type == 'rf' else SVC(kernel='rbf')
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        results.append(acc)

        # Calculate TPR and FPR
        TP = np.sum((y_test == 1) & (y_pred == 1))
        FN = np.sum((y_test == 1) & (y_pred == 0))
        FP = np.sum((y_test == 0) & (y_pred == 1))
        TN = np.sum((y_test == 0) & (y_pred == 0))

        tpr = TP / (TP + FN) if (TP + FN) > 0 else 0
        fpr = FP / (FP + TN) if (FP + TN) > 0 else 0
        tprs.append(tpr)
        fprs.append(fpr)

    # Return mean accuracy, TPR, and FPR
    return np.mean(results), np.mean(tprs), np.mean(fprs)

In [28]:
def evaluate_case2(tf_data, trojan_groups, sample_size, classifier_type='ocsvm'):
    results = []
    tprs = []  # List to store TPRs
    fprs = []  # List to store FPRs

    for _ in range(20):
        tf_train = tf_data[np.random.choice(len(tf_data), sample_size, replace=False)]
        ti_eval = []
        for j in range(23):
            ti_eval.extend(trojan_groups[j])
        X_test = np.vstack([tf_data, ti_eval])
        y_test = np.array([0]*len(tf_data) + [1]*len(ti_eval))

        clf = OneClassSVM(gamma='auto') if classifier_type == 'ocsvm' else IsolationForest()
        clf.fit(tf_train)
        y_pred = clf.predict(X_test)
        y_pred = np.where(y_pred == 1, 0, 1)  # Map 1→TF, -1→TI

        acc = accuracy_score(y_test, y_pred)
        results.append(acc)

        # Calculate TPR and FPR
        TP = np.sum((y_test == 1) & (y_pred == 1))
        FN = np.sum((y_test == 1) & (y_pred == 0))
        FP = np.sum((y_test == 0) & (y_pred == 1))
        TN = np.sum((y_test == 0) & (y_pred == 0))

        tpr = TP / (TP + FN) if (TP + FN) > 0 else 0
        fpr = FP / (FP + TN) if (FP + TN) > 0 else 0
        tprs.append(tpr)
        fprs.append(fpr)

    # Return mean accuracy, TPR, and FPR
    return np.mean(results), np.mean(tprs), np.mean(fprs)

In [29]:
def run_all_evaluations(X, y):
    tf_data, trojan_groups = group_by_trojan_type(X, y)
    sample_sizes = [6, 12, 24]
    classifiers_case1 = ['rf', 'svm']
    classifiers_case2 = ['ocsvm', 'iforest']

    print("=== Case 1: Supervised ===")
    for size in sample_sizes:
        for clf in classifiers_case1:
            acc, tpr, fpr = evaluate_case1(tf_data, trojan_groups, size, classifier_type=clf)
            print(f"Sample Size {size} | Classifier {clf.upper()} | Accuracy: {acc:.2%} | TPR: {tpr:.2%} | FPR: {fpr:.2%}")

    print("\n=== Case 2: One-Class ===")
    for size in sample_sizes:
        for clf in classifiers_case2:
            acc, tpr, fpr = evaluate_case2(tf_data, trojan_groups, size, classifier_type=clf)
            print(f"Sample Size {size} | Classifier {clf.upper()} | Accuracy: {acc:.2%} | TPR: {tpr:.2%} | FPR: {fpr:.2%}")

In [30]:
X, y = load_chip_data_xlsx(folder)
run_all_evaluations(X, y)

=== Case 1: Supervised ===
Sample Size 6 | Classifier RF | Accuracy: 91.01% | TPR: 97.92% | FPR: 80.91%
Sample Size 6 | Classifier SVM | Accuracy: 91.23% | TPR: 100.00% | FPR: 100.00%
Sample Size 12 | Classifier RF | Accuracy: 90.68% | TPR: 98.42% | FPR: 81.29%
Sample Size 12 | Classifier SVM | Accuracy: 90.29% | TPR: 100.00% | FPR: 100.00%
Sample Size 24 | Classifier RF | Accuracy: 89.26% | TPR: 98.12% | FPR: 74.17%
Sample Size 24 | Classifier SVM | Accuracy: 87.71% | TPR: 100.00% | FPR: 100.00%

=== Case 2: One-Class ===
Sample Size 6 | Classifier OCSVM | Accuracy: 92.30% | TPR: 99.91% | FPR: 95.30%
Sample Size 6 | Classifier IFOREST | Accuracy: 56.52% | TPR: 57.71% | FPR: 57.27%
Sample Size 12 | Classifier OCSVM | Accuracy: 92.58% | TPR: 99.80% | FPR: 90.53%
Sample Size 12 | Classifier IFOREST | Accuracy: 82.98% | TPR: 86.17% | FPR: 53.64%
Sample Size 24 | Classifier OCSVM | Accuracy: 92.58% | TPR: 99.08% | FPR: 82.12%
Sample Size 24 | Classifier IFOREST | Accuracy: 87.67% | TPR: 90

## Evaluate per Trojan per case

In [31]:
def evaluate_case1_per_trojan(tf_data, trojan_groups, sample_size, classifier_type='rf'):
    tf_count = sample_size // 2
    ti_count = sample_size // 2
    trojan_accuracies = []

    for trojan_id in range(23):
        results = []
        for _ in range(20):
            tf_train = tf_data[np.random.choice(len(tf_data), tf_count, replace=False)]
            ti_train = np.array(trojan_groups[trojan_id])[np.random.choice(len(trojan_groups[trojan_id]), ti_count, replace=False)]
            X_train = np.vstack([tf_train, ti_train])
            y_train = np.array([0]*tf_count + [1]*ti_count)

            # Evaluation set: all TF + all TI of this Trojan type not in training
            ti_eval = [x for x in trojan_groups[trojan_id] if x.tolist() not in ti_train.tolist()]
            X_test = np.vstack([tf_data, ti_eval])
            y_test = np.array([0]*len(tf_data) + [1]*len(ti_eval))

            clf = RandomForestClassifier() if classifier_type == 'rf' else SVC(kernel='rbf')
            clf.fit(X_train, y_train)
            y_pred = clf.predict(X_test)
            acc = accuracy_score(y_test, y_pred)
            results.append(acc)
        trojan_accuracies.append(np.mean(results))
    return trojan_accuracies

In [32]:
def evaluate_case2_per_trojan(tf_data, trojan_groups, sample_size, classifier_type='ocsvm'):
    trojan_accuracies = []

    for trojan_id in range(23):
        results = []
        for _ in range(20):
            tf_train = tf_data[np.random.choice(len(tf_data), sample_size, replace=False)]
            ti_eval = trojan_groups[trojan_id]
            X_test = np.vstack([tf_data, ti_eval])
            y_test = np.array([0]*len(tf_data) + [1]*len(ti_eval))

            clf = OneClassSVM(gamma='auto') if classifier_type == 'ocsvm' else IsolationForest()
            clf.fit(tf_train)
            y_pred = clf.predict(X_test)
            y_pred = np.where(y_pred == 1, 0, 1)
            acc = accuracy_score(y_test, y_pred)
            results.append(acc)
        trojan_accuracies.append(np.mean(results))
    return trojan_accuracies

In [33]:
def run_per_trojan_evaluation(X, y):
    tf_data, trojan_groups = group_by_trojan_type(X, y)
    sample_sizes = [6, 12, 24]

    for size in sample_sizes:
        print(f"\n=== Sample Size: {size} ===")
        for clf in ['rf', 'svm']:
            accs = evaluate_case1_per_trojan(tf_data, trojan_groups, size, classifier_type=clf)
            print(f"Case 1 | {clf.upper()} | Avg Accuracy per Trojan Type:")
            for i, acc in enumerate(accs):
                print(f"  Trojan {i+1}: {acc:.2%}")

        for clf in ['ocsvm', 'iforest']:
            accs = evaluate_case2_per_trojan(tf_data, trojan_groups, size, classifier_type=clf)
            print(f"Case 2 | {clf.upper()} | Avg Accuracy per Trojan Type:")
            for i, acc in enumerate(accs):
                print(f"  Trojan {i+1}: {acc:.2%}")

In [34]:
X, y = load_chip_data_xlsx(folder)
run_per_trojan_evaluation(X, y)


=== Sample Size: 6 ===
Case 1 | RF | Avg Accuracy per Trojan Type:
  Trojan 1: 64.86%
  Trojan 2: 74.96%
  Trojan 3: 82.78%
  Trojan 4: 86.77%
  Trojan 5: 83.99%
  Trojan 6: 85.60%
  Trojan 7: 82.55%
  Trojan 8: 79.18%
  Trojan 9: 79.00%
  Trojan 10: 74.09%
  Trojan 11: 78.52%
  Trojan 12: 80.15%
  Trojan 13: 76.04%
  Trojan 14: 84.97%
  Trojan 15: 78.21%
  Trojan 16: 83.81%
  Trojan 17: 78.43%
  Trojan 18: 80.03%
  Trojan 19: 85.96%
  Trojan 20: 88.00%
  Trojan 21: 84.62%
  Trojan 22: 77.46%
  Trojan 23: 63.47%
Case 1 | SVM | Avg Accuracy per Trojan Type:
  Trojan 1: 60.24%
  Trojan 2: 68.42%
  Trojan 3: 80.63%
  Trojan 4: 86.40%
  Trojan 5: 83.64%
  Trojan 6: 74.50%
  Trojan 7: 77.13%
  Trojan 8: 69.84%
  Trojan 9: 75.67%
  Trojan 10: 61.49%
  Trojan 11: 67.53%
  Trojan 12: 68.19%
  Trojan 13: 69.60%
  Trojan 14: 68.50%
  Trojan 15: 77.47%
  Trojan 16: 77.47%
  Trojan 17: 66.53%
  Trojan 18: 74.08%
  Trojan 19: 83.32%
  Trojan 20: 82.13%
  Trojan 21: 70.75%
  Trojan 22: 64.55%
  Tro