In [22]:
import os
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import glob
import numpy as np
import statistics

In [23]:
# Evaluate a random forest on each pre-split cross-validation fold and collect
# the per-fold accuracy, classification report and feature importances in
# `results` (one dict per fold).
# NOTE(review): base_dir is an absolute, machine-specific path; point it at the
# local copy of the fold directories before re-running.
base_dir = "/Users/m221138/RA_ACPA_multiomics/analysis/machine_learning_r1.1/5fold/enet_2condition/fs_network_topn_cVSneg/top90"
results = []
seed = 225  # fixed random_state so the forest is reproducible across runs

for fold in range(1, 6):  # fold directories are named "1fold" ... "5fold"
    train_file = os.path.join(base_dir, f"{fold}fold/multiplex.fs.train.tsv")
    test_file = os.path.join(base_dir, f"{fold}fold/multiplex.fs.test.tsv")

    # Load data: tab-separated, first column used as the row index.
    train_df = pd.read_csv(train_file, sep="\t", index_col=0)
    test_df = pd.read_csv(test_file, sep="\t", index_col=0)

    # First row is class (one label per column, i.e. per sample).
    y_train = train_df.iloc[0].values
    y_test = test_df.iloc[0].values

    # Remaining rows are features; their row labels are the feature names.
    train_features = train_df.iloc[1:]
    test_features = test_df.iloc[1:]
    feature_names = train_features.index

    # The model matches features purely by column position, so the test rows
    # must follow exactly the training order. Re-order by label when the two
    # files list the same features differently, and fail loudly when they do
    # not describe the same feature set (instead of silently scoring
    # misaligned columns).
    if not test_features.index.equals(feature_names):
        if (
            not feature_names.is_unique
            or not test_features.index.is_unique
            or set(feature_names) != set(test_features.index)
        ):
            raise ValueError(
                f"Fold {fold}: train and test files do not contain the same "
                f"unique set of feature rows ({train_file} vs {test_file})"
            )
        test_features = test_features.loc[feature_names]

    # Transpose to the (samples x features) layout expected by scikit-learn.
    X_train = train_features.T.values
    X_test = test_features.T.values

    # Train RF
    clf = RandomForestClassifier(random_state=seed)
    clf.fit(X_train, y_train)

    # Predict
    y_pred = clf.predict(X_test)

    # Accuracy
    acc = accuracy_score(y_test, y_pred)

    # Feature importance, labelled by feature name and sorted most important first.
    importances = pd.Series(clf.feature_importances_, index=feature_names).sort_values(ascending=False)

    results.append({
        "fold": fold,
        "accuracy": acc,
        "report": classification_report(y_test, y_pred, zero_division=0),
        "importances": importances
    })

# Pull the accuracy of every fold out of `results`, keeping fold order.
accuracies = [fold_result["accuracy"] for fold_result in results]

# One line per fold.
for fold_accuracy in accuracies:
    print("Accuracy:", fold_accuracy)

# Summary across folds: arithmetic mean and sample standard deviation
# (statistics.stdev uses the n - 1 denominator and needs at least two folds).
avg_acc = statistics.mean(accuracies)
std_acc = statistics.stdev(accuracies)

print("\n=== Summary ===")
print("Accuracies per fold:", accuracies)
print(f"Average accuracy: {avg_acc:.10f} ± {std_acc:.10f}")

Accuracy: 0.8125
Accuracy: 0.9375
Accuracy: 0.875
Accuracy: 0.9375
Accuracy: 1.0

=== Summary ===
Accuracies per fold: [0.8125, 0.9375, 0.875, 0.9375, 1.0]
Average accuracy: 0.9125000000 ± 0.0712609641
