In [None]:
# Install package
!uv pip install -e .

# Import Required Libraries
import sys, os, json, time, glob, numpy as np, pandas as pd
from pathlib import Path
from analyze.Data import Data
from datastruct.Network import Network
from methods.lasso import Lasso
from methods.lsco import LSCO
from methods.tigress import TIGRESS
from analyze.CompareModels import CompareModels
from bootstrap.nb_fdr import NetworkBootstrap

# Configuration
# Directory that receives the benchmark output; created below if it is missing.
OUTPUT_DIR = 'benchmark_results'

# Input locations, relative to the notebook's working directory.
# expanduser() only matters if one of these is later changed to a "~"-based path.
DATASET_ROOT = os.path.expanduser('../GeneSPIDER2/data/gs-datasets/N50')
NETWORK_ROOT = os.path.expanduser('../GeneSPIDER2/data/gs-networks')

# 30 log-spaced values from 1e-6 to 1; handed to the inference helpers as
# their `zetavec` argument in the benchmark cell.
ZETAVEC = np.logspace(start=-6, stop=0, num=30)

# Passed to run_nestboot_method as n_init, n_boot and fdr respectively.
# NOTE(review): FDR = 5 looks like a percentage — confirm the expected unit.
N_INIT = 10
N_BOOT = 10
FDR = 5

Path(OUTPUT_DIR).mkdir(exist_ok=True)
print("Setup complete.")

: 

In [None]:
# Helper Functions
def run_comparison_analysis(true_net, inferred_net):
    """Score an inferred adjacency matrix against the true network.

    Args:
        true_net: Reference network object; its ``A`` attribute is read as the
            true adjacency matrix.
        inferred_net: Array-like inferred adjacency matrix (must support
            ``flatten()``); it is wrapped in ``Network`` for ``CompareModels``.

    Returns:
        dict with keys ``'f1'``, ``'auroc'``, ``'pre'``, ``'sen'`` and ``'mcc'``.
        The CompareModels-based entries take the first element of the
        corresponding attribute, or 0 when that attribute is falsy.
        ``'auroc'`` is 0.5 when it cannot be computed (scikit-learn missing,
        only one class among the true edges, an all-zero inferred matrix, or
        any error while scoring).
    """
    comp = CompareModels(true_net, Network(inferred_net))

    auroc = 0.5  # chance-level fallback used whenever AUROC is unavailable
    try:
        # Imported lazily so the remaining metrics still work without scikit-learn.
        from sklearn.metrics import roc_auc_score
        tf = (true_net.A != 0).astype(float).flatten()
        inf = np.abs(inferred_net.flatten())
        # AUROC needs both classes among the labels and at least one non-zero score.
        if len(np.unique(tf)) > 1 and inf.sum() > 0:
            auroc = roc_auc_score(tf, inf)
    except Exception as exc:
        # Best effort: keep the fallback, but say why, so a 0.5 in the results
        # is not mistaken for a measured score. Unlike a bare ``except``, this
        # lets KeyboardInterrupt/SystemExit propagate.
        print(f"AUROC unavailable, using 0.5: {exc}")

    def first_or_zero(values):
        # NOTE(review): relies on truthiness of the CompareModels attribute;
        # presumably a list — verify it is never a multi-element ndarray.
        return values[0] if values else 0

    return {
        'f1': first_or_zero(comp.F1),
        'auroc': auroc,
        'pre': first_or_zero(comp.pre),
        'sen': first_or_zero(comp.sen),
        'mcc': first_or_zero(comp.MCC),
    }

def find_network_file(network_dir, network_id):
    """Locate the network file for ``network_id`` somewhere under ``network_dir``.

    The directory tree is searched recursively with filename patterns ordered
    from most to least specific, so an exact ``...ID<id>.json`` name wins over
    a looser substring match (id ``123`` must not resolve to ``...ID1234.json``
    when ``...ID123.json`` exists). Within one pattern the candidates are
    sorted, so the result does not depend on filesystem listing order, and
    directories are ignored.

    Args:
        network_dir: Root directory to search (str or Path).
        network_id: Identifier expected to appear in the file name.

    Returns:
        ``pathlib.Path`` of the best match, or ``None`` when nothing matches.
    """
    root = Path(network_dir)
    # rglob() already searches recursively, so no "**/" prefix is needed.
    patterns = (
        f"*ID{network_id}.json",   # exact id, JSON
        f"*{network_id}.json",     # name ends with the id, JSON
        f"*{network_id}*.json",    # id anywhere in the name, JSON
        f"*ID{network_id}*",       # any extension, id right after "ID"
        f"*{network_id}*",         # last resort: id anywhere in the name
    )
    for pattern in patterns:
        candidates = sorted(path for path in root.rglob(pattern) if path.is_file())
        if candidates:
            return candidates[0]
    return None

def run_standard_method(method_name, data, zetavec):
    """Run one inference method once and time it.

    Args:
        method_name: ``'LASSO'``, ``'LSCO'`` or ``'TIGRESS'``.
        data: Dataset object handed unchanged to the selected method.
        zetavec: Passed as ``alpha_range`` (LASSO) or ``threshold_range``
            (LSCO); not used for TIGRESS.

    Returns:
        ``(A, seconds, None)`` on success, or ``(None, seconds, message)`` when
        the method raises or ``method_name`` is not recognised. Failures are
        reported through the third element instead of being raised.
    """
    started = time.time()
    try:
        if method_name == 'LASSO':
            stack, _ = Lasso(data, alpha_range=zetavec)
            # Keep only the last slice along the third axis.
            adjacency = stack[:, :, -1]
        elif method_name == 'LSCO':
            stack, _ = LSCO(data, threshold_range=zetavec)
            adjacency = stack[:, :, -1]
        elif method_name == 'TIGRESS':
            adjacency = TIGRESS(data)
        else:
            raise ValueError(f"Unknown method: {method_name}")
    except Exception as exc:
        return None, time.time() - started, str(exc)
    return adjacency, time.time() - started, None

def run_nestboot_method(method_name, data, net, zetavec, n_init, n_boot, fdr, seed=42):
    """Run a NestBoot-wrapped inference method and score the resulting network.

    Seeds NumPy's global RNG, calls ``NetworkBootstrap.run_nestboot`` with an
    inference callback for ``method_name`` (``'LASSO'`` or ``'LSCO'``), rebuilds
    a square matrix from the returned edge list and scores it against ``net``
    with ``run_comparison_analysis``.

    Returns:
        dict with ``'method'``, ``'n_init'``, ``'n_boot'``, ``'fdr'``,
        ``'time'`` (seconds spent in ``run_nestboot``), the metrics returned by
        ``run_comparison_analysis``, ``'density'``, ``'support'`` and
        ``'fp_rate'``.

    Raises:
        ValueError: from the callback when ``method_name`` is not supported.

    NOTE(review): ``fdr`` is only echoed into the result; it is not passed to
    ``run_nestboot`` here — confirm that is intended.
    """
    np.random.seed(seed)
    nb = NetworkBootstrap()
    t0 = time.time()

    def inf_meth(ds):
        # Only the first element of each solver's return value is handed back.
        if method_name == 'LASSO':
            return Lasso(ds, alpha_range=zetavec)[0]
        if method_name == 'LSCO':
            return LSCO(ds, threshold_range=zetavec)[0]
        raise ValueError(f"Unknown method: {method_name}")

    results = nb.run_nestboot(data, inf_meth, n_init, n_boot, seed, {})
    exec_time = time.time() - t0

    binary_net = np.zeros((data.data.N, data.data.N))
    for idx, (gi, gj) in enumerate(zip(results.gene_i, results.gene_j)):
        # Gene labels are split on "_" and the second piece is used as the index.
        # NOTE(review): assumes that piece is a 0-based integer — verify.
        row = int(gi.split('_')[1])
        col = int(gj.split('_')[1])
        binary_net[row, col] = results.xnet[idx]

    metrics = run_comparison_analysis(net, binary_net)

    summary = {
        'method': f'{method_name}+NestBoot',
        'n_init': n_init,
        'n_boot': n_boot,
        'fdr': fdr,
        'time': exec_time,
    }
    summary.update(metrics)
    summary['density'] = (binary_net != 0).sum() / binary_net.size
    summary['support'] = results.support
    summary['fp_rate'] = results.fp_rate
    return summary

# Cell-completion marker: the definitions above print nothing themselves.
print("Helper functions defined.")

In [None]:
# Run Benchmark
import traceback  # used by the per-dataset error handler below

# Cap on the number of datasets processed per run.
MAX_DATASETS = 5  # Limit to first 5 for testing

methods = ['TIGRESS', 'LASSO', 'LSCO']
nestboot_methods = ['LASSO', 'LSCO']

# run_nestboot_method reports its timing and metrics under short keys, while
# the standard rows and the summary use the long column names. Renaming here
# puts both kinds of rows in the same columns; otherwise the summary shows NaN
# for every NestBoot method.
NESTBOOT_COLUMN_MAP = {
    'time': 'execution_time',
    'f1': 'f1_score',
    'pre': 'precision',
    'sen': 'recall',
}

dataset_files = sorted(glob.glob(os.path.join(DATASET_ROOT, "*.json")))
print(f"Found {len(dataset_files)} N50 datasets.")
all_results = []
results_file = Path(OUTPUT_DIR) / 'n50_benchmark_results.csv'

processed_count = 0
for dataset_path in dataset_files[:MAX_DATASETS]:
    dataset_filename = os.path.basename(dataset_path)
    print(f"\nProcessing {dataset_filename}")
    try:
        data = Data.from_json_file(dataset_path)
        # The dataset JSON names its source network; the text after the last
        # "-ID" is used to look up the matching network file.
        with open(dataset_path) as f:
            json_data = json.load(f)
        network_id = json_data['obj_data']['network'].split('-ID')[-1]
        network_path = find_network_file(NETWORK_ROOT, network_id)
        if not network_path:
            print(f"Network not found for {network_id}")
            continue
        net = Network.from_json_file(str(network_path))
        network_filename = os.path.basename(str(network_path))

        for method in methods:
            print(f"Running {method}...")
            inferred_net, exec_time, error = run_standard_method(method, data, ZETAVEC)
            if inferred_net is None:
                print(f"{method} failed: {error}")
                continue
            metrics = run_comparison_analysis(net, inferred_net)
            all_results.append({
                'dataset': dataset_filename,
                'network': network_filename,
                'method': method,
                'execution_time': exec_time,
                'f1_score': metrics['f1'],
                'auroc': metrics['auroc'],
                'precision': metrics['pre'],
                'recall': metrics['sen'],
                'mcc': metrics['mcc'],
                'density': (inferred_net != 0).sum() / inferred_net.size,
            })

        for method in nestboot_methods:
            print(f"Running {method}+NestBoot...")
            nestboot_result = run_nestboot_method(method, data, net, ZETAVEC, N_INIT, N_BOOT, FDR)
            result = {NESTBOOT_COLUMN_MAP.get(key, key): value for key, value in nestboot_result.items()}
            result.update({'dataset': dataset_filename, 'network': network_filename})
            all_results.append(result)

        processed_count += 1
        # Checkpoint after every completed dataset so a later crash loses little.
        pd.DataFrame(all_results).to_csv(results_file, index=False)
    except Exception as e:
        print(f"Error processing {dataset_filename}: {e}")
        traceback.print_exc()

# Final write: rows collected for a dataset that failed part-way through are in
# all_results but were not covered by the in-loop checkpoint.
if all_results:
    pd.DataFrame(all_results).to_csv(results_file, index=False)
    print(f"Processed {processed_count} datasets. Results saved to {results_file}.")
else:
    print(f"Processed {processed_count} datasets. No results to save.")

# Summary
if all_results:
    df = pd.DataFrame(all_results)
    print("\nSummary by Method:")
    # sort=False keeps methods in order of first appearance.
    for method, mr in df.groupby('method', sort=False):
        print(f"{method}: F1 {mr['f1_score'].mean():.3f} ± {mr['f1_score'].std():.3f}, AUROC {mr['auroc'].mean():.3f} ± {mr['auroc'].std():.3f}, Time {mr['execution_time'].mean():.1f} ± {mr['execution_time'].std():.1f}s")