In [1]:
# Ensure you are connected to GPU
# Runs the nvidia-smi shell command, which prints the driver/CUDA versions and a
# per-GPU status table (name, memory usage, utilization) for this runtime.
!nvidia-smi

Tue Dec  9 22:29:37 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   38C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [3]:
# Install PrismNet dependencies
!pip install torch torchvision torchaudio --quiet
!pip install pandas numpy scikit-learn matplotlib seaborn --quiet
!pip install biopython --quiet
!pip install tensorboardX --quiet

In [4]:
# FIRST MOUNT GOOGLE DRIVE
# Mounts Google Drive at /content/drive; every later cell reads or writes the
# PrismNet files through paths under /content/drive/MyDrive, so run this first.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
# Sanity check: list the PrismNet project folder on the mounted Drive.
!ls /content/drive/MyDrive/PrismNet_files/

data	 motif_construct    README.md		 setup.py
exp	 prismnet	    requirements.txt	 tools
LICENSE  prismnet.egg-info  run_prismnet_all.sh  train.log


In [5]:
import os
# Change this path to where your PrismNet folder is in Drive
# After the chdir, the kernel's working directory is the project root.
os.chdir("/content/drive/MyDrive/PrismNet_files")
# Print the working directory and its contents to confirm the chdir took effect.
!pwd
!ls

/content/drive/MyDrive/PrismNet_files
data	 motif_construct    README.md		 setup.py
exp	 prismnet	    requirements.txt	 tools
LICENSE  prismnet.egg-info  run_prismnet_all.sh  train.log


In [20]:
import h5py
import os

# The six proteins whose CLIP datasets are inspected below
proteins = ['PCBP1_K562', 'SRSF9_HepG2', 'TRA2A_K562',
            'HNRNPC_Hela', 'PCBP2_HepG2', 'TIA1_Hela']

data_dir = '/content/drive/MyDrive/PrismNet_files/data/clip_data/'

# For every protein, report the size of its HDF5 file and then the shape and
# dtype of each top-level entry stored in it.
for p in proteins:
    h5_path = os.path.join(data_dir, f'{p}.h5')

    if os.path.exists(h5_path):
        print(f"\nProtein: {p}")
        size_mb = os.path.getsize(h5_path)/1e6
        print("File size:", size_mb, "MB")

        with h5py.File(h5_path, 'r') as f:
            names = list(f.keys())
            print("Datasets:", names)
            for k in names:
                print(f"{k} shape: {f[k].shape}, dtype: {f[k].dtype}")
    else:
        # Nothing to inspect for this protein; report it and move on.
        print(f"{p}.h5 not found!")



Protein: PCBP1_K562
File size: 4.347325 MB
Datasets: ['X_test', 'X_train', 'Y_test', 'Y_train']
X_test shape: (3000, 5, 101), dtype: float32
X_train shape: (12000, 5, 101), dtype: float32
Y_test shape: (3000, 1), dtype: int32
Y_train shape: (12000, 1), dtype: int32

Protein: SRSF9_HepG2
File size: 4.235208 MB
Datasets: ['X_test', 'X_train', 'Y_test', 'Y_train']
X_test shape: (3000, 5, 101), dtype: float32
X_train shape: (12002, 5, 101), dtype: float32
Y_test shape: (3000, 1), dtype: int32
Y_train shape: (12002, 1), dtype: int32

Protein: TRA2A_K562
File size: 4.324073 MB
Datasets: ['X_test', 'X_train', 'Y_test', 'Y_train']
X_test shape: (3000, 5, 101), dtype: float32
X_train shape: (12002, 5, 101), dtype: float32
Y_test shape: (3000, 1), dtype: int32
Y_train shape: (12002, 1), dtype: int32

Protein: HNRNPC_Hela
File size: 4.31344 MB
Datasets: ['X_test', 'X_train', 'Y_test', 'Y_train']
X_test shape: (3000, 5, 101), dtype: float32
X_train shape: (12002, 5, 101), dtype: float32
Y_test sh

In [21]:
# PCBP1_K562
# Runs tools/main.py with both --train and --eval for this protein, reading the
# data from --data_dir and using --out_dir / --exp_name for the experiment.
# NOTE(review): --early_stopping 20 is larger than --nepochs 5, so early stopping
# presumably can never trigger in a 5-epoch run — confirm the intended values.
!python -u /content/drive/MyDrive/PrismNet_files/tools/main.py \
    --train --eval \
    --lr 0.001 \
    --data_dir /content/drive/MyDrive/PrismNet_files/data/clip_data/ \
    --p_name PCBP1_K562 \
    --out_dir /content/drive/MyDrive/PrismNet_files/exp/prismnet \
    --exp_name prismnet \
    --batch_size 32 \
    --workers 2 \
    --nepochs 5 \
    --pos_weight 2 \
    --weight_decay 1e-6 \
    --early_stopping 20

Namespace(data_dir='/content/drive/MyDrive/PrismNet_files/data/clip_data/', exp_name='prismnet', p_name='PCBP1_K562', out_dir='/content/drive/MyDrive/PrismNet_files/exp/prismnet', mode='pu', infer_file='', arch='PrismNet', lr_scheduler='warmup', lr=0.001, batch_size=32, nepochs=5, pos_weight=2, weight_decay=1e-06, early_stopping=20, load_best=False, eval=True, train=True, infer=False, infer_test=False, eval_test=False, saliency=False, saliency_img=False, har=False, tfboard=False, no_cuda=False, workers=2, log_interval=100, seed=1024)
Network Arch: PrismNet
Total params: 58189
Trainable params: 58189
Non-trainable params: 0
train: [0 1] [8000 4000]
test: [0 1] [2000 1000]
train: [0 1] [8000 4000]
test: [0 1] [2000 1000]
Train set: 12000
Test  set: 3000
[1m[32mPCBP1_K562 	 Train Epoch: 1     avg.loss: 0.6885 Acc: 0.75%, AUC: 0.8264 lr: 0.002400[0m
[1m[31mPCBP1_K562 	 Test  Epoch: 1     avg.loss: 0.6720 Acc: 0.72%, AUC: 0.8621 (0.8621)[0m
[1m[32mPCBP1_K562 	 Train Epoch: 2     avg

In [22]:
# SRSF9_HepG2
# Runs tools/main.py with both --train and --eval for this protein; every flag
# other than --p_name has the same value as in the other per-protein cells.
# NOTE(review): --early_stopping 20 is larger than --nepochs 5, so early stopping
# presumably can never trigger in a 5-epoch run — confirm the intended values.
!python -u /content/drive/MyDrive/PrismNet_files/tools/main.py \
    --train --eval \
    --lr 0.001 \
    --data_dir /content/drive/MyDrive/PrismNet_files/data/clip_data/ \
    --p_name SRSF9_HepG2 \
    --out_dir /content/drive/MyDrive/PrismNet_files/exp/prismnet \
    --exp_name prismnet \
    --batch_size 32 \
    --workers 2 \
    --nepochs 5 \
    --pos_weight 2 \
    --weight_decay 1e-6 \
    --early_stopping 20


Namespace(data_dir='/content/drive/MyDrive/PrismNet_files/data/clip_data/', exp_name='prismnet', p_name='SRSF9_HepG2', out_dir='/content/drive/MyDrive/PrismNet_files/exp/prismnet', mode='pu', infer_file='', arch='PrismNet', lr_scheduler='warmup', lr=0.001, batch_size=32, nepochs=5, pos_weight=2, weight_decay=1e-06, early_stopping=20, load_best=False, eval=True, train=True, infer=False, infer_test=False, eval_test=False, saliency=False, saliency_img=False, har=False, tfboard=False, no_cuda=False, workers=2, log_interval=100, seed=1024)
Network Arch: PrismNet
Total params: 58189
Trainable params: 58189
Non-trainable params: 0
train: [0 1] [8002 4000]
test: [0 1] [2000 1000]
train: [0 1] [8002 4000]
test: [0 1] [2000 1000]
Train set: 12002
Test  set: 3000
[1m[32mSRSF9_HepG2 	 Train Epoch: 1     avg.loss: 0.8096 Acc: 0.66%, AUC: 0.7361 lr: 0.002400[0m
[1m[31mSRSF9_HepG2 	 Test  Epoch: 1     avg.loss: 0.8184 Acc: 0.72%, AUC: 0.7529 (0.7529)[0m
[1m[32mSRSF9_HepG2 	 Train Epoch: 2    

In [23]:
# TRA2A_K562
# Runs tools/main.py with both --train and --eval for this protein; every flag
# other than --p_name has the same value as in the other per-protein cells.
# NOTE(review): --early_stopping 20 is larger than --nepochs 5, so early stopping
# presumably can never trigger in a 5-epoch run — confirm the intended values.
!python -u /content/drive/MyDrive/PrismNet_files/tools/main.py \
    --train --eval \
    --lr 0.001 \
    --data_dir /content/drive/MyDrive/PrismNet_files/data/clip_data/ \
    --p_name TRA2A_K562 \
    --out_dir /content/drive/MyDrive/PrismNet_files/exp/prismnet \
    --exp_name prismnet \
    --batch_size 32 \
    --workers 2 \
    --nepochs 5 \
    --pos_weight 2 \
    --weight_decay 1e-6 \
    --early_stopping 20

Namespace(data_dir='/content/drive/MyDrive/PrismNet_files/data/clip_data/', exp_name='prismnet', p_name='TRA2A_K562', out_dir='/content/drive/MyDrive/PrismNet_files/exp/prismnet', mode='pu', infer_file='', arch='PrismNet', lr_scheduler='warmup', lr=0.001, batch_size=32, nepochs=5, pos_weight=2, weight_decay=1e-06, early_stopping=20, load_best=False, eval=True, train=True, infer=False, infer_test=False, eval_test=False, saliency=False, saliency_img=False, har=False, tfboard=False, no_cuda=False, workers=2, log_interval=100, seed=1024)
Network Arch: PrismNet
Total params: 58189
Trainable params: 58189
Non-trainable params: 0
train: [0 1] [8002 4000]
test: [0 1] [2000 1000]
train: [0 1] [8002 4000]
test: [0 1] [2000 1000]
Train set: 12002
Test  set: 3000
[1m[32mTRA2A_K562 	 Train Epoch: 1     avg.loss: 0.7423 Acc: 0.72%, AUC: 0.7920 lr: 0.002400[0m
[1m[31mTRA2A_K562 	 Test  Epoch: 1     avg.loss: 0.8631 Acc: 0.64%, AUC: 0.8254 (0.8254)[0m
[1m[32mTRA2A_K562 	 Train Epoch: 2     avg

In [24]:
# HNRNPC_Hela
# Runs tools/main.py with both --train and --eval for this protein; every flag
# other than --p_name has the same value as in the other per-protein cells.
# NOTE(review): --early_stopping 20 is larger than --nepochs 5, so early stopping
# presumably can never trigger in a 5-epoch run — confirm the intended values.
!python -u /content/drive/MyDrive/PrismNet_files/tools/main.py \
    --train --eval \
    --lr 0.001 \
    --data_dir /content/drive/MyDrive/PrismNet_files/data/clip_data/ \
    --p_name HNRNPC_Hela \
    --out_dir /content/drive/MyDrive/PrismNet_files/exp/prismnet \
    --exp_name prismnet \
    --batch_size 32 \
    --workers 2 \
    --nepochs 5 \
    --pos_weight 2 \
    --weight_decay 1e-6 \
    --early_stopping 20

Namespace(data_dir='/content/drive/MyDrive/PrismNet_files/data/clip_data/', exp_name='prismnet', p_name='HNRNPC_Hela', out_dir='/content/drive/MyDrive/PrismNet_files/exp/prismnet', mode='pu', infer_file='', arch='PrismNet', lr_scheduler='warmup', lr=0.001, batch_size=32, nepochs=5, pos_weight=2, weight_decay=1e-06, early_stopping=20, load_best=False, eval=True, train=True, infer=False, infer_test=False, eval_test=False, saliency=False, saliency_img=False, har=False, tfboard=False, no_cuda=False, workers=2, log_interval=100, seed=1024)
Network Arch: PrismNet
Total params: 58189
Trainable params: 58189
Non-trainable params: 0
train: [0 1] [8002 4000]
test: [0 1] [2000 1000]
train: [0 1] [8002 4000]
test: [0 1] [2000 1000]
Train set: 12002
Test  set: 3000
[1m[32mHNRNPC_Hela 	 Train Epoch: 1     avg.loss: 0.5745 Acc: 0.81%, AUC: 0.8906 lr: 0.002400[0m
[1m[31mHNRNPC_Hela 	 Test  Epoch: 1     avg.loss: 0.6185 Acc: 0.83%, AUC: 0.8986 (0.8986)[0m
[1m[32mHNRNPC_Hela 	 Train Epoch: 2    

In [25]:
# PCBP2_HepG2
# Runs tools/main.py with both --train and --eval for this protein; every flag
# other than --p_name has the same value as in the other per-protein cells.
# NOTE(review): --early_stopping 20 is larger than --nepochs 5, so early stopping
# presumably can never trigger in a 5-epoch run — confirm the intended values.
!python -u /content/drive/MyDrive/PrismNet_files/tools/main.py \
    --train --eval \
    --lr 0.001 \
    --data_dir /content/drive/MyDrive/PrismNet_files/data/clip_data/ \
    --p_name PCBP2_HepG2 \
    --out_dir /content/drive/MyDrive/PrismNet_files/exp/prismnet \
    --exp_name prismnet \
    --batch_size 32 \
    --workers 2 \
    --nepochs 5 \
    --pos_weight 2 \
    --weight_decay 1e-6 \
    --early_stopping 20

Namespace(data_dir='/content/drive/MyDrive/PrismNet_files/data/clip_data/', exp_name='prismnet', p_name='PCBP2_HepG2', out_dir='/content/drive/MyDrive/PrismNet_files/exp/prismnet', mode='pu', infer_file='', arch='PrismNet', lr_scheduler='warmup', lr=0.001, batch_size=32, nepochs=5, pos_weight=2, weight_decay=1e-06, early_stopping=20, load_best=False, eval=True, train=True, infer=False, infer_test=False, eval_test=False, saliency=False, saliency_img=False, har=False, tfboard=False, no_cuda=False, workers=2, log_interval=100, seed=1024)
Network Arch: PrismNet
Total params: 58189
Trainable params: 58189
Non-trainable params: 0
train: [0 1] [8002 4000]
test: [0 1] [2000 1000]
train: [0 1] [8002 4000]
test: [0 1] [2000 1000]
Train set: 12002
Test  set: 3000
[1m[32mPCBP2_HepG2 	 Train Epoch: 1     avg.loss: 0.5980 Acc: 0.80%, AUC: 0.8793 lr: 0.002400[0m
[1m[31mPCBP2_HepG2 	 Test  Epoch: 1     avg.loss: 0.5411 Acc: 0.81%, AUC: 0.9102 (0.9102)[0m
[1m[32mPCBP2_HepG2 	 Train Epoch: 2    

In [26]:
# TIA1_Hela
# Runs tools/main.py with both --train and --eval for this protein; every flag
# other than --p_name has the same value as in the other per-protein cells.
# NOTE(review): --early_stopping 20 is larger than --nepochs 5, so early stopping
# presumably can never trigger in a 5-epoch run — confirm the intended values.
!python -u /content/drive/MyDrive/PrismNet_files/tools/main.py \
    --train --eval \
    --lr 0.001 \
    --data_dir /content/drive/MyDrive/PrismNet_files/data/clip_data/ \
    --p_name TIA1_Hela \
    --out_dir /content/drive/MyDrive/PrismNet_files/exp/prismnet \
    --exp_name prismnet \
    --batch_size 32 \
    --workers 2 \
    --nepochs 5 \
    --pos_weight 2 \
    --weight_decay 1e-6 \
    --early_stopping 20

Namespace(data_dir='/content/drive/MyDrive/PrismNet_files/data/clip_data/', exp_name='prismnet', p_name='TIA1_Hela', out_dir='/content/drive/MyDrive/PrismNet_files/exp/prismnet', mode='pu', infer_file='', arch='PrismNet', lr_scheduler='warmup', lr=0.001, batch_size=32, nepochs=5, pos_weight=2, weight_decay=1e-06, early_stopping=20, load_best=False, eval=True, train=True, infer=False, infer_test=False, eval_test=False, saliency=False, saliency_img=False, har=False, tfboard=False, no_cuda=False, workers=2, log_interval=100, seed=1024)
Network Arch: PrismNet
Total params: 58189
Trainable params: 58189
Non-trainable params: 0
train: [0 1] [8002 4000]
test: [0 1] [2000 1000]
train: [0 1] [8002 4000]
test: [0 1] [2000 1000]
Train set: 12002
Test  set: 3000
[1m[32mTIA1_Hela 	 Train Epoch: 1     avg.loss: 0.4673 Acc: 0.85%, AUC: 0.9322 lr: 0.002400[0m
[1m[31mTIA1_Hela 	 Test  Epoch: 1     avg.loss: 0.4304 Acc: 0.87%, AUC: 0.9455 (0.9455)[0m
[1m[32mTIA1_Hela 	 Train Epoch: 2     avg.los

In [27]:
import os

import pandas as pd

# List all 6 proteins
proteins = ['PCBP1_K562', 'SRSF9_HepG2', 'TRA2A_K562', 'HNRNPC_Hela', 'PCBP2_HepG2', 'TIA1_Hela']

metrics_dir = '/content/drive/MyDrive/PrismNet_files/exp/prismnet/out/evals/'

# Placeholder for metrics that could not be read. NaN (rather than None) keeps the
# metric columns numeric even when every file is missing.
MISSING = float('nan')


def f1_from_counts(tp, fp, fn):
    """Return the F1 score computed from confusion-matrix counts.

    Args:
        tp: number of true positives.
        fp: number of false positives.
        fn: number of false negatives.

    Returns:
        2 * precision * recall / (precision + recall), or 0 when precision and
        recall are both 0. Precision/recall themselves fall back to 0 when
        their denominator is 0, so no division by zero can occur.
    """
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    if (precision + recall) > 0:
        return 2 * precision * recall / (precision + recall)
    return 0


def read_protein_metrics(metrics_file):
    """Read one tab-separated .metrics file and return its summary metrics.

    Only the first row is used. Columns are addressed by position:
    1 -> accuracy, 2 -> AUROC, 4..7 -> tp, tn, fp, fn.
    NOTE(review): this column layout is assumed from the indices used here —
    confirm it against the code that writes the .metrics files.

    Args:
        metrics_file: path to the .metrics file.

    Returns:
        dict with keys 'AUROC', 'Accuracy' and 'F1'.

    Raises:
        Whatever pandas raises for a missing/unparsable file, and IndexError
        when the file has fewer columns than expected.
    """
    df = pd.read_csv(metrics_file, sep='\t', header=None)
    acc = df.iloc[0, 1]
    auroc = df.iloc[0, 2]
    # tn is not needed for F1, so only tp/fp/fn are read.
    tp, fp, fn = df.iloc[0, 4], df.iloc[0, 6], df.iloc[0, 7]
    return {'AUROC': auroc, 'Accuracy': acc, 'F1': f1_from_counts(tp, fp, fn)}


results = []

for p in proteins:
    # os.path.join works whether or not metrics_dir ends with a slash.
    metrics_file = os.path.join(metrics_dir, f"{p}_PrismNet_pu.metrics")
    try:
        metrics = read_protein_metrics(metrics_file)
    except Exception as e:
        # Deliberate best-effort: report the failure and keep a placeholder row so
        # the remaining proteins are still summarised.
        print(f"Could not read metrics for {p}: {e}")
        metrics = {'AUROC': MISSING, 'Accuracy': MISSING, 'F1': MISSING}
    results.append({'Protein': p, **metrics})

# Create summary DataFrame
summary_df = pd.DataFrame(results)
print(summary_df)

# Compute averages (proteins whose metrics could not be read are excluded)
auc_avg = summary_df['AUROC'].dropna().mean()
acc_avg = summary_df['Accuracy'].dropna().mean()
f1_avg = summary_df['F1'].dropna().mean()

print(f"\nAverage AUROC: {auc_avg:.4f}")
print(f"Average Accuracy: {acc_avg:.4f}")
print(f"Average F1: {f1_avg:.4f}")


       Protein  AUROC  Accuracy        F1
0   PCBP1_K562  0.902     0.840  0.750000
1  SRSF9_HepG2  0.805     0.675  0.624037
2   TRA2A_K562  0.874     0.808  0.689952
3  HNRNPC_Hela  0.913     0.797  0.748760
4  PCBP2_HepG2  0.921     0.858  0.795791
5    TIA1_Hela  0.951     0.879  0.834097

Average AUROC: 0.8943
Average Accuracy: 0.8095
Average F1: 0.7404
