### Check GPU hardware

In [1]:
# Print the NVIDIA driver / CUDA versions and the current state of each GPU.
!nvidia-smi

Wed Jun  7 10:10:04 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  On   | 00000000:4E:00.0 Off |                    0 |
| N/A   27C    P0    58W / 400W |      0MiB / 81920MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

### Save hardware configuration

In [2]:
# Create the summary file
!echo "Hardware Summary" > hardware_summary.txt

# Fetch and write CPU Information
!echo "\nCPU Information:" >> hardware_summary.txt
!echo "-----------------" >> hardware_summary.txt
!lscpu | egrep 'Model name|Socket|Thread|CPU\(s\)' >> hardware_summary.txt

# Fetch and write Total RAM Information
!echo "\nTotal RAM Information:" >> hardware_summary.txt
!echo "-----------------" >> hardware_summary.txt
!free -h | grep Mem | awk '{print $2}' >> hardware_summary.txt


# Fetch and write GPU Information
!echo "\nGPU Information:" >> hardware_summary.txt
!echo "-----------------" >> hardware_summary.txt

# If you have a Nvidia GPU
!nvidia-smi --query-gpu=gpu_name,driver_version,memory.total --format=csv >> hardware_summary.txt

# Alternatively for other GPUs
# !lspci | grep VGA >> hardware_summary.txt


### Install D-SCRIPT

In [1]:
!pip install dscript

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m


In [1]:
import os

# Restrict OpenMP to a single thread by exporting OMP_NUM_THREADS=1 into the
# kernel's process environment.
os.environ.update({"OMP_NUM_THREADS": "1"})

Looking in links: https://download.pytorch.org/whl/torch_stable.html
Collecting torch==1.9.0+cu111
  Downloading https://download.pytorch.org/whl/cu111/torch-1.9.0%2Bcu111-cp38-cp38-linux_x86_64.whl (2041.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 GB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:02[0m
[?25hCollecting torchvision==0.10.0+cu111
  Downloading https://download.pytorch.org/whl/cu111/torchvision-0.10.0%2Bcu111-cp38-cp38-linux_x86_64.whl (23.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.2/23.2 MB[0m [31m36.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting torchaudio==0.9.0
  Downloading torchaudio-0.9.0-cp38-cp38-manylinux1_x86_64.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Installing collected packages: torch, torchvision, torchaudio
  Attempting uninstall: torch
    Found existing

### Download trained models

In [2]:
# Download Human Topsy-Turvy model
!wget http://cb.csail.mit.edu/cb/dscript/data/models/topsy_turvy_v1.sav

--2023-06-07 10:13:59--  http://cb.csail.mit.edu/cb/dscript/data/models/topsy_turvy_v1.sav
Resolving cb.csail.mit.edu (cb.csail.mit.edu)... 128.30.2.148
Connecting to cb.csail.mit.edu (cb.csail.mit.edu)|128.30.2.148|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2543014 (2.4M)
Saving to: ‘topsy_turvy_v1.sav.1’


2023-06-07 10:14:00 (3.19 MB/s) - ‘topsy_turvy_v1.sav.1’ saved [2543014/2543014]



### Download sequence and interaction files for test datasets

In [3]:
!wget https://raw.githubusercontent.com/anhvt00/MCAPS/master/data/Dscript-data/pairs/ecoli_test.tsv
!wget https://raw.githubusercontent.com/anhvt00/MCAPS/master/data/Dscript-data/seqs/ecoli.fasta


--2023-06-07 10:14:02--  https://raw.githubusercontent.com/anhvt00/MCAPS/master/data/Dscript-data/pairs/ecoli_test.tsv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.111.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 812468 (793K) [text/plain]
Saving to: ‘ecoli_test.tsv’


2023-06-07 10:14:03 (4.89 MB/s) - ‘ecoli_test.tsv’ saved [812468/812468]

--2023-06-07 10:14:03--  https://raw.githubusercontent.com/anhvt00/MCAPS/master/data/Dscript-data/seqs/ecoli.fasta
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5413511 (5.2M) [text/plain]
Saving to: ‘ecoli.fasta’


2023-06-07 10:14:04 

### Generate embeddings

In [4]:
# Embed the sequences in ecoli.fasta and store the embeddings in ecoli.h5;
# `-d 0` selects compute device 0 (a CUDA GPU in the logged run).
# NOTE: this is the expensive step. In the logged run the tool first downloaded
# its lm_v1 model, estimated ~141.776GB of storage for the 17722 sequences, and
# took about 10 minutes to write the file.
!dscript embed --seqs ecoli.fasta -o ecoli.h5 -d 0

[2023-06-07-10:14:10] # Using CUDA device 0 - NVIDIA A100-SXM4-80GB
[2023-06-07-10:14:10] # Loading Model...
[2023-06-07-10:14:10] Downloading model lm_v1 from http://cb.csail.mit.edu/cb/dscript/data/models/dscript_lm_v1.pt...
[2023-06-07-10:14:43] # Loading Sequences...
100%|██████████████████████████████████| 17722/17722 [00:00<00:00, 18486.14it/s]
[2023-06-07-10:14:45] # 17722 Sequences Loaded
[2023-06-07-10:14:45] # Approximate Storage Required (varies by average sequence length): ~141.776GB
[2023-06-07-10:14:45] # Storing to ecoli.h5...
100%|█████████████████████████████████████| 17722/17722 [10:02<00:00, 29.40it/s]


### Evaluate on test dataset

In [2]:
# Evaluate with the Human Topsy-Turvy model.
#   --model      model file fetched in the "Download trained models" step
#   --test       test pairs file fetched in the download step
#   --embedding  embeddings file written by `dscript embed`
#   --outfile    output prefix; the predictions are read back later from
#                ecoli_topsy_turvy.predictions.tsv
#   -d 0         compute device 0
# The run logs AUPR and AUROC when it finishes.
!dscript evaluate --model topsy_turvy_v1.sav --test ecoli_test.tsv --embedding ecoli.h5 --outfile ecoli_topsy_turvy -d 0

[2023-06-07-10:31:30] Using CUDA device 0 - NVIDIA A100-SXM4-80GB
100%|██████████████████████████████████████| 7138/7138 [00:27<00:00, 257.18it/s]
  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
Predicting pairs: 100%|██████████████████| 22000/22000 [01:42<00:00, 214.21it/s]
[2023-06-07-10:33:56] AUPR: 0.371926875057703
[2023-06-07-10:33:56] AUROC: 0.6429879125


### Read prediction file

In [5]:
import pandas as pd

# Load the headerless, tab-separated predictions file written by the
# evaluation step and give its four columns descriptive names.
# Note: `predicted_label` holds the model's score in [0, 1], not a hard label.
prediction_columns = ['id_1', 'id_2', 'true_label', 'predicted_label']
df = pd.read_csv(
    'ecoli_topsy_turvy.predictions.tsv',
    header=None,
    sep='\t',
)
df.columns = prediction_columns
df

Unnamed: 0,id_1,id_2,true_label,predicted_label
0,362663.ECP_3406,362663.ECP_4448,1.0,0.792360
1,362663.ECP_0442,362663.ecp:ECP_0985,1.0,0.672620
2,362663.ECP_3384,362663.ECP_4447,1.0,0.947100
3,362663.ECP_0161,362663.ecp:ECP_3117,1.0,0.004833
4,362663.ecp:ECP_1481,362663.ECP_2475,1.0,0.953230
...,...,...,...,...
21995,362663.ECP_2384,362663.ECP_2922,0.0,0.016239
21996,362663.ECP_1743,362663.ECP_1562,0.0,0.490990
21997,362663.ECP_3662,362663.ECP_4685,0.0,0.402600
21998,362663.ECP_3212,362663.ECP_3077,0.0,0.491800


### Compute performance metrics

In [6]:
# Compute threshold-based and ranking-based classification metrics for the
# predictions held in `df` (columns: id_1, id_2, true_label, predicted_label).

# Import necessary libraries
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, average_precision_score, matthews_corrcoef

import numpy as np

# Probability cut-off used to turn scores into hard labels. Scores strictly
# above the threshold become 1.0 -- the same outcome np.round() gave for
# probabilities in [0, 1] (0.5 rounds to 0), but now explicit and tunable.
DECISION_THRESHOLD = 0.5

# Extract prediction and true labels (by column name rather than position)
y_true = df['true_label']
y_prob = df['predicted_label']
y_pred = (y_prob > DECISION_THRESHOLD).astype(float)

# Accuracy
acc = accuracy_score(y_true, y_pred)

# Precision
prec = precision_score(y_true, y_pred)

# Recall
rec = recall_score(y_true, y_pred)

# Specificity
# Fixing the label set keeps the matrix 2x2 even when only one class occurs in
# y_true/y_pred; otherwise ravel() would not unpack into four values.
# Float labels match the dtype of the 0.0/1.0 values in the data.
cm = confusion_matrix(y_true, y_pred, labels=[0.0, 1.0])
tn, fp, fn, tp = cm.ravel()
# Undefined (NaN) when there are no negative examples at all.
spec = tn / (tn + fp) if (tn + fp) > 0 else float('nan')

# MCC score
mcc = matthews_corrcoef(y_true, y_pred)

# F1 score
f1 = f1_score(y_true, y_pred)

# AUC-ROC (uses the raw scores, not the thresholded labels)
auroc = roc_auc_score(y_true, y_prob)

# AUPRC (uses the raw scores, not the thresholded labels)
auprc = average_precision_score(y_true, y_prob)

print (f'accuracy: {acc}, precision: {prec}, recall: {rec}, specificity: {spec}, mcc: {mcc} ,f1-score: {f1}, auc: {auroc}, prc: {auprc}')



accuracy: 0.8875, precision: 0.3981123981123981, recall: 0.464, specificity: 0.92985, mcc: 0.36787355443507613 ,f1-score: 0.4285384437774186, auc: 0.642992375, prc: 0.37191866467616624
