Install BioPython from terminal (Colab only)


In [None]:
!pip install biopython

Collecting biopython
  Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m26.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: biopython
Successfully installed biopython-1.85


Mount Google Drive to access data (Colab only)

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Import Python packages

In [None]:
import pandas as pd
import numpy as np
import xgboost as xgb
from Bio.PDB import PDBList
from Bio.PDB.PDBParser import PDBParser
import argparse
import os
import joblib

In [None]:
# Set Pandas to display all columns
pd.set_option('display.max_columns', None)

# Load file with precalculated features from scripts
This section loads a single TSV file produced by prior processing of a PDB structure with the `calc_features.py` and `calc_3di.py` scripts.

In [None]:
# Set path
folder_path = '/content/drive/MyDrive/Structural Bioinfo PROJECT'  # change with own input data path

# Select structure tsv file
file_path = '1i27.tsv'

In [None]:
def load_tsv_as_df(folder, filename):
    """Read a tab-separated file into a DataFrame.

    Args:
        folder: Directory that contains the file.
        filename: Name of the TSV file inside ``folder``.

    Returns:
        The parsed table as a pandas DataFrame.
    """
    return pd.read_csv(os.path.join(folder, filename), sep='\t')

In [None]:
def download_pdb_file(pdb_id, folder_path):
    """Fetch a structure in PDB format and return the path of the local file.

    Args:
        pdb_id: PDB identifier of the structure.
        folder_path: Base folder; the file is stored in its ``pdb_files`` subfolder.

    Returns:
        Path of the local PDB-format file, as reported by
        ``PDBList.retrieve_pdb_file``.
    """
    pdb_dir = os.path.join(folder_path, 'pdb_files')
    # Return the path Biopython reports instead of rebuilding the file name by hand:
    # Biopython lower-cases the ID in the file name, so a hand-built name would point
    # to a non-existent file whenever pdb_id is not already lower-case.
    return PDBList().retrieve_pdb_file(pdb_id, pdir=pdb_dir, file_format='pdb')

In [None]:
# Load tsv file into dataframe
df = load_tsv_as_df(folder_path, file_path)

# Store identifier columns in dataframe for later usage
restored_cols = ['s_ch', 's_resi', 's_ins', 's_resn', 't_ch', 't_resi', 't_ins', 't_resn']
df_restored = df[restored_cols]

# Store pdb ID in variable
pdb_id = df['pdb_id'].iloc[0]
pdb_file = download_pdb_file(pdb_id, folder_path)

Structure exists: '/content/drive/MyDrive/Structural Bioinfo PROJECT/pdb_files/pdb1i27.ent' 


# Extract Features
In this section individual functions are defined and called to extract each additional feature from the input tsv file.

In [None]:
def get_ca_coordinates(pdb_file):
    """Collect the coordinates of the atom named ``CA`` for every residue in a PDB file.

    Args:
        pdb_file: Path of the PDB-format file to parse.

    Returns:
        Dict mapping ``(chain_id, residue_number, insertion_code)`` to the CA
        coordinate returned by Biopython. The insertion code is stripped, so a
        blank insertion code becomes ``''``.
    """
    structure = PDBParser(QUIET=True).get_structure('pdb', pdb_file)

    coords_by_residue = {}
    # Every model is visited; if the same key occurs in several models, the
    # coordinate from the last model visited is the one kept.
    for model in structure:
        for chain in model:
            for residue in chain:
                if 'CA' not in residue:
                    continue  # residue has no atom named CA
                _, resseq, icode = residue.get_id()
                # strip() removes the blank that stands for "no insertion code"
                coords_by_residue[(chain.id, resseq, icode.strip())] = residue['CA'].get_coord()
    return coords_by_residue

In [None]:
def add_delta_rsa(df):
    """Add ``delta_rsa``: the absolute difference between ``s_rsa`` and ``t_rsa``.

    The column is written into ``df`` itself, which is also returned.
    """
    rsa_gap = df['s_rsa'] - df['t_rsa']
    df['delta_rsa'] = rsa_gap.abs()
    return df

In [None]:
def add_delta_atchley(df):
    """Add ``delta_atchley_1`` .. ``delta_atchley_5``.

    Each new column is the absolute difference between the matching ``s_a<i>``
    and ``t_a<i>`` columns. ``df`` is modified in place and returned.
    """
    for factor in (1, 2, 3, 4, 5):
        source, target = df[f's_a{factor}'], df[f't_a{factor}']
        df[f'delta_atchley_{factor}'] = abs(source - target)
    return df

In [None]:
def calculate_same_chain(df):
    """Add ``same_chain``: 1 where ``s_ch`` equals ``t_ch``, otherwise 0.

    ``df`` is modified in place and returned.
    """
    chains_match = df['s_ch'].eq(df['t_ch'])
    df['same_chain'] = chains_match.astype(int)
    return df

In [None]:
def _clean_insertion_code(value):
    """Return the insertion code as a stripped string; non-string values (e.g. NaN) become ''."""
    return value.strip() if isinstance(value, str) else ''


def calculate_ca_distance(row, ca_coords_dict):
    """Euclidean distance between the CA atoms of the source and target residue.

    Args:
        row: Mapping/Series with the keys ``s_ch``, ``s_resi``, ``s_ins``,
            ``t_ch``, ``t_resi`` and ``t_ins``.
        ca_coords_dict: Dict mapping ``(chain, residue_number, insertion_code)``
            to a coordinate array.

    Returns:
        The distance, or ``np.nan`` when either residue is missing from
        ``ca_coords_dict``.
    """
    # pandas reads an empty TSV field as NaN (a float), which has no .strip();
    # the helper treats such values as "no insertion code".
    s_key = (row['s_ch'], row['s_resi'], _clean_insertion_code(row['s_ins']))
    t_key = (row['t_ch'], row['t_resi'], _clean_insertion_code(row['t_ins']))

    s_coord = ca_coords_dict.get(s_key)
    t_coord = ca_coords_dict.get(t_key)
    if s_coord is None or t_coord is None:
        return np.nan
    return np.linalg.norm(s_coord - t_coord)  # euclidean distance

In [None]:
def load_centroids(folder_path):
    """Load the array stored in ``classification_ring/3di_model/states.txt``.

    Args:
        folder_path: Base folder that contains the ``classification_ring`` directory.

    Returns:
        The NumPy array parsed by ``np.loadtxt``.
    """
    return np.loadtxt(f'{folder_path}/classification_ring/3di_model/states.txt')

In [None]:
# To manage possible NAN values
def map_centroid(centroids, coord_index, axis):
    """Look up one coordinate of a centroid, tolerating unusable indices.

    Args:
        centroids: 2-D array indexed as ``centroids[state, axis]``.
        coord_index: State index; converted with ``int()`` before the lookup.
        axis: Column of ``centroids`` to read.

    Returns:
        The selected value, or ``np.nan`` when the index cannot be converted
        to an integer (e.g. NaN, None) or is out of range.
    """
    try:
        state = int(coord_index)
        value = centroids[state, axis]
    except (ValueError, TypeError, IndexError):
        value = np.nan
    return value

In [None]:
def add_3di_state_centroids(df, centroids):
    """Add the centroid coordinates of the source and target 3Di states.

    For both ``s_3di_state`` and ``t_3di_state`` the state is looked up in
    ``centroids`` with ``map_centroid`` and stored as ``<side>_centroid_x``
    (column 0) and ``<side>_centroid_y`` (column 1). ``df`` is modified in
    place and returned.
    """
    for side in ('s', 't'):
        states = df[f'{side}_3di_state']
        for axis, axis_name in enumerate(('x', 'y')):
            # axis is bound as a default so each lambda keeps its own value
            df[f'{side}_centroid_{axis_name}'] = states.apply(
                lambda state, axis=axis: map_centroid(centroids, state, axis)
            )
    return df

In [None]:
def extract_features(df, ca_coords_dict, centroids):
    """Run every feature-extraction step on ``df`` and return the result.

    Args:
        df: Input table; the individual steps add their columns to it.
        ca_coords_dict: CA coordinates keyed by ``(chain, residue_number, insertion_code)``.
        centroids: Array passed on to ``add_3di_state_centroids``.

    Returns:
        The DataFrame with ``same_chain``, ``delta_rsa``, ``delta_atchley_*``,
        ``ca_distance`` and the centroid columns added, in that order.
    """
    for add_feature in (calculate_same_chain, add_delta_rsa, add_delta_atchley):
        df = add_feature(df)
    # Row-wise: each row is passed to calculate_ca_distance together with the coordinates
    df['ca_distance'] = df.apply(calculate_ca_distance, axis=1, args=(ca_coords_dict,))
    return add_3di_state_centroids(df, centroids)

In [None]:
# Extract features on data
ca_coordinates = get_ca_coordinates(pdb_file)
df = extract_features(df, ca_coordinates, load_centroids(folder_path))

In [None]:
print('Dataframe initial size:', df.shape)

Dataframe initial size: (96, 44)


# Drop Features
Features that were discarded during model development are dropped from the data.

In [None]:
def drop_features(df):
    """Return ``df`` without the columns that are not used as model input.

    The 3Di letter/state columns, ``pdb_id``, the chain columns and the
    insertion-code columns are always removed; ``Interaction`` is removed
    only when it is present.
    """
    discarded = [
        's_3di_letter', 't_3di_letter', 's_3di_state', 't_3di_state',
        'pdb_id', 's_ch', 't_ch', 's_ins', 't_ins',
    ]
    if 'Interaction' in df.columns:
        discarded.append('Interaction')
    return df.drop(columns=discarded)

In [None]:
df = drop_features(df)
print('Dataframe current size:', df.shape)

Dataframe current size: (96, 34)


# Drop Missing Data and duplicate rows
Our model has been trained on complete observations, thus we drop rows with missing values.

Moreover, the model can predict multiple interactions for the same residue pair and outputs them in a single row (a single observation with multiple labels), so duplicated input datapoints can be removed.

In [None]:
df = df.dropna()
print('Dataframe current size:', df.shape)

Dataframe current size: (92, 34)


In [None]:
df = df.drop_duplicates()

# Keep the stored identifier columns row-aligned with the filtered data: select the rows
# that survived the dropna/drop_duplicates steps, then renumber both frames 0..n-1 so that
# the later index-based lookup of identifiers picks the matching rows.
# (Safe to re-run: once both frames are renumbered these statements change nothing.)
df_restored = df_restored.loc[df.index].reset_index(drop=True)
df = df.reset_index(drop=True)
print('Dataframe current size:', df.shape)

Dataframe current size: (77, 34)


# One-Hot Encode Features
The data pre-processing pipeline requires categorical variables to be OneHot encoded.

To perform OHE consistently with the trained models, the same OHE mapping generated during model training is loaded and used to transform novel input.

In [None]:
def set_categorical_features(df):
    """Convert the residue-name and ``ss8`` columns to the ``category`` dtype.

    ``df`` is modified in place and returned.
    """
    for column in ('s_resn', 't_resn', 's_ss8', 't_ss8'):
        df[column] = df[column].astype('category')
    return df

In [None]:
def load_one_hot_encoder(folder_path):
    """Load the object stored in ``onehot_encoder.pkl`` inside ``folder_path``.

    Args:
        folder_path: Folder that contains ``onehot_encoder.pkl``.

    Returns:
        The object deserialized by ``joblib.load``.
    """
    # NOTE: joblib.load unpickles the file, so only use it on a trusted encoder file.
    return joblib.load(f'{folder_path}/onehot_encoder.pkl')

In [None]:
def one_hot_encode(df, encoder):
    """Replace the categorical columns of ``df`` with their one-hot encoding.

    Args:
        df: Table containing ``s_resn``, ``t_resn``, ``s_ss8`` and ``t_ss8``.
        encoder: Fitted encoder exposing ``transform`` and ``get_feature_names_out``.

    Returns:
        A new DataFrame with a fresh 0..n-1 index: the non-categorical columns of
        ``df`` followed by the encoded columns.
    """
    categorical_cols = ['s_resn', 't_resn', 's_ss8', 't_ss8']
    encoded = encoder.transform(df[categorical_cols])
    # An encoder configured for sparse output returns a SciPy sparse matrix, which the
    # DataFrame constructor cannot combine with column names; densify it first.
    if hasattr(encoded, 'toarray'):
        encoded = encoded.toarray()
    encoded_cols = encoder.get_feature_names_out(categorical_cols)
    # np.asarray discards any index the encoder output may carry, so rows are paired
    # with the reset-index frame below purely by position.
    encoded_df = pd.DataFrame(np.asarray(encoded), columns=encoded_cols)
    remaining = df.drop(columns=categorical_cols).reset_index(drop=True)
    return pd.concat([remaining, encoded_df], axis=1)

In [None]:
# Run OHE
df = set_categorical_features(df)
encoder = load_one_hot_encoder(folder_path)
df_input_final = one_hot_encode(df, encoder)
# Report the size of the encoded model input
print('Dataframe current size:', df_input_final.shape)

Dataframe current size: (77, 88)


# OvA XGBoost

## Load XGBoost Models
The trained binary models are loaded in the workspace and the model ensemble is generated.

In [None]:
def load_xgb_model(model_path):
    """Load the serialized model stored at ``model_path`` with joblib and return it."""
    return joblib.load(model_path)

In [None]:
# All binary models are stored in the same results folder
results_dir = f"{folder_path}/datasets/results_new_no_smote"

xgb_hbond = load_xgb_model(f"{results_dir}/HBOND.joblib")
xgb_ionic = load_xgb_model(f"{results_dir}/IONIC.joblib")
xgb_pication = load_xgb_model(f"{results_dir}/PICATION.joblib")
xgb_pihbond = load_xgb_model(f"{results_dir}/PIHBOND.joblib")
xgb_pipistack = load_xgb_model(f"{results_dir}/PIPISTACK.joblib")
xgb_ssbond = load_xgb_model(f"{results_dir}/SSBOND.joblib")
xgb_vdw = load_xgb_model(f"{results_dir}/VDW.joblib")



In [None]:
# Map each interaction label to its loaded binary model.
# The keys are used as the names of the prediction columns.
interaction_models = {
    'HBOND': xgb_hbond,
    'IONIC': xgb_ionic,
    'PICATION': xgb_pication,
    'PIHBOND': xgb_pihbond,
    'PIPISTACK': xgb_pipistack,
    'SSBOND': xgb_ssbond,
    'VDW': xgb_vdw,
}

## Make predictions
A custom function is defined to perform **multi-label classification** on the pre-processed input data:

- Each model predicts whether the input instance belongs to its corresponding class.
- The individual predictions are **appended as new columns** to the input DataFrame.
- For each label, a **confidence score** is added, corresponding to the probability output by the model. This score reflects the model's confidence in the presence of that specific class.


In [None]:
# Make predictions for each model
def make_predictions(df, interaction_models):
    """Run every binary model on ``df`` and return the predictions next to the features.

    Args:
        df: Pre-processed model input; all of its columns are used as features.
        interaction_models: Dict mapping an interaction label to a model exposing
            ``predict`` and ``predict_proba``.

    Returns:
        A new DataFrame holding the columns of ``df`` plus, for each label, a column
        ``<label>`` (predicted class as int) and ``<label>_SCORE`` (second column of
        ``predict_proba``).
    """
    # Results go into a copy so the input keeps exactly the feature columns; otherwise a
    # second call on the same frame would feed earlier predictions to the models as features.
    predictions = df.copy()
    for interaction, model in interaction_models.items():
        predictions[f'{interaction}'] = model.predict(df).astype(int)
        predictions[f'{interaction}_SCORE'] = model.predict_proba(df)[:, 1]

    return predictions

In [None]:
df_pred = make_predictions(df_input_final, interaction_models)

In [None]:
df_pred.head()

Unnamed: 0,s_resi,s_rsa,s_phi,s_psi,s_a1,s_a2,s_a3,s_a4,s_a5,t_resi,t_rsa,t_phi,t_psi,t_a1,t_a2,t_a3,t_a4,t_a5,same_chain,delta_rsa,delta_atchley_1,delta_atchley_2,delta_atchley_3,delta_atchley_4,delta_atchley_5,ca_distance,s_centroid_x,s_centroid_y,t_centroid_x,t_centroid_y,s_resn_A,s_resn_C,s_resn_D,s_resn_E,s_resn_F,s_resn_G,s_resn_H,s_resn_I,s_resn_K,s_resn_L,s_resn_M,s_resn_N,s_resn_P,s_resn_Q,s_resn_R,s_resn_S,s_resn_T,s_resn_V,s_resn_W,s_resn_Y,t_resn_A,t_resn_C,t_resn_D,t_resn_E,t_resn_F,t_resn_G,t_resn_H,t_resn_I,t_resn_K,t_resn_L,t_resn_M,t_resn_N,t_resn_P,t_resn_Q,t_resn_R,t_resn_S,t_resn_T,t_resn_V,t_resn_W,t_resn_Y,s_ss8_-,s_ss8_B,s_ss8_E,s_ss8_G,s_ss8_H,s_ss8_I,s_ss8_P,s_ss8_S,s_ss8_T,t_ss8_-,t_ss8_B,t_ss8_E,t_ss8_G,t_ss8_H,t_ss8_I,t_ss8_P,t_ss8_S,t_ss8_T,HBOND,HBOND_SCORE,IONIC,IONIC_SCORE,PICATION,PICATION_SCORE,PIHBOND,PIHBOND_SCORE,PIPISTACK,PIPISTACK_SCORE,SSBOND,SSBOND_SCORE,VDW,VDW_SCORE
0,495,0.616,-1.074,-0.799,0.931,-0.179,-3.005,-0.503,-1.853,499,0.597,-1.247,-0.838,1.538,-0.055,1.502,0.44,2.897,1,0.019,0.607,0.124,4.507,0.943,4.75,6.290668,0.778631,-2.165999,0.778631,-2.165999,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1,0.992856,0,9e-05,0,6.531802e-07,0,1.2e-05,0,2.816271e-06,0,5.910414e-07,0,0.475706
1,457,0.607,-0.996,-0.764,1.05,0.302,-3.656,-0.259,-3.242,460,0.407,-1.044,-0.762,1.538,-0.055,1.502,0.44,2.897,1,0.2,0.488,0.357,5.158,0.699,6.139,5.098444,0.778631,-2.165999,-1.106118,-1.339661,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0,0.461257,0,0.062818,0,1.280574e-05,0,5e-06,0,8.561173e-07,0,8.081181e-07,1,0.656705
2,469,0.092,-1.464,3.005,-0.032,0.326,2.213,0.908,1.313,472,0.35,-1.072,-0.765,1.05,0.302,-3.656,-0.259,-3.242,1,0.258,1.082,0.024,5.869,1.167,4.555,5.009402,0.494826,-0.420486,2.149514,-0.802992,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1,0.962968,0,3e-06,0,1.125295e-05,0,8e-06,0,1.173509e-06,0,7.715222e-07,1,0.867645
3,458,0.0,-1.171,-0.768,-0.591,-1.302,-0.733,1.57,-0.146,461,0.492,-0.979,-0.797,1.538,-0.055,1.502,0.44,2.897,1,0.492,2.129,1.247,2.235,1.13,3.043,5.179227,0.778631,-2.165999,0.778631,-2.165999,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0,0.036979,0,5e-06,0,2.446928e-05,0,2.5e-05,0,5.904037e-07,0,8.088267e-07,1,0.90404
4,471,0.717,-1.043,-0.74,1.831,-0.561,0.533,-0.277,1.648,474,0.28,-1.026,-0.583,-1.019,-0.987,-1.505,1.266,-0.912,1,0.437,2.85,0.426,2.038,1.543,2.56,5.137738,0.778631,-2.165999,-1.140001,-2.006822,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0,0.022717,0,1e-05,0,1.000901e-06,0,4e-06,0,4.913673e-07,0,8.166931e-07,1,0.948188


## Process output
In this section the prediction output is processed and cleaned up to match the requested format.

### Reverse OHE

In [None]:
def reverse_one_hot_encode(df, encoder, categorical_cols):
    """Turn one-hot encoded columns back into the original categorical columns.

    Args:
        df: DataFrame that contains the one-hot encoded columns.
        encoder: Fitted encoder exposing ``get_feature_names_out`` and
            ``inverse_transform``.
        categorical_cols: Names of the original categorical columns.

    Returns:
        A new DataFrame with a fresh 0..n-1 index: the non-encoded columns of
        ``df`` followed by the decoded categorical columns.
    """
    ohe_columns = encoder.get_feature_names_out(categorical_cols)

    # Decode only the encoded part of the frame
    decoded = pd.DataFrame(encoder.inverse_transform(df[ohe_columns]), columns=categorical_cols)

    # Both parts are renumbered so that concat pairs the rows by position
    kept = df.drop(columns=ohe_columns).reset_index(drop=True)
    return pd.concat([kept, decoded.reset_index(drop=True)], axis=1)

In [None]:
# Define categorical columns and reverse OHE
categorical_cols = ['s_resn', 't_resn', 's_ss8', 't_ss8']
df_pred_reversed_ohe = reverse_one_hot_encode(df_pred, encoder, categorical_cols)

In [None]:
df_pred_reversed_ohe.head()

Unnamed: 0,s_resi,s_rsa,s_phi,s_psi,s_a1,s_a2,s_a3,s_a4,s_a5,t_resi,t_rsa,t_phi,t_psi,t_a1,t_a2,t_a3,t_a4,t_a5,same_chain,delta_rsa,delta_atchley_1,delta_atchley_2,delta_atchley_3,delta_atchley_4,delta_atchley_5,ca_distance,s_centroid_x,s_centroid_y,t_centroid_x,t_centroid_y,HBOND,HBOND_SCORE,IONIC,IONIC_SCORE,PICATION,PICATION_SCORE,PIHBOND,PIHBOND_SCORE,PIPISTACK,PIPISTACK_SCORE,SSBOND,SSBOND_SCORE,VDW,VDW_SCORE,s_resn,t_resn,s_ss8,t_ss8
0,495,0.616,-1.074,-0.799,0.931,-0.179,-3.005,-0.503,-1.853,499,0.597,-1.247,-0.838,1.538,-0.055,1.502,0.44,2.897,1,0.019,0.607,0.124,4.507,0.943,4.75,6.290668,0.778631,-2.165999,0.778631,-2.165999,1,0.992856,0,9e-05,0,6.531802e-07,0,1.2e-05,0,2.816271e-06,0,5.910414e-07,0,0.475706,Q,R,H,H
1,457,0.607,-0.996,-0.764,1.05,0.302,-3.656,-0.259,-3.242,460,0.407,-1.044,-0.762,1.538,-0.055,1.502,0.44,2.897,1,0.2,0.488,0.357,5.158,0.699,6.139,5.098444,0.778631,-2.165999,-1.106118,-1.339661,0,0.461257,0,0.062818,0,1.280574e-05,0,5e-06,0,8.561173e-07,0,8.081181e-07,1,0.656705,D,R,H,H
2,469,0.092,-1.464,3.005,-0.032,0.326,2.213,0.908,1.313,472,0.35,-1.072,-0.765,1.05,0.302,-3.656,-0.259,-3.242,1,0.258,1.082,0.024,5.869,1.167,4.555,5.009402,0.494826,-0.420486,2.149514,-0.802992,1,0.962968,0,3e-06,0,1.125295e-05,0,8e-06,0,1.173509e-06,0,7.715222e-07,1,0.867645,T,D,-,H
3,458,0.0,-1.171,-0.768,-0.591,-1.302,-0.733,1.57,-0.146,461,0.492,-0.979,-0.797,1.538,-0.055,1.502,0.44,2.897,1,0.492,2.129,1.247,2.235,1.13,3.043,5.179227,0.778631,-2.165999,0.778631,-2.165999,0,0.036979,0,5e-06,0,2.446928e-05,0,2.5e-05,0,5.904037e-07,0,8.088267e-07,1,0.90404,A,R,H,H
4,471,0.717,-1.043,-0.74,1.831,-0.561,0.533,-0.277,1.648,474,0.28,-1.026,-0.583,-1.019,-0.987,-1.505,1.266,-0.912,1,0.437,2.85,0.426,2.038,1.543,2.56,5.137738,0.778631,-2.165999,-1.140001,-2.006822,0,0.022717,0,1e-05,0,1.000901e-06,0,4e-06,0,4.913673e-07,0,8.166931e-07,1,0.948188,K,L,H,H


### Reformat predictions

In [None]:
def reformat_predictions(df, interaction_types, df_restored):
    """Collapse the per-interaction prediction columns into two list columns.

    For every row, ``Interaction`` lists the interaction types whose label is 1
    and ``score`` lists the matching scores rounded to 4 decimal places. The
    per-interaction columns and the ``s_resn``/``t_resn``/``s_resi``/``t_resi``
    columns are then removed and the identifier columns are put in front.

    Args:
        df: DataFrame with a ``<type>`` and ``<type>_SCORE`` column for each
            interaction type. The two list columns are also written into it.
        interaction_types: List of interaction type names.
        df_restored: DataFrame with the original identifier columns. Its rows
            are selected by the index labels of ``df``, so those labels must
            refer to the same rows in both frames.

    Returns:
        DataFrame with the identifier columns followed by the remaining
        columns of ``df``, ``Interaction`` and ``score``.
    """
    interaction_list, score_list = [], []
    for _, row in df.iterrows():
        hits = [name for name in interaction_types if row[name] == 1]
        interaction_list.append(hits)
        # Scores are rounded to 4 decimal places
        score_list.append([round(row[f'{name}_SCORE'], 4) for name in hits])

    df['Interaction'] = interaction_list
    df['score'] = score_list

    # The per-interaction label/score columns are no longer needed
    for name in interaction_types:
        df = df.drop(columns=[name, f'{name}_SCORE'])

    # These columns are also present in df_restored, which is prepended below
    df = df.drop(columns=['s_resn', 't_resn', 's_resi', 't_resi'])

    identifiers = df_restored.loc[df.index].reset_index(drop=True)
    return pd.concat([identifiers, df], axis=1)

In [None]:
# Define the list of interaction types
interaction_types = ['HBOND', 'IONIC', 'PICATION', 'PIHBOND', 'PIPISTACK', 'SSBOND', 'VDW']

# Re-format predictions
df_pred_final = reformat_predictions(df_pred_reversed_ohe.copy(), interaction_types, df_restored)



In [None]:
df_pred_final.head()

Unnamed: 0,s_ch,s_resi,s_ins,s_resn,t_ch,t_resi,t_ins,t_resn,s_rsa,s_phi,s_psi,s_a1,s_a2,s_a3,s_a4,s_a5,t_rsa,t_phi,t_psi,t_a1,t_a2,t_a3,t_a4,t_a5,same_chain,delta_rsa,delta_atchley_1,delta_atchley_2,delta_atchley_3,delta_atchley_4,delta_atchley_5,ca_distance,s_centroid_x,s_centroid_y,t_centroid_x,t_centroid_y,s_ss8,t_ss8,Interaction,score
0,A,495,,Q,A,499,,R,0.616,-1.074,-0.799,0.931,-0.179,-3.005,-0.503,-1.853,0.597,-1.247,-0.838,1.538,-0.055,1.502,0.44,2.897,1,0.019,0.607,0.124,4.507,0.943,4.75,6.290668,0.778631,-2.165999,0.778631,-2.165999,H,H,[HBOND],[0.9929]
1,A,457,,D,A,460,,R,0.607,-0.996,-0.764,1.05,0.302,-3.656,-0.259,-3.242,0.407,-1.044,-0.762,1.538,-0.055,1.502,0.44,2.897,1,0.2,0.488,0.357,5.158,0.699,6.139,5.098444,0.778631,-2.165999,-1.106118,-1.339661,H,H,[VDW],[0.6567]
2,A,469,,T,A,472,,D,0.092,-1.464,3.005,-0.032,0.326,2.213,0.908,1.313,0.35,-1.072,-0.765,1.05,0.302,-3.656,-0.259,-3.242,1,0.258,1.082,0.024,5.869,1.167,4.555,5.009402,0.494826,-0.420486,2.149514,-0.802992,-,H,"[HBOND, VDW]","[0.963, 0.8676]"
3,A,458,,A,A,461,,R,0.0,-1.171,-0.768,-0.591,-1.302,-0.733,1.57,-0.146,0.492,-0.979,-0.797,1.538,-0.055,1.502,0.44,2.897,1,0.492,2.129,1.247,2.235,1.13,3.043,5.179227,0.778631,-2.165999,0.778631,-2.165999,H,H,[VDW],[0.904]
4,A,471,,K,A,474,,L,0.717,-1.043,-0.74,1.831,-0.561,0.533,-0.277,1.648,0.28,-1.026,-0.583,-1.019,-0.987,-1.505,1.266,-0.912,1,0.437,2.85,0.426,2.038,1.543,2.56,5.137738,0.778631,-2.165999,-1.140001,-2.006822,H,H,[VDW],[0.9482]


## Save prediction

In [None]:
# Save output table
output_path = os.path.join(folder_path, f"{pdb_id}_predictions.tsv")
df_pred_final.to_csv(output_path, sep='\t', index=False)
print(f"Prediction table saved to: {output_path}")

Prediction table saved to: /content/drive/MyDrive/Structural Bioinfo PROJECT/1i27_predictions.tsv
