# 1. Chem

In [4]:
import sys

# Extend the module search path with the directory two levels up
# (presumably the repository root holding the local `pepkit` checkout --
# TODO confirm). The membership guard keeps the cell idempotent: re-running
# it no longer appends a duplicate entry each time.
if '../../' not in sys.path:
    sys.path.append('../../')

In [6]:
from pepkit.io import read_csv, setup_logging

# Configure logging first and keep the handle returned by pepkit.
logger = setup_logging()

# Read the input table from a CSV located one directory above the notebook.
df = read_csv('../data.csv')

In [7]:
# Alternative to reading '../data.csv': take the example table exposed as
# `pepkit.examples.data.example_df`.
# NOTE: this rebinds `df`, so whatever was loaded from the CSV in the
# previous cell is replaced when both cells are run in order.
from pepkit.examples import data
df = data.example_df

In [8]:
from pepkit.chem import Standardizer

# Standardizer configuration used for this demo; the exact semantics of each
# flag are defined in pepkit.chem (NOTE(review): confirm against its docs).
std = Standardizer(
    remove_non_canonical=True,
    charge_by_pH=True,
    pH=7.4,
)

# Process `df`, reading sequences from the 'peptide_sequence' column,
# with 4 parallel jobs requested.
result = std.data_process(df, fasta_key='peptide_sequence', n_jobs=4)

2025-05-18 17:15:35 INFO     │ rdkit: Enabling RDKit 2025.03.2 jupyter extensions


In [9]:
from pepkit.chem import Descriptor

# Descriptor calculator backed by the 'rdkit' engine; the three *_key
# arguments name the columns it should use in the input table.
des = Descriptor(
    engine='rdkit',
    fasta_key='peptide_sequence',
    id_key='id',
    smiles_key='smiles',
)

# Run on the standardized output from the previous cell. In the recorded run,
# this call emitted the joblib `[Parallel(n_jobs=4)]` progress lines.
des_result = des.calculate(result, n_jobs=4, verbose=2)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  33 tasks      | elapsed:    2.9s
[Parallel(n_jobs=4)]: Done 100 out of 100 | elapsed:    7.3s finished


# 2. Metrics

In [10]:
import pandas as pd
import numpy as np

# Small hand-written regression fixture: five reference values and the five
# corresponding predictions, used by the metric cells that follow.
y_true = np.array(
    [
        5.498509397602542,
        5.4129302889144695,
        5.249349691952619,
        4.78976015801953,
        4.20173265562076,
    ]
)
y_hat = np.array(
    [
        7.467,
        7.303,
        7.369,
        7.633,
        7.52,
    ]
)

# Same data as a two-column frame (column order: y_true, then y_pred).
df = pd.DataFrame(dict(y_true=y_true, y_pred=y_hat))



In [11]:
from pepkit.metrics import compute_regression_metrics
# Score the predictions against the reference values. Because the call is the
# cell's last expression, its return value is what the notebook displays
# (in the recorded run: a dict with the keys PCC, SCC, RMSE, MAE and R2).
compute_regression_metrics(y_true, y_hat)

{'PCC': np.float64(-0.6056253313530204),
 'SCC': np.float64(-0.6),
 'RMSE': np.float64(2.4913910636708234),
 'MAE': 2.427943561578016,
 'R2': -25.80083307853924}

In [12]:
from pepkit.metrics import compute_metrics_from_dataframe

# DataFrame-based entry point: the two *_key arguments select the reference
# and prediction columns of `df`.
# NOTE(review): what `normalize=True` rescales is defined inside pepkit --
# confirm there before comparing these numbers with un-normalized metrics.
compute_metrics_from_dataframe(
    df,
    ground_truth_key='y_true',
    pred_key='y_pred',
    task='regression',
    normalize=True,
)

{'PCC': np.float64(-0.6056253313530203),
 'SCC': np.float64(-0.6),
 'RMSE': np.float64(0.6672365605751011),
 'MAE': 0.6498042986752538,
 'R2': -2.232619472407412}

# 3. Dock

In [None]:
from pepkit.dock.rosetta.refinement_dock import refinement_multiple_dock
from pepkit.examples import rosetta_data

# Input directory provided by pepkit's bundled Rosetta example helper.
pdb_path = rosetta_data.get_rosetta_ex_path()

# Arguments for the refinement run, collected in one place.
# NOTE: the two "/path/to/rosetta/..." values are placeholders and must be
# replaced with the paths of a local Rosetta installation before running.
dock_kwargs = dict(
    path_to_main="/path/to/rosetta/main",
    path_to_db="/path/to/rosetta/main/database",
    pdb_dir=pdb_path,
    prepack_out="data/rosetta_test/prepack",
    refinement_out="data/rosetta_test/refinement",
    nstruct=1,
)

refinement_multiple_dock(**dock_kwargs)

In [13]:
from pepkit.dock.rosetta.score import extract_score, get_optimal_clx
from pepkit.examples import rosetta_data

# Directory of refinement results provided by pepkit's example helper.
refine_path = rosetta_data.get_refinement_path()

# Collect the scores found under that directory and show them as rich output.
score = extract_score(dirpath=refine_path)
display(score)

# Ask pepkit for the optimal complex and print it as plain text
# (the recorded run printed the `description` of one scored row).
best_complex = get_optimal_clx(score)
print(best_complex)

Unnamed: 0,id,description,total_score,I_bsa,I_hb,I_pack,I_sc,I_unsat,dslf_fa13,fa_atr,...,rmsBB_if,rmsCA,rmsCA_if,rmsSC_CAPRI_if,rmsSC_allIF,startRMSall,startRMSallif,startRMSbb,startRMSca,yhh_planarity
0,complex_2,complex_2_0001_0001,-194.133,970.169,0.0,-0.072,-21.603,8.0,-9.763,-594.231,...,3.204,4.409,3.143,2.742,3.206,0.0,0.0,0.0,0.0,0.021
1,complex_1,complex_1_0001_0001,-185.422,491.09,2.0,-0.024,-9.169,5.0,-10.008,-545.43,...,1.757,3.266,1.629,2.741,2.72,0.0,0.0,0.0,0.0,0.021


complex_2_0001_0001
