# NMR model analysis
## Load data and define helpers

In [1]:
import pandas as pd
from pathlib import Path
import json

# Structure identifiers this analysis is restricted to.
# NOTE(review): presumably PDB id + chain letter of the hard cases — confirm.
hard_strucs = [
    "2jmrA",
    "2lelA",
    "2n54B",
    "2naoF",
    "2nntA",
    "2namA",
]

# Root directory containing the input CSV and the per-case score folders.
parent_path = Path("/data/jgut/msa-tests/")

# The CSV has no header row, so columns get the integer labels 0, 1, ...
df = pd.read_csv(parent_path / "porter_data.csv", header=None)

def open_ost(ost_path:Path) -> tuple:
    """Read a score JSON file and return its summary values.

    Args:
        ost_path: Path to the JSON score file.

    Returns:
        An 8-tuple
        ``(lddt, bb_lddt, tm_score, rmsd, inconsistent_residues, length,
        model_bad_bonds, model_bad_angles)``.

        * If the file does not exist, every element is ``-1``.
        * If a key is missing from the JSON, its default is used:
          ``0`` for ``lddt``, ``bb_lddt`` and ``tm_score``; ``100`` for
          ``rmsd``; ``-1`` for everything else.
        * ``length`` is the number of entries under ``"local_lddt"``; the two
          ``model_bad_*`` values are the number of entries under
          ``"reference_bad_bonds"`` / ``"reference_bad_angles"``.
    """
    if not ost_path.exists():
        # Must have the same arity (8) as the normal return below; callers
        # unpack eight names and rely on lddt <= 0 to skip missing files.
        return (-1,) * 8
    with open(ost_path) as json_data:
        score_json = json.load(json_data)

    def _count(key):
        # Number of entries stored under `key`, or -1 when the key is absent.
        return len(score_json[key]) if key in score_json else -1

    lddt = score_json.get("lddt", 0)
    rmsd = score_json.get("rmsd", 100)
    bb_lddt = score_json.get("bb_lddt", 0)
    tm_score = score_json.get("tm_score", 0)
    inconsistent_residues = score_json.get("inconsistent_residues", -1)
    length = _count("local_lddt")
    # NOTE(review): the variables are named model_* but are read from the
    # "reference_*" keys — confirm this is intended.
    model_bad_bonds = _count("reference_bad_bonds")
    model_bad_angles = _count("reference_bad_angles")
    return lddt, bb_lddt, tm_score, rmsd, inconsistent_residues, length, model_bad_bonds, model_bad_angles

## Make DataFrame

In [6]:
# Walk every pair listed in the CSV, keep the pairs that involve one of the
# structures in `hard_strucs`, and collect the scores stored in that
# structure's "<struc>_NMR_strucs" folder.
scores = []      # one dict per "*pred_score.json" file with lddt > 0
nmr_within = []  # one dict per "*org_score.json" file with lddt > 0
for _, pair in df.iterrows():
    first, second = pair[0], pair[1]
    # The case folder is named after the pair exactly as listed in the CSV.
    pair_dir = first + second
    # Prefer the first member when it is in hard_strucs, otherwise use the second.
    target = first if first in hard_strucs else second
    if target not in hard_strucs:
        continue
    nmr_dir = parent_path / "aaa_porter_all_models/porter_all_models" / pair_dir / f"{target}_NMR_strucs"

    for score_file in nmr_dir.glob("*pred_score.json"):
        lddt, _bb_lddt, tm_score, rmsd, _inconsistent, _length, _bad_bonds, _bad_angles = open_ost(score_file)
        # open_ost yields lddt <= 0 when the file or the "lddt" key is missing.
        if lddt <= 0:
            continue
        name_parts = score_file.name.split("_")
        # Rank and model are encoded in the underscore-separated file name.
        scores.append(
            {
                "struc": target,
                "rank": int(name_parts[4]),
                "model": name_parts[-3],
                "lddt": lddt,
                "tm": tm_score,
                "rmsd": rmsd,
            }
        )

    for score_file in nmr_dir.glob("*org_score.json"):
        lddt, _bb_lddt, tm_score, rmsd, _inconsistent, _length, _bad_bonds, _bad_angles = open_ost(score_file)
        if lddt <= 0:
            continue
        # The second underscore-separated field of the file name is an integer index.
        other_index = int(score_file.name.split("_")[1])
        nmr_within.append(
            {
                "struc": target,
                "other": other_index,
                "lddt": lddt,
                "tm": tm_score,
                "rmsd": rmsd,
            }
        )

# Show complete tables instead of truncated ones.
for display_option in ("display.max_columns", "display.max_rows"):
    pd.set_option(display_option, None)

scores = pd.DataFrame(scores)
# Column-wise maximum per structure.
scores_max = scores.groupby("struc").max()
scores_max[["lddt", "tm"]]

Unnamed: 0_level_0,lddt,tm
struc,Unnamed: 1_level_1,Unnamed: 2_level_1
2jmrA,0.702,0.427
2lelA,0.586,0.322
2n54B,0.464,0.332
2namA,0.57,0.331
2naoF,0.477,0.252
2nntA,0.524,0.326


## Summary

In [7]:
# Count, per structure, how many "*org_score.json" entries were collected and
# how many of them reach an lDDT of at least 0.7.
nmr_within = pd.DataFrame(nmr_within)
struc_lddt = nmr_within[["struc", "lddt"]]
meets_cutoff = nmr_within.lddt >= 0.7

print("All submitted structures")
submitted_counts = struc_lddt.groupby("struc").count()
print(submitted_counts)

print("All accepted structures")
accepted_counts = struc_lddt[meets_cutoff].groupby("struc").count()
print(accepted_counts)

print("All values minus 1 for the initial structure")

All submitted structures
       lddt
struc      
2jmrA    20
2lelA    20
2n54B    20
2namA     5
2naoF    10
2nntA    10
All accepted structures
       lddt
struc      
2jmrA     1
2lelA    12
2n54B     1
2namA     1
2naoF    10
2nntA    10
All values minus 1 for the initial structure
