In [1]:
import pandas as pd
from pathlib import Path
import json

parent_path = Path("/data/jgut/msa-tests")
df = pd.read_csv(parent_path/"porter_data.csv", header=None)

def open_ost(ost_path:Path):
    if not ost_path.exists():
        return -1, -1, -1, -1, -1, -1 ,-1
    with open(ost_path) as json_data:
        score_json = json.load(json_data)
    lddt = score_json["lddt"] if "lddt" in score_json else 0
    bb_lddt = score_json["bb_lddt"] if "bb_lddt" in score_json else 0
    tm_score = score_json["tm_score"] if "tm_score" in score_json else 0
    inconsistent_residues = score_json["inconsistent_residues"] if "inconsistent_residues" in score_json else -1
    length = len(score_json["local_lddt"]) if "local_lddt" in score_json else -1
    model_bad_bonds = len(score_json["reference_bad_bonds"]) if "reference_bad_bonds" in score_json else -1
    model_bad_angles = len(score_json["reference_bad_angles"]) if "reference_bad_angles" in score_json else -1
    return lddt, bb_lddt, tm_score, inconsistent_residues, length, model_bad_bonds, model_bad_angles

In [2]:
scores = []
for it, row in list(df.iterrows()):
    struc_a = row[0]
    struc_b = row[1]
    case_name = struc_a+struc_b
    curr_entry = {"case": case_name}
    for setup_path in ["default_porter_all"]:
        case_path = parent_path/setup_path/case_name
        print(case_path)
        for comparison in ["Aprot", "Bprot"]:
            lddt, bb_lddt, tm_score, inconsistent_residues, length, model_bad_bonds, model_bad_angles = open_ost(case_path/f"score_{comparison}.json")
            curr_entry = curr_entry|{f"{comparison}_lddt_{setup_path}": lddt,f"{comparison}_lddt_bb_{setup_path}":bb_lddt,  f"{comparison}_tm_{setup_path}": tm_score, f"{comparison}_inconsistent_residues_{setup_path}": inconsistent_residues, f"{comparison}_length": length,  f"{comparison}_bad_bonds_{setup_path}":  model_bad_bonds,  f"{comparison}_bad_angles_{setup_path}":  model_bad_angles}
    scores.append(curr_entry)

/data/jgut/msa-tests/default_porter_all/7ahlE4yhdG
/data/jgut/msa-tests/default_porter_all/1repC2z9oD
/data/jgut/msa-tests/default_porter_all/4uv2D4q79A
/data/jgut/msa-tests/default_porter_all/3tp2A5lj3O
/data/jgut/msa-tests/default_porter_all/1g2cF5c6bF
/data/jgut/msa-tests/default_porter_all/5ec5P3zxgB
/data/jgut/msa-tests/default_porter_all/1uxmK2namA
/data/jgut/msa-tests/default_porter_all/5aoeB5ly6B
/data/jgut/msa-tests/default_porter_all/1ovaA1jtiB
/data/jgut/msa-tests/default_porter_all/3gmhL2vfxL
/data/jgut/msa-tests/default_porter_all/3m1bF3lowA
/data/jgut/msa-tests/default_porter_all/3j7wB3j7vG
/data/jgut/msa-tests/default_porter_all/2lqwA2bzyB
/data/jgut/msa-tests/default_porter_all/5hmgB1htmB
/data/jgut/msa-tests/default_porter_all/4j3oF2jmrA
/data/jgut/msa-tests/default_porter_all/1miqB1qs8B
/data/jgut/msa-tests/default_porter_all/4nc9C4n9wA
/data/jgut/msa-tests/default_porter_all/3j97M1xtgB
/data/jgut/msa-tests/default_porter_all/2naoF1iytA
/data/jgut/msa-tests/default_po

In [3]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
scores = pd.DataFrame(scores)
scores

Unnamed: 0,case,Aprot_lddt_default_porter_all,Aprot_lddt_bb_default_porter_all,Aprot_tm_default_porter_all,Aprot_inconsistent_residues_default_porter_all,Aprot_length,Aprot_bad_bonds_default_porter_all,Aprot_bad_angles_default_porter_all,Bprot_lddt_default_porter_all,Bprot_lddt_bb_default_porter_all,Bprot_tm_default_porter_all,Bprot_inconsistent_residues_default_porter_all,Bprot_length,Bprot_bad_bonds_default_porter_all,Bprot_bad_angles_default_porter_all
0,7ahlE4yhdG,0.827032,0.911311,0.609194,[],293,0,0,0.448288,0.503365,0.802685,"[A.126.-A.126., A.127.-A.127., A.128.-A.128., ...",281,0,1
1,1repC2z9oD,0.831627,0.959877,0.560204,[],214,0,0,0.0,0.0,0.0,-1,-1,-1,-1
2,4uv2D4q79A,0.0,0.0,0.0,-1,-1,-1,-1,0.32342,0.696262,0.693864,"[A.249.-A.57., A.252.-A.60., A.253.-A.61., A.2...",196,0,1
3,3tp2A5lj3O,0.885371,0.932081,0.943471,"[A.46.-A.224., A.47.-A.225.]",225,0,0,0.245237,0.480806,0.742113,"[A.2.-A.2., A.3.-A.3., A.4.-A.4., A.6.-A.6., A...",198,0,1
4,1g2cF5c6bF,0.832629,0.989852,0.879606,[],36,0,0,0.828145,0.900485,0.852756,[],456,0,0
5,5ec5P3zxgB,0.0,0.0,0.0,-1,-1,-1,-1,0.850022,0.948696,0.929475,[],293,0,0
6,1uxmK2namA,0.918251,0.964461,0.985485,[],153,0,0,0.575239,0.656095,0.298764,[],153,0,14
7,5aoeB5ly6B,0.878354,0.934123,0.924688,[],488,0,0,0.720403,0.84853,0.658208,[],471,0,0
8,1ovaA1jtiB,0.864313,0.923841,0.888345,[],383,0,0,0.699027,0.744792,0.717434,[],381,6,6
9,3gmhL2vfxL,0.733597,0.843156,0.687294,[],187,0,0,0.883964,0.955769,0.880703,[],203,0,0


In [18]:
print(len(scores))

91
0     True
1     True
2     True
3     True
4     True
5     True
6     True
7     True
8     True
9     True
10    True
11    True
12    True
13    True
14    True
15    True
16    True
17    True
18    True
19    True
20    True
21    True
22    True
23    True
24    True
25    True
26    True
27    True
28    True
29    True
30    True
31    True
32    True
33    True
34    True
35    True
36    True
37    True
38    True
39    True
40    True
41    True
42    True
43    True
44    True
45    True
46    True
47    True
48    True
49    True
50    True
51    True
52    True
53    True
54    True
55    True
56    True
57    True
58    True
59    True
60    True
61    True
62    True
63    True
64    True
65    True
66    True
67    True
68    True
69    True
70    True
71    True
72    True
73    True
74    True
75    True
76    True
77    True
78    True
79    True
80    True
81    True
82    True
83    True
84    True
85    True
86    True
87    True
88    True
89    True
90    T

In [19]:
sum((scores["Aprot_lddt_bb_default_porter_all"]>0.7) | (scores["Aprot_tm_default_porter_all"]>0.7))

69

In [21]:
sum((scores["Bprot_lddt_bb_default_porter_all"]>0.7) | (scores["Bprot_tm_default_porter_all"]>0.7))

73

In [23]:
sum(((scores["Aprot_lddt_bb_default_porter_all"]>0.7) | (scores["Aprot_tm_default_porter_all"]>0.7)) & sum((scores["Bprot_lddt_bb_default_porter_all"]>0.7) | (scores["Bprot_tm_default_porter_all"]>0.7)))

69