In [None]:
#original
import os
import json
import matplotlib.pyplot as plt
from collections import Counter
from pandamap import HybridProtLigMapper


def _summarize_interactions(mapper):
    """Turn a finished mapper's interactions into JSON-serialisable records.

    Parameters
    ----------
    mapper : object
        Any object exposing ``interactions`` as a mapping of
        ``interaction_type -> list of dicts``, where each dict carries a
        ``"protein_residue"`` entry.

    Returns
    -------
    dict
        ``{interaction_type: [{"resname", "position", "chain"}, ...]}`` in
        the same order the mapper reports them.
    """
    summary = {}
    for interaction_type, interactions in mapper.interactions.items():
        records = []
        for interaction in interactions:
            # Assumes Bio.PDB-style residue objects (get_resname / get_id /
            # get_parent). The public accessors are used instead of poking at
            # ``__dict__`` so wrapper objects that forward attributes work too.
            residue = interaction["protein_residue"]
            chain = residue.get_parent()
            records.append({
                "resname": residue.get_resname(),
                # Element 1 of the residue id tuple is the sequence number.
                "position": residue.get_id()[1],
                # A detached residue has no chain; store null rather than crash.
                "chain": chain.get_id() if chain is not None else None,
            })
        summary[interaction_type] = records
    return summary


def mapper_and_json(cif_L, cif_D, ligand_L, ligand_D, output_prefix):
    """Analyse the L and D complexes and dump their interactions to JSON.

    A ``HybridProtLigMapper`` is built and run for each structure file, and
    the interacting protein residues are written to
    ``{output_prefix}_L_interactions.json`` and
    ``{output_prefix}_D_interactions.json``.

    Parameters
    ----------
    cif_L, cif_D : str
        Paths of the structure files handed to ``HybridProtLigMapper``.
    ligand_L, ligand_D : str
        Ligand residue names passed as ``ligand_resname``.
    output_prefix : str
        Prefix (may include a directory) of the two JSON files written.

    Returns
    -------
    None
        The results are only written to disk.
    """
    mapperL = HybridProtLigMapper(cif_L, ligand_resname=ligand_L)
    mapperD = HybridProtLigMapper(cif_D, ligand_resname=ligand_D)

    mapperL.run_analysis()
    mapperD.run_analysis()

    # One pass per enantiomer: (label used in keys/filename, ligand, mapper).
    for label, ligand, mapper in (("L", ligand_L, mapperL), ("D", ligand_D, mapperD)):
        output = {
            f"description_{label}": f"These are the interactions for {ligand}",
            f"{label}_interactions": _summarize_interactions(mapper),
        }
        with open(f"{output_prefix}_{label}_interactions.json", "w") as f:
            json.dump(output, f, indent=4)


def extract_residue_counts_from_json(json_file, key):
    """Count how often each residue name appears in an interactions JSON file.

    Parameters
    ----------
    json_file : str
        Path of a JSON file whose ``key`` entry maps interaction types to
        lists of records that each contain a ``"resname"`` field.
    key : str
        Top-level key holding the interaction mapping.

    Returns
    -------
    collections.Counter
        Residue name -> number of records, pooled over all interaction types.
    """
    with open(json_file) as handle:
        payload = json.load(handle)
    return Counter(
        record["resname"]
        for records in payload[key].values()
        for record in records
    )


def plot_histogram(counter_L, counter_D, output_path="all_species_interaction_histogram.png"):
    """Save a grouped bar chart comparing residue-name counts for L and D.

    Parameters
    ----------
    counter_L, counter_D : Mapping
        Residue name -> count for the L and D interactions. A name missing
        from one of them is drawn with height 0.
    output_path : str, optional
        Where the figure is saved. Defaults to the filename this function
        always used, so existing calls behave as before.

    Returns
    -------
    None
        The figure is written to ``output_path`` and then closed.
    """
    all_keys = sorted(set(counter_L) | set(counter_D))
    L_values = [counter_L.get(k, 0) for k in all_keys]
    D_values = [counter_D.get(k, 0) for k in all_keys]

    x = range(len(all_keys))
    width = 0.35

    fig, ax = plt.subplots(figsize=(14, 6))
    try:
        # Shift each series half a bar width so the L/D pair straddles the tick.
        ax.bar([i - width / 2 for i in x], L_values, width=width, label='L', color='blue')
        ax.bar([i + width / 2 for i in x], D_values, width=width, label='D', color='magenta')

        ax.set_xlabel("Amino Acid (resname)")
        ax.set_ylabel("Frequency in Interactions")
        ax.set_title("Frequency of Amino Acids in L vs D Interactions (All Species)")
        ax.set_xticks(list(x))
        ax.set_xticklabels(all_keys, rotation=45)
        ax.legend()
        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        # Close this figure even if drawing or saving failed, so it does not
        # stay registered with pyplot.
        plt.close(fig)


if __name__ == "__main__":
    # Pool residue-name counts over every species, then draw a single chart.
    species = ["tkMAT", "bsMAT", "cuMAT", "ecMAT", "mjMAT", "mmaMAT", "uuMAT", "scMAT"]
    counter_L_total, counter_D_total = Counter(), Counter()

    for specie in species:
        if specie != "tkMAT":
            # Run the mapper on the predicted structures; this (re)writes the
            # species-prefixed JSON files that are read just below.
            cif_L = os.path.join("predicted_L", f"{specie}_L.cif")
            cif_D = os.path.join("predicted_D", f"{specie}_D.cif")
            ligand = "LIG3"
            mapper_and_json(cif_L, cif_D, ligand_L=ligand, ligand_D=ligand, output_prefix=specie)

            json_L = f"{specie}_L_interactions.json"
            json_D = f"{specie}_D_interactions.json"
        else:
            # tkMAT is read from existing, unprefixed JSON files; no mapper run.
            json_L = "L_interactions.json"
            json_D = "D_interactions.json"

        counter_L = extract_residue_counts_from_json(json_L, key="L_interactions")
        counter_D = extract_residue_counts_from_json(json_D, key="D_interactions")

        counter_L_total.update(counter_L)
        counter_D_total.update(counter_D)

    plot_histogram(counter_L_total, counter_D_total)

In [1]:
# Plot residue positions here instead of amino-acid names
import os
import json
import matplotlib.pyplot as plt
from collections import Counter
from pandamap import HybridProtLigMapper


def _summarize_interactions(mapper):
    """Turn a finished mapper's interactions into JSON-serialisable records.

    Parameters
    ----------
    mapper : object
        Any object exposing ``interactions`` as a mapping of
        ``interaction_type -> list of dicts``, where each dict carries a
        ``"protein_residue"`` entry.

    Returns
    -------
    dict
        ``{interaction_type: [{"resname", "position", "chain"}, ...]}`` in
        the same order the mapper reports them.
    """
    summary = {}
    for interaction_type, interactions in mapper.interactions.items():
        records = []
        for interaction in interactions:
            # Assumes Bio.PDB-style residue objects (get_resname / get_id /
            # get_parent). The public accessors are used instead of poking at
            # ``__dict__`` so wrapper objects that forward attributes work too.
            residue = interaction["protein_residue"]
            chain = residue.get_parent()
            records.append({
                "resname": residue.get_resname(),
                # Element 1 of the residue id tuple is the sequence number.
                "position": residue.get_id()[1],
                # A detached residue has no chain; store null rather than crash.
                "chain": chain.get_id() if chain is not None else None,
            })
        summary[interaction_type] = records
    return summary


def mapper_and_json(cif_L, cif_D, ligand_L, ligand_D, output_prefix):
    """Analyse the L and D complexes and dump their interactions to JSON.

    A ``HybridProtLigMapper`` is built and run for each structure file, and
    the interacting protein residues are written to
    ``{output_prefix}_L_interactions.json`` and
    ``{output_prefix}_D_interactions.json``.

    Parameters
    ----------
    cif_L, cif_D : str
        Paths of the structure files handed to ``HybridProtLigMapper``.
    ligand_L, ligand_D : str
        Ligand residue names passed as ``ligand_resname``.
    output_prefix : str
        Prefix (may include a directory) of the two JSON files written.

    Returns
    -------
    None
        The results are only written to disk.
    """
    mapperL = HybridProtLigMapper(cif_L, ligand_resname=ligand_L)
    mapperD = HybridProtLigMapper(cif_D, ligand_resname=ligand_D)

    mapperL.run_analysis()
    mapperD.run_analysis()

    # One pass per enantiomer: (label used in keys/filename, ligand, mapper).
    for label, ligand, mapper in (("L", ligand_L, mapperL), ("D", ligand_D, mapperD)):
        output = {
            f"description_{label}": f"These are the interactions for {ligand}",
            f"{label}_interactions": _summarize_interactions(mapper),
        }
        with open(f"{output_prefix}_{label}_interactions.json", "w") as f:
            json.dump(output, f, indent=4)


def extract_position_counts_from_json(json_file, key):
    """Count how often each residue position appears in an interactions JSON file.

    Parameters
    ----------
    json_file : str
        Path of a JSON file whose ``key`` entry maps interaction types to
        lists of records that each contain a ``"position"`` field.
    key : str
        Top-level key holding the interaction mapping.

    Returns
    -------
    collections.Counter
        Position -> number of records, pooled over all interaction types.
    """
    with open(json_file) as handle:
        payload = json.load(handle)
    return Counter(
        record["position"]
        for records in payload[key].values()
        for record in records
    )


def plot_histogram(counter_L, counter_D, output_path="all_species_position_histogram.png"):
    """Save a grouped bar chart comparing residue-position counts for L and D.

    Parameters
    ----------
    counter_L, counter_D : Mapping
        Residue position -> count for the L and D interactions. A position
        missing from one of them is drawn with height 0.
    output_path : str, optional
        Where the figure is saved. Defaults to the filename this function
        always used, so existing calls behave as before.

    Returns
    -------
    None
        The figure is written to ``output_path`` and then closed.
    """
    all_positions = sorted(set(counter_L) | set(counter_D))
    L_values = [counter_L.get(p, 0) for p in all_positions]
    D_values = [counter_D.get(p, 0) for p in all_positions]

    # Bars are placed at consecutive slots, not at the position values, so
    # gaps between positions do not stretch the axis.
    x = range(len(all_positions))
    width = 0.35

    fig, ax = plt.subplots(figsize=(16, 6))
    try:
        # Shift each series half a bar width so the L/D pair straddles the tick.
        ax.bar([i - width / 2 for i in x], L_values, width=width, label='L', color='blue')
        ax.bar([i + width / 2 for i in x], D_values, width=width, label='D', color='magenta')

        ax.set_xlabel("Residue Position")
        ax.set_ylabel("Frequency in Interactions")
        ax.set_title("Frequency of Residue Positions in L vs D Interactions (All Species)")
        ax.set_xticks(list(x))
        ax.set_xticklabels(all_positions, rotation=90)
        ax.legend()
        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        # Close this figure even if drawing or saving failed, so it does not
        # stay registered with pyplot.
        plt.close(fig)


if __name__ == "__main__":
    # Pool residue-position counts over every species, then draw a single chart.
    species = ["tkMAT", "bsMAT", "cuMAT", "ecMAT", "mjMAT", "mmaMAT", "uuMAT", "scMAT"]
    counter_L_total, counter_D_total = Counter(), Counter()

    for specie in species:
        if specie != "tkMAT":
            # Run the mapper on the predicted structures; this (re)writes the
            # species-prefixed JSON files that are read just below.
            cif_L = os.path.join("predicted_L", f"{specie}_L.cif")
            cif_D = os.path.join("predicted_D", f"{specie}_D.cif")
            ligand = "LIG3"
            mapper_and_json(cif_L, cif_D, ligand_L=ligand, ligand_D=ligand, output_prefix=specie)

            json_L = f"{specie}_L_interactions.json"
            json_D = f"{specie}_D_interactions.json"
        else:
            # tkMAT is read from existing, unprefixed JSON files; no mapper run.
            json_L = "L_interactions.json"
            json_D = "D_interactions.json"

        counter_L = extract_position_counts_from_json(json_L, key="L_interactions")
        counter_D = extract_position_counts_from_json(json_D, key="D_interactions")

        counter_L_total.update(counter_L)
        counter_D_total.update(counter_D)

    plot_histogram(counter_L_total, counter_D_total)

Detecting interactions...
*** Using IMPROVED detection with stricter filtering ***
Before filtering: 31 total interactions
Calculating solvent accessibility...
Trying DSSP method first...
Error calculating solvent accessibility: 'HybridProtLigMapper' object has no attribute 'estimate_solvent_accessibility'
Falling back to realistic method
Using realistic solvent accessibility calculation...
Total protein atoms: 6188
Interacting residues to check: 26
Marking ('ASP', 253) as solvent accessible (score: 1.12)
Marking ('ILE', 105) as solvent accessible (score: 0.90)
Marking ('SER', 102) as solvent accessible (score: 1.12)
Marking ('HIS', 17) as solvent accessible (score: 1.12)
Marking ('GLU', 58) as solvent accessible (score: 1.12)
Marking ('GLY', 131) as solvent accessible (score: 1.00)
Marking ('ALA', 254) as solvent accessible (score: 0.90)
Marking ('ALA', 43) as solvent accessible (score: 0.90)
Marking ('LYS', 284) as solvent accessible (score: 1.12)
Marking ('PRO', 18) as solvent acces