In [254]:
import plotly.express as px
import ast
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
import py3Dmol
import os
import nglview as nv

In [197]:
from src.scripts.mutants_analysis import *

In [None]:
df_mutants = pd.read_csv('../data/mutants.csv')
df_merged = pd.read_csv('../data/merged_df.csv')
df_mutants['Target Names'] = df_mutants['Target Names'].apply(lambda x: ast.literal_eval(x))
df_mutants['BindingDB Target Chain Sequence'] = df_mutants['BindingDB Target Chain Sequence'].apply(lambda x: ast.literal_eval(x))

In [None]:
saving_folder_dfs = "src/data/pair_dfs"
saving_folder_ligands = "src/plots/ligands"

for index, row in df_mutants.iterrows():
    if len(row['Target Names']) > 10:
        df_pair = compute_variation_ic50(row, df_merged)
        wt_name = row['WT Target Name']
        smiles = row['Ligand SMILES']
        name = smiles + '_' + wt_name
        if df_pair is None:
            print("This pair will not be saved due to multiple conflicting values in BindingDB") 
        else: 
            saving_path_df = saving_folder_dfs + '/' + smiles + '_' + '_'.join(wt_name.split(' ')) +'.csv'
            df_pair.to_csv(saving_path_df)
            print("Pair information succesfully saved")

            # Creating and Saving Ligand Representation
            mol = Chem.MolFromSmiles(smiles)

            # Generate 3D coordinates for the molecule
            AllChem.EmbedMolecule(mol, randomSeed=42)
            AllChem.MMFFOptimizeMolecule(mol)

            # Get 3D coordinates from the RDKit molecule
            conf = mol.GetConformer()
            coords = [conf.GetAtomPosition(i) for i in range(mol.GetNumAtoms())]

            # Prepare the 3Dmol viewer
            viewer = py3Dmol.view(width=800, height=600)

            # Add the molecule to the viewer using the 3D coordinates
            block = Chem.MolToMolBlock(mol)
            viewer.addModel(block, 'mol')

            # Set style and show the molecule
            viewer.setStyle({'stick': {}})
            viewer.setBackgroundColor('white')

            saving_path_ligand = os.path.join(saving_folder_ligands, smiles)
            viewer.write_html(saving_path_ligand + '.html')

        print("---------------------------------------------------------------------------")

Pair information succesfully saved
---------------------------------------------------------------------------


[12:53:33] Molecule does not have explicit Hs. Consider calling AddHs()
[12:53:33] Molecule does not have explicit Hs. Consider calling AddHs()


Pair information succesfully saved


[12:53:55] Molecule does not have explicit Hs. Consider calling AddHs()
[12:53:55] Molecule does not have explicit Hs. Consider calling AddHs()


---------------------------------------------------------------------------
For this ligand-protein pair there are multiple values of IC50 and we decided to drop this case.
This pair will not be saved due to multiple conflicting values in BindingDB
---------------------------------------------------------------------------
Pair information succesfully saved
---------------------------------------------------------------------------


[12:54:19] Molecule does not have explicit Hs. Consider calling AddHs()
[12:54:19] Molecule does not have explicit Hs. Consider calling AddHs()


Pair information succesfully saved
---------------------------------------------------------------------------


[12:54:27] Molecule does not have explicit Hs. Consider calling AddHs()
[12:54:27] Molecule does not have explicit Hs. Consider calling AddHs()


Pair information succesfully saved
---------------------------------------------------------------------------


[12:54:35] Molecule does not have explicit Hs. Consider calling AddHs()
[12:54:35] Molecule does not have explicit Hs. Consider calling AddHs()


For this ligand-protein pair there are multiple values of IC50 and we decided to drop this case.
This pair will not be saved due to multiple conflicting values in BindingDB
---------------------------------------------------------------------------


In [None]:
amino_acid_dict = {
    'A': 'Alanine',
    'C': 'Cysteine',
    'D': 'Aspartic Acid',
    'E': 'Glutamic Acid',
    'F': 'Phenylalanine',
    'G': 'Glycine',
    'H': 'Histidine',
    'I': 'Isoleucine',
    'K': 'Lysine',
    'L': 'Leucine',
    'M': 'Methionine',
    'N': 'Asparagine',
    'P': 'Proline',
    'Q': 'Glutamine',
    'R': 'Arginine',
    'S': 'Serine',
    'T': 'Threonine',
    'V': 'Valine',
    'W': 'Tryptophan',
    'Y': 'Tyrosine'
}

def convert_aa_names(string):
    if string!='Deletion':
        aa1 = string.split(' -> ')[0]
        aa2 = string.split(' -> ')[1]
        return f'{amino_acid_dict[aa1]} -> {amino_acid_dict[aa2]}'
    else:
        return 'Deletion'

file_directory = 'src/data/pair_dfs'
saving_directory = 'src/plots'

for name in os.listdir(file_directory):
    path = os.path.join(file_directory, name)
    df_pair = pd.read_csv(path)
    df_pair.loc[df_pair.Type == 'gap','Mutation'] = 'Deletion'
    df_pair.Mutation = df_pair.Mutation.apply(convert_aa_names)
    # Creating and Saving IC50 plot
    fig = px.scatter(
        df_pair,
        x='Positions',  # x-axis is the mutation position
        y='IC50',  # y-axis is the IC50 value
        color='Mutant Name',  # color by the mutant name
        hover_name='Mutant Name',
        hover_data={'Positions': True, 'Mutation': True, 'IC50': True, 'Mutant Name':False},  # info to display on hover
        title='IC50 Differences of Mutants by Position',
        labels={'Positions': 'Mutation Position', 'IC50': 'IC50 Difference'},
    )

    # Customize layout if needed
    fig.update_layout(
        hovermode='closest',  # Ensure tooltips show up when hovering closest to a point
        xaxis_title='Mutation Position',
        yaxis_title='IC50 Difference',
        template='plotly_dark'  # Optional: use a dark theme (you can remove or change this)
    )

    # Show the interactive plot
    fig.show()
    saving_path = os.path.join(saving_directory, name.split('.')[0])
    fig.write_html(saving_path + '.html')    

In [260]:
# Load the PDB file
def viualize_mutant(protein_file, row=None):
    view = nv.show_file(protein_file)  # Update with the correct file path

    # Remove default representations
    if row.mutations is None:
        print("WT protein")
    else:
        print("Mutant: ", row.mutant_name)
        view.clear_representations()
        view.add_cartoon(color="#D3D3D3")

        for m in row.mutations:
            if m[0] == 'Deletion':
                view.add_cartoon(selection=m[1], color="blue")  # Highlight region 1-18 in blue
            else:
                view.add_ball_and_stick(selection=m[1], color="red")  # Highlight residue 858 in red
    nv.write_html(row.mutant_name + '.html', [view])

test_df = pd.DataFrame({'mutant_name': ['WT', 'mutant [1-18]', 'mutant [C18->T]', 'mutant [1-18][C21->T]'], 'mutations': [None, ['Deletion', '1-18'], ['C18->T', '18'], [['Deletion', '1-18'], ['C21->T', '21']]]})

for name, r in test_df.iterrows():
    viualize_mutant('pdb_files/P00533.pdb', r)

WT protein
Mutant:  mutant [1-18]
Mutant:  mutant [C18->T]
Mutant:  mutant [1-18][C21->T]


In [None]:
# Probably remove
for name, group in df_mutants[df_mutants['Target Names'].apply(lambda x: len(x)>10)].groupby('WT Target Name'):
    d = {}
    for idx, top_pair in group.iterrows():
        target_names = top_pair['Target Names']
        target_sequences = top_pair['BindingDB Target Chain Sequence']
        d.update(dict(zip(target_names, target_sequences)))

In [None]:
# Probably remove
def save_to_fasta(group_dict, filename="output.fasta"):
    """
    Saves the protein names and sequences from the dictionary to a FASTA file.
    
    :param group_dict: Dictionary where keys are protein names and values are sequences
    :param filename: Output FASTA file name (default is "output.fasta")
    """
    with open(filename, 'w') as fasta_file:
        for protein_name, sequence in group_dict.items():
            # Write each protein's name and sequence in FASTA format
            fasta_file.write(f">{protein_name}\n")
            # Write the sequence, split into multiple lines if necessary
            for i in range(0, len(sequence), 80):  # 80 characters per line
                fasta_file.write(sequence[i:i+80] + "\n")
    
    print(f"FASTA file saved as {filename}")


In [None]:
# Probably remove
for name, group in df_mutants[df_mutants['Target Names'].apply(lambda x: len(x)>10)].groupby('WT Target Name'):
    group_dict = {}
    print("-----------------------------------------")
    print('Started processing Group: ', name)
    for idx, top_pair in group.iterrows():
        target_names = top_pair['Target Names']
        target_sequences = top_pair['BindingDB Target Chain Sequence']
        group_dict.update(dict(zip(target_names, target_sequences)))
    
    save_to_fasta(group_dict, f"{name}.fasta")
    print('Finished processing Group: ', name)
    print("-----------------------------------------")

-----------------------------------------
Started processing Group:  Epidermal growth factor receptor
FASTA file saved as Epidermal growth factor receptor.fasta
Finished processing Group:  Epidermal growth factor receptor
-----------------------------------------
-----------------------------------------
Started processing Group:  cAMP-dependent protein kinase catalytic subunit alpha
FASTA file saved as cAMP-dependent protein kinase catalytic subunit alpha.fasta
Finished processing Group:  cAMP-dependent protein kinase catalytic subunit alpha
-----------------------------------------


In [None]:
# TBD
from alphafetcher import AlphaFetcher

# Instantiate the fetcher
# The base_savedir parameter allows you to set a base directory where files will be saved.
# Inside this directory, two separate directories for pdb and cif files will be created.
fetcher = AlphaFetcher(base_savedir="")
ids= ['P00533', 'P27791']
# Add desired Uniprot access codes
fetcher.add_proteins(ids)

# Retrieve metadata
fetcher.fetch_metadata(multithread=True, workers=4)
# Metadata available at fetcher.metadata_dict

# Commence download of specified files
fetcher.download_all_files(pdb=True, multithread=True, workers=4)

Fetching Metadata: 100%|██████████| 2/2 [00:00<00:00,  2.66it/s]
Fetching files: 100%|██████████| 2/2 [00:00<00:00,  7.91it/s]
