In [1]:
# Import necessary libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import glob
import plotly.express as px


In [2]:
# Define the path to the CSV file
csv_file_path = "../../Jazzer_surf/3d_predictions/chimeras/test/interactions_all_files.csv"

# Read the CSV file into a DataFrame
df = pd.read_csv(csv_file_path)

# Keep only the rows where resn is MG and interacting_resn is ATP or ADP.
# .copy() makes the result an independent frame, so the column assignment
# below does not raise pandas' SettingWithCopyWarning.
df = df[(df['resn'] == 'MG') & df['interacting_resn'].isin(['ADP', 'ATP'])].copy()

# Display order for the interacting atoms: each phosphorus (PA, PB, PG)
# followed by its O1, O2, O3 oxygens. (The gamma oxygens were previously
# listed as O2G, O1G, which contradicted this order.)
atom_order = ['PA', 'O1A', 'O2A', 'O3A', 'PB', 'O1B', 'O2B', 'O3B', 'PG', 'O1G', 'O2G', 'O3G']

# Atoms that are not in atom_order become NaN in the categorical and are
# therefore placed last by sort_values.
df['atom_order'] = pd.Categorical(df['interacting_atom'], categories=atom_order, ordered=True)
df = df.sort_values(by='atom_order')

# Create an interactive strip plot using Plotly
fig = px.strip(df, x='interacting_atom', y='distance (angstroms)', color='file',
               category_orders={"interacting_atom": atom_order},
               title='Distance between MG and ATP/ADP atoms',
               labels={'interacting_atom': 'Interacting atom', 'distance (angstroms)': 'Distance (angstroms)'},
               hover_data=['file'])

# Fix the y-axis range and force the x-axis categories into atom_order.
# The legend title is set once, through the legend dict.
fig.update_layout(
    legend=dict(
        title=dict(text='File'),
        itemsizing='constant'
    ),
    xaxis_title='Interacting atom',
    yaxis_title='Distance (angstroms)',
    yaxis=dict(range=[0, 5]),
    xaxis=dict(categoryorder='array', categoryarray=atom_order)
)

# Show the plot
fig.show()


In [3]:
# Location of the combined interactions table
csv_file_path = "../../Jazzer_surf/3d_predictions/chimeras/test/interactions_all_files.csv"

# Load every recorded interaction
df = pd.read_csv(csv_file_path)

# Within each (file, residue, interacting chain / residue name) group, keep
# only the row that has the smallest distance.
group_keys = ['file', 'chain', 'resi', 'resn', 'interacting_chain', 'interacting_resn']
closest_row_labels = df.groupby(group_keys)['distance (angstroms)'].idxmin()
df = df.loc[closest_row_labels]

# Build a "residue" label by joining the residue number and its one-letter code
resi_as_text = df['resi'].astype(str)
df['residue'] = resi_as_text + df['residue_one_letter']

df.head()

Unnamed: 0,file,chain,resi,resn,atom_name,interacting_atom,interacting_resn,interacting_chain,distance (angstroms),residue_one_letter,full_atom_name,interacting_full_atom_name,residue
3557,a_atp,A,5,ARG,NH2,OH,TYR,B,2.754006,R,Nitrogen eta 2,Unknown,5R
2943,a_atp,A,18,ARG,NH2,N6,ATP,C,3.543884,R,Nitrogen eta 2,Nitrogen 6,18R
3465,a_atp,A,19,PHE,O,N6,ATP,C,4.447849,F,Oxygen,Nitrogen 6,19F
3032,a_atp,A,20,ARG,CB,N7,ATP,C,3.693005,R,Carbon beta,Nitrogen 7,20R
3903,a_atp,A,21,PRO,CD,N1,ATP,C,3.591886,P,Carbon delta,Nitrogen 1,21P


In [4]:
# Narrow df down to chain A rows that interact with chain C in file a_atp_mg
is_chain_a = df['chain'] == 'A'
partner_is_chain_c = df['interacting_chain'] == 'C'
from_a_atp_mg = df['file'] == 'a_atp_mg'
df = df.loc[is_chain_a & partner_is_chain_c & from_a_atp_mg]

# Residue labels of the rows that remain
df['residue'].values

array(['18R', '19F', '20R', '21P', '61P', '93Q', '94T', '95S', '96S',
       '97G', '98K', '99T', '100H', '101T', '205N', '208S', '209S',
       '238D', '239L', '240A', '241G'], dtype=object)

In [7]:
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, HoverTool, CDSView, BooleanFilter
from bokeh.transform import factor_cmap
from bokeh.palettes import Category10, Category20

# Enable Bokeh output in the notebook
output_notebook()

# Define the path to the CSV file
csv_file_path = "../../Jazzer_surf/3d_predictions/chimeras/test/interactions_all_files.csv"

# Read the CSV file into a DataFrame
df = pd.read_csv(csv_file_path)

# For each chain A residue index, keep the row with the shortest distance to
# each interacting chain.
# NOTE(review): 'file' is not part of the grouping, so the minimum is taken
# across every file in the CSV - confirm that pooling the files is intended.
chain_a_rows = df[df['chain'] == 'A']
closest_row_labels = chain_a_rows.groupby(['resi', 'interacting_chain'])['distance (angstroms)'].idxmin()
df = df.loc[closest_row_labels]

# Exclude interactions with Mg and chain letters A, G, H.
# .copy() makes the result an independent frame, so the column assignments
# below do not raise pandas' SettingWithCopyWarning.
df = df[(df['interacting_resn'] != 'MG') & (~df['interacting_chain'].isin(['A', 'G', 'H']))].copy()

# Map the residue name (resn) to its single-letter code for better readability.
# Residue names that are not in aa_dict map to NaN.
aa_dict = {
    'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D', 'CYS': 'C', 'GLN': 'Q', 'GLU': 'E', 'GLY': 'G',
    'HIS': 'H', 'ILE': 'I', 'LEU': 'L', 'LYS': 'K', 'MET': 'M', 'PHE': 'F', 'PRO': 'P', 'SER': 'S',
    'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V'
}
df['residue_one_letter'] = df['resn'].map(aa_dict)
df['residue'] = df['resi'].astype(str) + df['residue_one_letter']

# Replace chain letters with descriptive names
# NOTE(review): this letter -> name assignment is assumed to hold for every
# file in the CSV - verify against the structures.
chain_map = {
    'B': 'kinesin B',
    'C': 'alpha-tubulin',
    'D': 'beta-tubulin',
    'E': 'ATP',
    'F': 'ATP'
}
# Chain letters without an entry in chain_map keep their original letter
# instead of turning into NaN, which is not a valid factor or legend label.
df['interacting_chain'] = df['interacting_chain'].map(chain_map).fillna(df['interacting_chain'])

# Create a ColumnDataSource
source = ColumnDataSource(df)

# Create a color map for interacting chains (plain list of the distinct names)
unique_interacting_chains = df['interacting_chain'].unique().tolist()
palette = Category10[10] if len(unique_interacting_chains) <= 10 else Category20[20]
color_map = factor_cmap('interacting_chain', palette=palette, factors=unique_interacting_chains)

# Create the figure. The title describes what is plotted here: MG contacts are
# excluded above, and each point is a chain A residue's closest contact.
p = figure(width=900, height=600, title="Shortest distance between chain A residues and interacting chains",
           x_axis_label="Residue Index", y_axis_label="Distance (angstroms)", x_range=(0, 401), y_range=(0, 5.25))

# Add one scatter renderer per interacting chain, each restricted by a view,
# so every chain gets its own legend entry that can be muted independently.
# NOTE(review): CDSView(source=..., filters=[...]) looks like the Bokeh 2.x
# signature; newer Bokeh releases appear to expect CDSView(filter=...) -
# confirm against the installed version.
for chain in unique_interacting_chains:
    bool_filter = BooleanFilter((df['interacting_chain'] == chain).tolist())
    view = CDSView(source=source, filters=[bool_filter])
    p.scatter(x='resi', y='distance (angstroms)', source=source, size=10, color=color_map, alpha=0.6, legend_label=chain, view=view)

# Add hover tool
hover = HoverTool()
hover.tooltips = [
    ('Resi', '@residue'),
    ('Distance', '@{distance (angstroms)}{0.2f} Å'),
    ('Chain', '@chain'),
    ('Interacting Chain', '@interacting_chain'),
    ('Interacting Resn', '@interacting_resn'),
    ('interacting atom', '@interacting_full_atom_name')
]
p.add_tools(hover)

# Add one-letter residue labels on top of the points
p.text(x='resi', y='distance (angstroms)', text='residue_one_letter', source=source, text_align='center', text_baseline='middle', text_font_size='10pt')

# Customize legend
p.legend.title = 'Interacting Chain'
p.legend.location = 'bottom_right'
p.legend.click_policy = 'mute'

# Show the plot
show(p)




In [None]:
# Preview the first rows only instead of rendering the whole DataFrame
df.head()