A

In [7]:
import pandas as pd
import plotly.express as px

# Define the path to the CSV file
csv_file_path = "/Users/jazzeruncal/git/Jazzer_surf/3d_predictions/chimeras/motor2x_ATP2x_Mg2x_abTub/interactions_all_files.csv"

# Read the CSV file into a DataFrame
df = pd.read_csv(csv_file_path)

# Filter to get the shortest distances for each interaction
df = df.loc[df[df['chain'] == 'A'].groupby(['file', 'resi', 'interacting_chain'])['distance (angstroms)'].idxmin()]

# Exclude interactions with Mg and chain letters A, G, H
df = df[(df['interacting_resn'] != 'MG') & (~df['interacting_chain'].isin(['A', 'G', 'H']))].reset_index(drop=True)

# Map interacting_resn to single-letter codes for better readability
aa_dict = {
    'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D', 'CYS': 'C', 'GLN': 'Q', 'GLU': 'E', 'GLY': 'G',
    'HIS': 'H', 'ILE': 'I', 'LEU': 'L', 'LYS': 'K', 'MET': 'M', 'PHE': 'F', 'PRO': 'P', 'SER': 'S',
    'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V'
}
df['residue_one_letter'] = df['resn'].map(aa_dict)
df['residue'] = df['resi'].astype(str) + df['residue_one_letter']

# Replace chain letters with descriptive names
chain_map = {
    'B': 'kinesin B',
    'C': 'alpha-tubulin',
    'D': 'beta-tubulin',
    'E': 'ATP',
    'F': 'ATP'
}
df['interacting_chain'] = df['interacting_chain'].map(chain_map)

# Filter to specific files
df = df.loc[df['file'].isin([
    'A', 

    ])]

# Group by file and interacting chain, keeping residues in order
grouped = df.groupby(['file', 'interacting_chain'])

# Create a list to store the output rows
output_rows = []

# Iterate over each group and extract information
for (file, chain), group in grouped:
    for _, row in group.iterrows():
        output_rows.append({
            'file': file,
            'interacting_chain': chain,
            'interacting_atom': row['interacting_atom'],
            'residue_number': row['resi'],
            'amino_acid': row['residue_one_letter'],
            'distance (angstroms)': row['distance (angstroms)'],
        })

# Convert the list to a DataFrame
interaction_df = pd.DataFrame(output_rows)

# Display the resulting DataFrame
interaction_df

interaction_df.to_csv("interaction_summary.csv", index=False)



# Load the data
csv_file_path = "interaction_summary.csv"
interaction_df = pd.read_csv(csv_file_path)

# Create a new column for the x-axis labels by concatenating residue number and amino acid
interaction_df['residue_label'] = interaction_df['residue_number'].astype(str) + interaction_df['amino_acid']

# Sort the dataframe by file and residue number to maintain order
interaction_df = interaction_df.sort_values(by=['file', 'residue_number'])

# Convert residue_label to a categorical type with the correct order
interaction_df['residue_label'] = pd.Categorical(interaction_df['residue_label'], 
                                                 categories=sorted(interaction_df['residue_label'].unique(), key=lambda x: int(''.join(filter(str.isdigit, x)))), 
                                                 ordered=True)

# Scatter plot for interaction distances with custom x-axis labels
fig_scatter = px.scatter(interaction_df, x='residue_label', y='distance (angstroms)', color='interacting_chain', text='interacting_atom',
                         title='Interaction Distances by Residue and Interacting Chain',
                         labels={'residue_label': 'Residue (Number and Identity)', 'distance (angstroms)': 'Distance (angstroms)', 'interacting_chain': 'Interacting Chain'},
                         hover_data=['file'])

fig_scatter.update_traces(textposition='top center', marker=dict(size=10, opacity=0.6))
fig_scatter.update_layout(xaxis={'categoryorder':'array', 'categoryarray': interaction_df['residue_label'].cat.categories})

# Show the plot
fig_scatter.show()



C

In [8]:
import pandas as pd
import plotly.express as px

# Define the path to the CSV file
csv_file_path = "../../Jazzer_surf/3d_predictions/chimeras/test/interactions_all_files.csv"

# Read the CSV file into a DataFrame
df = pd.read_csv(csv_file_path)

# Filter to get the shortest distances for each interaction
df = df.loc[df[df['chain'] == 'A'].groupby(['file', 'resi', 'interacting_chain'])['distance (angstroms)'].idxmin()]

# Exclude interactions with Mg and chain letters A, G, H
df = df[(df['interacting_resn'] != 'MG') & (~df['interacting_chain'].isin(['A', 'G', 'H']))].reset_index(drop=True)

# Map interacting_resn to single-letter codes for better readability
aa_dict = {
    'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D', 'CYS': 'C', 'GLN': 'Q', 'GLU': 'E', 'GLY': 'G',
    'HIS': 'H', 'ILE': 'I', 'LEU': 'L', 'LYS': 'K', 'MET': 'M', 'PHE': 'F', 'PRO': 'P', 'SER': 'S',
    'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V'
}
df['residue_one_letter'] = df['resn'].map(aa_dict)
df['residue'] = df['resi'].astype(str) + df['residue_one_letter']

# Replace chain letters with descriptive names
chain_map = {
    'B': 'kinesin B',
    'C': 'alpha-tubulin',
    'D': 'beta-tubulin',
    'E': 'ATP',
    'F': 'ATP'
}
df['interacting_chain'] = df['interacting_chain'].map(chain_map)

# Filter to specific files
df = df.loc[df['file'].isin([
    'C', 
    ])]

# Group by file and interacting chain, keeping residues in order
grouped = df.groupby(['file', 'interacting_chain'])

# Create a list to store the output rows
output_rows = []

# Iterate over each group and extract information
for (file, chain), group in grouped:
    for _, row in group.iterrows():
        output_rows.append({
            'file': file,
            'interacting_chain': chain,
            'interacting_atom': row['interacting_atom'],
            'residue_number': row['resi'],
            'amino_acid': row['residue_one_letter'],
            'distance (angstroms)': row['distance (angstroms)'],
        })

# Convert the list to a DataFrame
interaction_df = pd.DataFrame(output_rows)

# Display the resulting DataFrame
interaction_df

interaction_df.to_csv("interaction_summary.csv", index=False)



# Load the data
csv_file_path = "interaction_summary.csv"
interaction_df = pd.read_csv(csv_file_path)

# Create a new column for the x-axis labels by concatenating residue number and amino acid
interaction_df['residue_label'] = interaction_df['residue_number'].astype(str) + interaction_df['amino_acid']

# Sort the dataframe by file and residue number to maintain order
interaction_df = interaction_df.sort_values(by=['file', 'residue_number'])

# Convert residue_label to a categorical type with the correct order
interaction_df['residue_label'] = pd.Categorical(interaction_df['residue_label'], 
                                                 categories=sorted(interaction_df['residue_label'].unique(), key=lambda x: int(''.join(filter(str.isdigit, x)))), 
                                                 ordered=True)

# Scatter plot for interaction distances with custom x-axis labels
fig_scatter = px.scatter(interaction_df, x='residue_label', y='distance (angstroms)', color='interacting_chain', text='interacting_atom',
                         title='Interaction Distances by Residue and Interacting Chain',
                         labels={'residue_label': 'Residue (Number and Identity)', 'distance (angstroms)': 'Distance (angstroms)', 'interacting_chain': 'Interacting Chain'},
                         hover_data=['file'])

fig_scatter.update_traces(textposition='top center', marker=dict(size=10, opacity=0.6))
fig_scatter.update_layout(xaxis={'categoryorder':'array', 'categoryarray': interaction_df['residue_label'].cat.categories})

# Show the plot
fig_scatter.show()



ALL together


In [16]:
import pandas as pd
import plotly.express as px

# Define the path to the uploaded CSV file
csv_file_path = '/Users/jazzeruncal/git/Jazzer_surf/3d_predictions/chimeras/motor2x_ATP2x_Mg2x_abTub/interactions_all_files.csv'

# Read the CSV file into a DataFrame
df = pd.read_csv(csv_file_path)

# Filter to get the shortest distances for each interaction
df = df.loc[df[df['chain'] == 'A'].groupby(['file', 'resi', 'interacting_chain'])['distance (angstroms)'].idxmin()]

# Exclude interactions with Mg and chain letters A, G, H
df = df[(df['interacting_resn'] != 'MG') & (~df['interacting_chain'].isin(['A', 'G', 'H']))].reset_index(drop=True)

# Map interacting_resn to single-letter codes for better readability
aa_dict = {
    'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D', 'CYS': 'C', 'GLN': 'Q', 'GLU': 'E', 'GLY': 'G',
    'HIS': 'H', 'ILE': 'I', 'LEU': 'L', 'LYS': 'K', 'MET': 'M', 'PHE': 'F', 'PRO': 'P', 'SER': 'S',
    'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V'
}
df['residue_one_letter'] = df['resn'].map(aa_dict)
df['residue'] = df['resi'].astype(str) + df['residue_one_letter']

# Replace chain letters with descriptive names
chain_map = {
    'B': 'kinesin B',
    'C': 'alpha-tubulin',
    'D': 'beta-tubulin',
    'E': 'ATP',
    'F': 'ATP'
}
df['interacting_chain'] = df['interacting_chain'].map(chain_map)

# Group by file and interacting chain, keeping residues in order
grouped = df.groupby(['file', 'interacting_chain'])

# Create a list to store the output rows
output_rows = []

# Iterate over each group and extract information
for (file, chain), group in grouped:
    for _, row in group.iterrows():
        output_rows.append({
            'file': file,
            'interacting_chain': chain,
            'interacting_atom': row['interacting_atom'],
            'residue_number': row['resi'],
            'amino_acid': row['residue_one_letter'],
            'distance (angstroms)': row['distance (angstroms)'],
        })

# Convert the list to a DataFrame
interaction_df = pd.DataFrame(output_rows)

# Save the resulting DataFrame to a CSV file
interaction_df.to_csv("interaction_summary_all_files.csv", index=False)

# Load the summarized interaction data
csv_file_path = "interaction_summary_all_files.csv"
interaction_df = pd.read_csv(csv_file_path)

# Create a new column for the x-axis labels by concatenating residue number and amino acid
interaction_df['residue_label'] = interaction_df['residue_number'].astype(str) + interaction_df['amino_acid']

# Sort the dataframe by file and residue number to maintain order
interaction_df = interaction_df.sort_values(by=['file', 'residue_number'])

# Convert residue_label to a categorical type with the correct order
interaction_df['residue_label'] = pd.Categorical(interaction_df['residue_label'], 
                                                 categories=sorted(interaction_df['residue_label'].unique(), key=lambda x: int(''.join(filter(str.isdigit, x)))), 
                                                 ordered=True)

# Scatter plot for interaction distances with custom x-axis labels
fig_scatter = px.scatter(interaction_df, x='residue_label', y='distance (angstroms)', color='interacting_chain', text='interacting_atom',
                         title='Interaction Distances by Residue and Interacting Chain',
                         labels={'residue_label': 'Residue (Number and Identity)', 'distance (angstroms)': 'Distance (angstroms)', 'interacting_chain': 'Interacting Chain'},
                         hover_data=['file'])

fig_scatter.update_traces(textposition='top center', marker=dict(size=10, opacity=0.6))
fig_scatter.update_layout(xaxis={'categoryorder':'array', 'categoryarray': interaction_df['residue_label'].cat.categories})

# Show the plot
fig_scatter.show()


Each one separate

In [19]:
import pandas as pd
import plotly.express as px

# Define the path to the CSV file
csv_file_path = '/Users/jazzeruncal/git/Jazzer_surf/3d_predictions/same_seed/interactions_all_files.csv'

# Read the CSV file into a DataFrame
df = pd.read_csv(csv_file_path)

# Print column names to check for correct naming
print(df.columns)

# Adjust the column names if necessary
# Assuming the columns are named 'resi' instead of 'residue_number'
residue_column = 'resi'
amino_acid_column = 'residue_one_letter'

# Filter to get the shortest distances for each interaction
df = df.loc[df[df['chain'] == 'A'].groupby(['file', residue_column, 'interacting_chain'])['distance (angstroms)'].idxmin()]

# Exclude interactions with Mg and chain letters A, G, H
df = df[(df['interacting_resn'] != 'MG') & (~df['interacting_chain'].isin(['A', 'G', 'H']))].reset_index(drop=True)

# Map interacting_resn to single-letter codes for better readability
aa_dict = {
    'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D', 'CYS': 'C', 'GLN': 'Q', 'GLU': 'E', 'GLY': 'G',
    'HIS': 'H', 'ILE': 'I', 'LEU': 'L', 'LYS': 'K', 'MET': 'M', 'PHE': 'F', 'PRO': 'P', 'SER': 'S',
    'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V'
}
df['residue_one_letter'] = df['resn'].map(aa_dict)
df['residue'] = df[residue_column].astype(str) + df['residue_one_letter']

# Replace chain letters with descriptive names
chain_map = {
    'B': 'kinesin B',
    'C': 'alpha-tubulin',
    'D': 'beta-tubulin',
    'E': 'ATP',
    'F': 'ATP'
}
df['interacting_chain'] = df['interacting_chain'].map(chain_map)

# Create a plot for each file
unique_files = df['file'].unique()

for file in unique_files:
    # Filter the DataFrame for the current file
    file_df = df[df['file'] == file]

    # Create a new column for the x-axis labels by concatenating residue number and amino acid
    file_df['residue_label'] = file_df[residue_column].astype(str) + file_df[amino_acid_column]

    # Sort the DataFrame by residue number to maintain order
    file_df = file_df.sort_values(by=[residue_column])

    # Convert residue_label to a categorical type with the correct order
    file_df['residue_label'] = pd.Categorical(file_df['residue_label'], 
                                              categories=sorted(file_df['residue_label'].unique(), key=lambda x: int(''.join(filter(str.isdigit, x)))), 
                                              ordered=True)

    # Scatter plot for interaction distances with custom x-axis labels
    fig_scatter = px.scatter(file_df, x='residue_label', y='distance (angstroms)', color='interacting_chain', text='interacting_resn',
                             title=f'Interaction Distances for {file} by Residue and Interacting Chain',
                             labels={'residue_label': 'Residue (Number and Identity)', 'distance (angstroms)': 'Distance (angstroms)', 'interacting_chain': 'Interacting Chain'},
                             hover_data=['file'])

    fig_scatter.update_traces(textposition='top center', marker=dict(size=10, opacity=0.6))
    fig_scatter.update_layout(xaxis={'categoryorder':'array', 'categoryarray': file_df['residue_label'].cat.categories})

    # Show the plot
    fig_scatter.show()



Index(['file', 'chain', 'resi', 'resn', 'atom_name', 'interacting_atom',
       'interacting_resn', 'interacting_chain', 'distance (angstroms)',
       'residue_one_letter', 'full_atom_name', 'interacting_full_atom_name'],
      dtype='object')




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

