In [9]:
import pandas as pd
import os

# Amino acid mapping from three-letter to one-letter codes
aa_3to1 = {
    'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D', 'CYS': 'C',
    'GLN': 'Q', 'GLU': 'E', 'GLY': 'G', 'HIS': 'H', 'ILE': 'I',
    'LEU': 'L', 'LYS': 'K', 'MET': 'M', 'PHE': 'F', 'PRO': 'P',
    'SER': 'S', 'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V'
}

# Mapping atom names to their complete names
atom_name_mapping = {
    'CA': 'Carbon Alpha', 'CB': 'Carbon Beta', 'CG': 'Carbon Gamma',
    'CD': 'Carbon Delta', 'CE': 'Carbon Epsilon', 'NZ': 'Nitrogen Zeta',
    'O': 'Oxygen', 'N': 'Nitrogen', 'C': 'Carbon', 'H': 'Hydrogen'
    # Add more mappings as needed
}

# Function to convert three-letter amino acid codes to one-letter codes
def convert_aa_code(three_letter_code):
    return aa_3to1.get(three_letter_code, three_letter_code)

# Function to map atom names to their complete names
def map_atom_name(atom_name):
    return atom_name_mapping.get(atom_name, atom_name)

# Path to the directory containing the CSV files
directory_path = '/Users/jazzeruncal/git/Jazzer_surf/data/interactionsMT'

# Process each CSV file in the directory
for filename in os.listdir(directory_path):
    if filename.endswith(".csv"):
        file_path = os.path.join(directory_path, filename)
        
        # Load the CSV file
        df = pd.read_csv(file_path)
        
        # Apply the conversions
        df['Residue Name 1'] = df['Residue Name 1'].apply(convert_aa_code)
        df['Residue Name 2'] = df['Residue Name 2'].apply(convert_aa_code)
        df['Atom Name 1'] = df['Atom Name 1'].apply(map_atom_name)
        df['Atom Name 2'] = df['Atom Name 2'].apply(map_atom_name)
        
        # Overwrite the original CSV file
        df.to_csv(file_path, index=False)

        print(f"Converted and overwrote {filename}")

print("All files have been processed and converted.")


Converted and overwrote fold_adpamt_model_0_kinesin_beta.csv
Converted and overwrote fold_kif3mt_model_0_kinesin_alpha.csv
Converted and overwrote fold_healmt_model_0_kinesin_beta.csv
Converted and overwrote fold_adpamt_model_0_kinesin_alpha.csv
Converted and overwrote fold_healmt_model_0_kinesin_alpha.csv
Converted and overwrote fold_k401mt_model_0_kinesin_beta.csv
Converted and overwrote fold_k401mt_model_0_kinesin_alpha.csv
Converted and overwrote fold_thtrmt_model_0_kinesin_beta.csv
Converted and overwrote fold_kif5mt_model_0_kinesin_beta.csv
Converted and overwrote fold_thtrmt_model_0_kinesin_alpha.csv
Converted and overwrote fold_kif3mt_model_0_kinesin_beta.csv
Converted and overwrote fold_kif5mt_model_0_kinesin_alpha.csv
All files have been processed and converted.


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [10]:
import pandas as pd
import altair as alt
import os

# Path to the directory containing the CSV files
directory_path = '/Users/jazzeruncal/git/Jazzer_surf/data/interactionsMT'

# Initialize an empty DataFrame to store all data
combined_df = pd.DataFrame()

# Process each CSV file in the directory
for filename in os.listdir(directory_path):
    if filename.endswith(".csv"):
        file_path = os.path.join(directory_path, filename)
        
        # Load the CSV file
        df = pd.read_csv(file_path)
        
        # Add a column to distinguish between the alpha and beta interactions
        if 'alpha' in filename:
            df['Type'] = 'Alpha'
        elif 'beta' in filename:
            df['Type'] = 'Beta'
        
        # Extract the Kinesin name from the filename
        kinesin_name = filename.replace('test_kinesin_', '').replace('_alpha.csv', '').replace('_beta.csv', '')
        df['Kinesin'] = kinesin_name
        
        # Append to the combined DataFrame
        combined_df = pd.concat([combined_df, df])

# Create the plot
plot = alt.Chart(combined_df).mark_point().encode(
    y=alt.Y('Type:N', title=None),
    x=alt.X('Distance (Å):Q', title='Distance (Å)'),
    color=alt.Color('Residue Name 1:N', legend=alt.Legend(title="Residue Name (Kinesin)")),
    column=alt.Column('Kinesin:N', title="Kinesin"),
    tooltip=['Residue Name 1:N', 'Residue ID 1:Q', 'Atom Name 1:N', 'Residue Name 2:N', 'Residue ID 2:Q', 'Atom Name 2:N', 'Distance (Å):Q']
).properties(
    height=250,
    width=450
).configure_text(
    fontWeight='bold'
)

# Display the plot
plot


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [13]:
import pandas as pd
import altair as alt
import os

# Path to the directory containing the CSV files
directory_path = '/Users/jazzeruncal/git/Jazzer_surf/data/interactionsMT'

# Initialize an empty DataFrame to store all data
combined_df = pd.DataFrame()

# Process each CSV file in the directory
for filename in os.listdir(directory_path):
    if filename.endswith(".csv"):
        file_path = os.path.join(directory_path, filename)
        
        # Load the CSV file
        df = pd.read_csv(file_path)
        
        # Add a column to distinguish between the alpha and beta interactions
        if 'alpha' in filename:
            df['Type'] = 'Alpha'
        elif 'beta' in filename:
            df['Type'] = 'Beta'
        
        # Extract the Kinesin name from the filename
        kinesin_name = filename.replace('test_kinesin_', '').replace('_alpha.csv', '').replace('_beta.csv', '')
        df['Kinesin'] = kinesin_name
        
        # Append to the combined DataFrame
        combined_df = pd.concat([combined_df, df], ignore_index=True)

# Create the plot
plot = alt.Chart(combined_df).mark_point().encode(
    y=alt.Y('Type:N', title=None),
    x=alt.X('Distance (Å):Q', title='Distance (Å)'),
    color=alt.Color('Kinesin:N', legend=alt.Legend(title="Kinesin")),
    shape=alt.Shape('Residue Name 1:N', legend=alt.Legend(title="Residue Name (Kinesin)")),
    tooltip=['Residue Name 1:N', 'Residue ID 1:Q', 'Atom Name 1:N', 'Residue Name 2:N', 'Residue ID 2:Q', 'Atom Name 2:N', 'Distance (Å):Q', 'Kinesin:N']
).properties(
    height=400,
    width=600
).configure_text(
    fontWeight='bold'
)

# Display the plot
plot


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [16]:
import pandas as pd
import altair as alt
import os

# Path to the directory containing the CSV files
directory_path = '/Users/jazzeruncal/git/Jazzer_surf/data/interactionsMT'

# Initialize empty DataFrames to store alpha and beta interactions
alpha_df = pd.DataFrame()
beta_df = pd.DataFrame()

# Process each CSV file in the directory
for filename in os.listdir(directory_path):
    if filename.endswith(".csv"):
        file_path = os.path.join(directory_path, filename)
        
        # Load the CSV file
        df = pd.read_csv(file_path)
        
        # Extract the Kinesin name from the filename
        kinesin_name = filename.replace('test_kinesin_', '').replace('_alpha.csv', '').replace('_beta.csv', '')
        df['Kinesin'] = kinesin_name
        
        # Add to the respective DataFrame
        if 'alpha' in filename:
            df['Type'] = 'Alpha'
            alpha_df = pd.concat([alpha_df, df], ignore_index=True)
        elif 'beta' in filename:
            df['Type'] = 'Beta'
            beta_df = pd.concat([beta_df, df], ignore_index=True)

# Create the alpha interactions plot
alpha_plot = alt.Chart(alpha_df).mark_point().encode(
    y=alt.Y('Kinesin:N', title=None),
    x=alt.X('Distance (Å):Q', title='Distance (Å)'),
    tooltip=['Residue Name 1:N', 'Residue ID 1:Q', 'Atom Name 1:N', 'Residue Name 2:N', 'Residue ID 2:Q', 'Atom Name 2:N', 'Distance (Å):Q']
).properties(
    height=400,
    width=600,
    title='Alpha Interactions'
)

# Create the beta interactions plot
beta_plot = alt.Chart(beta_df).mark_point().encode(
    y=alt.Y('Kinesin:N', title=None),
    x=alt.X('Distance (Å):Q', title='Distance (Å)'),
    tooltip=['Residue Name 1:N', 'Residue ID 1:Q', 'Atom Name 1:N', 'Residue Name 2:N', 'Residue ID 2:Q', 'Atom Name 2:N', 'Distance (Å):Q']
).properties(
    height=400,
    width=600,
    title='Beta Interactions'
)

# Combine the plots into a single chart
combined_plot = alt.vconcat(alpha_plot, beta_plot).configure_text(
    fontWeight='bold'
)

# Display the combined plot
combined_plot


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df

In [20]:
import pandas as pd
import altair as alt
import os

# Path to the directory containing the CSV files
directory_path = '/Users/jazzeruncal/git/Jazzer_surf/data/interactionsMT'

# Initialize empty DataFrames to store alpha interactions
alpha_df = pd.DataFrame()

# Process each CSV file in the directory
for filename in os.listdir(directory_path):
    if filename.endswith(".csv") and 'alpha' in filename:
        file_path = os.path.join(directory_path, filename)
        
        # Load the CSV file
        df = pd.read_csv(file_path)
        
        # Extract the Kinesin name from the filename
        kinesin_name = filename.replace('test_kinesin_', '').replace('_alpha.csv', '')
        df['Kinesin'] = kinesin_name
        df['Type'] = 'Alpha'
        
        # Append to the alpha DataFrame
        alpha_df = pd.concat([alpha_df, df], ignore_index=True)

# Filter the alpha interactions to include only interactions with E416 of the alpha tubulin
filtered_alpha_df = alpha_df[(alpha_df['Residue Name 2'] == 'E') & (alpha_df['Residue ID 2'] == 416)]

# Ensure all Kinesins are represented, even if they have no interactions with E416
all_kinesins = alpha_df['Kinesin'].unique()
for kinesin in all_kinesins:
    if kinesin not in filtered_alpha_df['Kinesin'].values:
        empty_row = pd.DataFrame({'Residue Name 1': [None], 'Residue ID 1': [None], 'Atom Name 1': [None], 
                                  'Residue Name 2': ['E'], 'Residue ID 2': [416], 'Atom Name 2': [None], 
                                  'Distance (Å)': [None], 'Kinesin': [kinesin], 'Type': ['Alpha']})
        filtered_alpha_df = pd.concat([filtered_alpha_df, empty_row], ignore_index=True)

# Create the plot
e416_plot = alt.Chart(filtered_alpha_df).mark_point().encode(
    y=alt.Y('Kinesin:N', title=None),
    x=alt.X('Distance (Å):Q', title='Distance (Å)', scale=alt.Scale(domain=[0, 4])),
    tooltip=['Residue Name 1:N', 'Residue ID 1:Q', 'Atom Name 1:N', 'Distance (Å):Q']
).properties(
    height=400,
    width=600,
    title='Interactions with E416 of Alpha Tubulin'
).configure_text(
    fontWeight='bold'
)

# Display the plot
e416_plot


  filtered_alpha_df = pd.concat([filtered_alpha_df, empty_row], ignore_index=True)
  filtered_alpha_df = pd.concat([filtered_alpha_df, empty_row], ignore_index=True)
  filtered_alpha_df = pd.concat([filtered_alpha_df, empty_row], ignore_index=True)
  filtered_alpha_df = pd.concat([filtered_alpha_df, empty_row], ignore_index=True)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [21]:
import pandas as pd
import altair as alt
import os

# Path to the directory containing the CSV files
directory_path = '/Users/jazzeruncal/git/Jazzer_surf/data/interactionsMT'

# Initialize empty DataFrames to store alpha interactions
alpha_df = pd.DataFrame()

# Process each CSV file in the directory
for filename in os.listdir(directory_path):
    if filename.endswith(".csv") and 'alpha' in filename:
        file_path = os.path.join(directory_path, filename)
        
        # Load the CSV file
        df = pd.read_csv(file_path)
        
        # Extract the Kinesin name from the filename
        kinesin_name = filename.replace('test_kinesin_', '').replace('_alpha.csv', '')
        df['Kinesin'] = kinesin_name
        df['Type'] = 'Alpha'
        
        # Append to the alpha DataFrame
        alpha_df = pd.concat([alpha_df, df], ignore_index=True)

# Filter the alpha interactions to include only interactions with residues 410 to 417 of the alpha tubulin
filtered_alpha_df = alpha_df[(alpha_df['Residue Name 2'] == 'E') & (alpha_df['Residue ID 2'].between(410, 417))]

# Ensure all Kinesins are represented, even if they have no interactions with residues 410 to 417
all_kinesins = alpha_df['Kinesin'].unique()
for kinesin in all_kinesins:
    if kinesin not in filtered_alpha_df['Kinesin'].values:
        for residue_id in range(410, 418):
            empty_row = pd.DataFrame({'Residue Name 1': [None], 'Residue ID 1': [None], 'Atom Name 1': [None], 
                                      'Residue Name 2': ['E'], 'Residue ID 2': [residue_id], 'Atom Name 2': [None], 
                                      'Distance (Å)': [None], 'Kinesin': [kinesin], 'Type': ['Alpha']})
            filtered_alpha_df = pd.concat([filtered_alpha_df, empty_row], ignore_index=True)

# Create the plot
alpha_plot = alt.Chart(filtered_alpha_df).mark_point().encode(
    y=alt.Y('Kinesin:N', title=None),
    x=alt.X('Distance (Å):Q', title='Distance (Å)', scale=alt.Scale(domain=[0, 4])),
    color=alt.Color('Residue ID 2:Q', legend=alt.Legend(title="Residue ID")),
    tooltip=['Residue Name 1:N', 'Residue ID 1:Q', 'Atom Name 1:N', 'Residue Name 2:N', 'Residue ID 2:Q', 'Atom Name 2:N', 'Distance (Å):Q']
).properties(
    height=400,
    width=600,
    title='Interactions with Residues 410-417 of Alpha Tubulin'
).configure_text(
    fontWeight='bold'
)

# Display the plot
alpha_plot


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [25]:
import pandas as pd
import altair as alt
import os

# Path to the directory containing the CSV files
directory_path = '/Users/jazzeruncal/git/Jazzer_surf/data/interactionsMT'

# Initialize empty DataFrames to store alpha interactions
alpha_df = pd.DataFrame()

# Process each CSV file in the directory
for filename in os.listdir(directory_path):
    if filename.endswith(".csv") and 'alpha' in filename:
        file_path = os.path.join(directory_path, filename)
        
        # Load the CSV file
        df = pd.read_csv(file_path)
        
        # Extract the Kinesin name from the filename
        kinesin_name = filename.replace('test_kinesin_', '').replace('_alpha.csv', '')
        df['Kinesin'] = kinesin_name
        df['Type'] = 'Alpha'
        
        # Append to the alpha DataFrame
        alpha_df = pd.concat([alpha_df, df], ignore_index=True)

# Filter the alpha interactions to include only interactions with residues 410 to 417 of the alpha tubulin
filtered_alpha_df = alpha_df[(alpha_df['Residue ID 2'].between(410, 417))]

# Ensure all Kinesins are represented, even if they have no interactions with residues 410 to 417
all_kinesins = alpha_df['Kinesin'].unique()
for kinesin in all_kinesins:
    if kinesin not in filtered_alpha_df['Kinesin'].values:
        for residue_id in range(410, 418):
            empty_row = pd.DataFrame({'Residue Name 1': [None], 'Residue ID 1': [None], 'Atom Name 1': [None], 
                                      'Residue Name 2': ['E'], 'Residue ID 2': [residue_id], 'Atom Name 2': [None], 
                                      'Distance (Å)': [None], 'Kinesin': [kinesin], 'Type': ['Alpha']})
            filtered_alpha_df = pd.concat([filtered_alpha_df, empty_row], ignore_index=True)

# Define the specific order for Kinesin
kinesin_order = ['fold_k401mt_model_0_kinesin', 'fold_kif5mt_model_0_kinesin', 'fold_thtrmt_model_0_kinesin'] + \
                [k for k in all_kinesins if k not in ['fold_k401mt_model_0_kinesin', 'fold_kif5mt_model_0_kinesin', 'fold_thtrmt_model_0_kinesin']]

# Convert Kinesin column to ordered categorical type
filtered_alpha_df['Kinesin'] = pd.Categorical(filtered_alpha_df['Kinesin'], categories=kinesin_order, ordered=True)

# Define color scale for distance intervals from 2.4 Å to 4.0 Å
distance_color_scale = alt.Scale(domain=[2.4, 2.6, 2.8, 3.0, 3.2, 3.4, 3.6, 3.8, 4.0],
                                 range=['red', 'orange', 'yellow', 'green', 'blue', 'purple', 'pink', 'brown', 'grey'])

# Create the plot
alpha_plot = alt.Chart(filtered_alpha_df).mark_point().encode(
    y=alt.Y('Kinesin:N', title=None, sort=kinesin_order),
    x=alt.X('Distance (Å):Q', title='Distance (Å)', scale=alt.Scale(domain=[2.4, 4.0])),
    color=alt.Color('Distance (Å):Q', scale=distance_color_scale, legend=alt.Legend(title="Distance (Å)")),
    tooltip=['Residue Name 1:N', 'Residue ID 1:Q', 'Atom Name 1:N', 'Residue Name 2:N', 'Residue ID 2:Q', 'Atom Name 2:N', 'Distance (Å):Q']
).properties(
    height=400,
    width=600,
    title='Interactions with Residues 410-417 of Alpha Tubulin'
).configure_text(
    fontWeight='bold'
)

# Display the plot
alpha_plot


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_alpha_df['Kinesin'] = pd.Categorical(filtered_alpha_df['Kinesin'], categories=kinesin_order, ordered=True)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [27]:
import pandas as pd
import altair as alt
import os

# Path to the directory containing the CSV files
directory_path = '/Users/jazzeruncal/git/Jazzer_surf/data/interactionsMT'

# Initialize empty DataFrames to store alpha and beta interactions
alpha_df = pd.DataFrame()
beta_df = pd.DataFrame()

# Process each CSV file in the directory
for filename in os.listdir(directory_path):
    if filename.endswith(".csv"):
        file_path = os.path.join(directory_path, filename)
        
        # Load the CSV file
        df = pd.read_csv(file_path)
        
        # Extract the Kinesin name from the filename
        kinesin_name = filename.replace('test_kinesin_', '').replace('_alpha.csv', '').replace('_beta.csv', '')
        df['Kinesin'] = kinesin_name
        
        # Add to the respective DataFrame
        if 'alpha' in filename:
            df['Type'] = 'Alpha'
            alpha_df = pd.concat([alpha_df, df], ignore_index=True)
        elif 'beta' in filename:
            df['Type'] = 'Beta'
            beta_df = pd.concat([beta_df, df], ignore_index=True)

# Define the specific order for Kinesin
kinesin_order = ['fold_k401mt_model_0_kinesin', 'fold_kif5mt_model_0_kinesin', 'fold_thtrmt_model_0_kinesin'] + \
                [k for k in alpha_df['Kinesin'].unique() if k not in ['fold_k401mt_model_0_kinesin', 'fold_kif5mt_model_0_kinesin', 'fold_thtrmt_model_0_kinesin']]

# Convert Kinesin column to ordered categorical type
alpha_df['Kinesin'] = pd.Categorical(alpha_df['Kinesin'], categories=kinesin_order, ordered=True)
beta_df['Kinesin'] = pd.Categorical(beta_df['Kinesin'], categories=kinesin_order, ordered=True)

# Create the alpha interactions plot
alpha_plot = alt.Chart(alpha_df).mark_point().encode(
    y=alt.Y('Kinesin:N', title=None, sort=kinesin_order),
    x=alt.X('Distance (Å):Q', title='Distance (Å)', scale=alt.Scale(domain=[2.4, 4.0])),
    tooltip=['Residue Name 1:N', 'Residue ID 1:Q', 'Atom Name 1:N', 'Residue Name 2:N', 'Residue ID 2:Q', 'Atom Name 2:N', 'Distance (Å):Q']
).properties(
    height=400,
    width=600,
    title='Alpha Interactions'
)

# Create the beta interactions plot
beta_plot = alt.Chart(beta_df).mark_point().encode(
    y=alt.Y('Kinesin:N', title=None, sort=kinesin_order),
    x=alt.X('Distance (Å):Q', title='Distance (Å)', scale=alt.Scale(domain=[2.4, 4.0])),
    tooltip=['Residue Name 1:N', 'Residue ID 1:Q', 'Atom Name 1:N', 'Residue Name 2:N', 'Residue ID 2:Q', 'Atom Name 2:N', 'Distance (Å):Q']
).properties(
    height=400,
    width=600,
    title='Beta Interactions'
)

# Combine the plots into a single chart
combined_plot = alt.vconcat(alpha_plot, beta_plot).configure_text(
    fontWeight='bold'
)

# Display the combined plot
combined_plot


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [31]:
import pandas as pd
import altair as alt
import os

# Path to the directory containing the CSV files
directory_path = '/Users/jazzeruncal/git/Jazzer_surf/data/interactionsMT'

# Initialize an empty DataFrame to store all interactions
all_interactions_df = pd.DataFrame()

# Process each CSV file in the directory
for filename in os.listdir(directory_path):
    if filename.endswith(".csv"):
        file_path = os.path.join(directory_path, filename)
        
        # Load the CSV file
        df = pd.read_csv(file_path)
        
        # Extract the Kinesin name from the filename
        kinesin_name = filename.replace('test_kinesin_', '').replace('_alpha.csv', '').replace('_beta.csv', '')
        df['Kinesin'] = kinesin_name
        
        # Add to the combined DataFrame
        all_interactions_df = pd.concat([all_interactions_df, df], ignore_index=True)

# Define the specific order for Kinesin
kinesin_order = ['fold_k401mt_model_0_kinesin', 'fold_kif5mt_model_0_kinesin', 'fold_thtrmt_model_0_kinesin'] + \
                [k for k in all_interactions_df['Kinesin'].unique() if k not in ['fold_k401mt_model_0_kinesin', 'fold_kif5mt_model_0_kinesin', 'fold_thtrmt_model_0_kinesin']]

# Convert Kinesin column to ordered categorical type
all_interactions_df['Kinesin'] = pd.Categorical(all_interactions_df['Kinesin'], categories=kinesin_order, ordered=True)

# Identify repeated Kinesin residues
repeated_residues = all_interactions_df['Residue ID 1'].value_counts()
repeated_residues = repeated_residues[repeated_residues > 1].index.tolist()

# Filter interactions involving these repeated residues
filtered_interactions_df = all_interactions_df[all_interactions_df['Residue ID 1'].isin(repeated_residues)]

# Create the plot
plot = alt.Chart(filtered_interactions_df).mark_point().encode(
    y=alt.Y('Residue ID 1:N', title='Kinesin Residue ID'),
    x=alt.X('Distance (Å):Q', title='Distance (Å)', scale=alt.Scale(domain=[2.4, 4.0])),
    color=alt.Color('Kinesin:N', legend=alt.Legend(title="Kinesin")),
    tooltip=['Kinesin:N', 'Residue Name 1:N', 'Residue ID 1:Q', 'Atom Name 1:N', 'Residue Name 2:N', 'Residue ID 2:Q', 'Atom Name 2:N', 'Distance (Å):Q']
).properties(
    height=400,
    width=600,
    title='Repeated Kinesin Residue Interactions'
).configure_text(
    fontWeight='bold'
)

# Display the plot
plot


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [32]:
import pandas as pd
import altair as alt
import os

# Path to the directory containing the CSV files
directory_path = '/Users/jazzeruncal/git/Jazzer_surf/data/interactionsMT'

# Initialize an empty DataFrame to store all interactions
all_interactions_df = pd.DataFrame()

# Process each CSV file in the directory
for filename in os.listdir(directory_path):
    if filename.endswith(".csv"):
        file_path = os.path.join(directory_path, filename)
        
        # Load the CSV file
        df = pd.read_csv(file_path)
        
        # Extract the Kinesin name from the filename
        kinesin_name = filename.replace('test_kinesin_', '').replace('_alpha.csv', '').replace('_beta.csv', '')
        df['Kinesin'] = kinesin_name
        
        # Add to the combined DataFrame
        all_interactions_df = pd.concat([all_interactions_df, df], ignore_index=True)

# Define the specific order for Kinesin
kinesin_order = ['fold_k401mt_model_0_kinesin', 'fold_kif5mt_model_0_kinesin', 'fold_thtrmt_model_0_kinesin'] + \
                [k for k in all_interactions_df['Kinesin'].unique() if k not in ['fold_k401mt_model_0_kinesin', 'fold_kif5mt_model_0_kinesin', 'fold_thtrmt_model_0_kinesin']]

# Convert Kinesin column to ordered categorical type
all_interactions_df['Kinesin'] = pd.Categorical(all_interactions_df['Kinesin'], categories=kinesin_order, ordered=True)

# Identify Kinesin residues that interact with tubulins
interacting_residues = all_interactions_df['Residue ID 1'].unique()

# Filter interactions involving these residues
filtered_interactions_df = all_interactions_df[all_interactions_df['Residue ID 1'].isin(interacting_residues)]

# Create the plot
plot = alt.Chart(filtered_interactions_df).mark_point().encode(
    y=alt.Y('Residue ID 1:N', title='Kinesin Residue ID'),
    x=alt.X('Distance (Å):Q', title='Distance (Å)', scale=alt.Scale(domain=[2.4, 4.0])),
    color=alt.Color('Kinesin:N', legend=alt.Legend(title="Kinesin")),
    tooltip=['Kinesin:N', 'Residue Name 1:N', 'Residue ID 1:Q', 'Atom Name 1:N', 'Residue Name 2:N', 'Residue ID 2:Q', 'Atom Name 2:N', 'Distance (Å):Q']
).properties(
    height=600,
    width=800,
    title='Kinesin Residue Interactions with Tubulins'
).configure_text(
    fontWeight='bold'
)

# Display the plot
plot


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [36]:
import pandas as pd
import altair as alt
import os

# Mapping of three-letter amino acid codes to one-letter codes
aa_3to1 = {
    'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D', 'CYS': 'C',
    'GLN': 'Q', 'GLU': 'E', 'GLY': 'G', 'HIS': 'H', 'ILE': 'I',
    'LEU': 'L', 'LYS': 'K', 'MET': 'M', 'PHE': 'F', 'PRO': 'P',
    'SER': 'S', 'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V'
}

# Function to convert three-letter amino acid codes to one-letter codes
def convert_aa_code(three_letter_code):
    return aa_3to1.get(three_letter_code, three_letter_code)

# Path to the directory containing the CSV files
directory_path = '/Users/jazzeruncal/git/Jazzer_surf/data/interactionsMT'

# Initialize an empty list to store individual heat maps
heatmap_list = []

# Process each CSV file in the directory
for filename in os.listdir(directory_path):
    if filename.endswith(".csv"):
        file_path = os.path.join(directory_path, filename)
        
        # Load the CSV file
        df = pd.read_csv(file_path)
        
        # Extract the Kinesin name from the filename
        kinesin_name = filename.replace('test_kinesin_', '').replace('_alpha.csv', '').replace('_beta.csv', '')
        df['Kinesin'] = kinesin_name
        
        # Select the top 10 shortest interactions
        top_10_df = df.nsmallest(10, 'Distance (Å)')
        
        # Convert three-letter amino acid codes to one-letter codes
        top_10_df['Residue 1 One Letter'] = top_10_df['Residue Name 1'].apply(convert_aa_code)
        
        # Combine residue one-letter code with residue ID
        top_10_df['Residue 1 Label'] = top_10_df['Residue 1 One Letter'] + top_10_df['Residue ID 1'].astype(str)
        
        # Create a heat map for the current Kinesin
        heatmap = alt.Chart(top_10_df).mark_rect().encode(
            x=alt.X('Residue 1 Label:O', title='Kinesin Residue'),
            y=alt.Y('Residue ID 2:O', title='Tubulin Residue ID'),
            color=alt.Color('Distance (Å):Q', scale=alt.Scale(scheme='viridis'), title='Distance (Å)'),
            tooltip=['Residue Name 1:N', 'Residue ID 1:Q', 'Atom Name 1:N', 'Residue Name 2:N', 'Residue ID 2:Q', 'Atom Name 2:N', 'Distance (Å):Q']
        ).properties(
            height=300,
            width=400,
            title=f'Top 10 Shortest Interactions for {kinesin_name}'
        )
        
        # Add the heat map to the list
        heatmap_list.append(heatmap)

# Combine the heat maps into a single chart
combined_heatmaps = alt.vconcat(*heatmap_list).configure_text(
    fontWeight='bold'
)

# Display the combined heat maps
combined_heatmaps


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df