In [1]:
# Setup

import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import glob

csv_dir = os.path.join("..", "..", "data/out_csvs")
hbond_heatmap_csv_dir = os.path.join(csv_dir, "heatmap_data")

def __safe_mkdir(directory: str) -> None:
    """Safely creates a directory if it does not already exist."""
    os.makedirs(directory, exist_ok=True)


In [2]:
# Now, for all the H-bond CSVs present,
# make heatmaps showing the relationship
# between the distance and angle

heatmap_csv_files = glob.glob(f"{hbond_heatmap_csv_dir}/*.csv")

for heatmap_csv_file in heatmap_csv_files:
    with open(heatmap_csv_file, 'r') as file:
        # Check the number of lines in the file
        if sum(1 for line in file) < 101:
            continue  # Skip files with fewer than 101 lines
            # i.e. 100 data points
    # Read into DataFrame
    csv_df = pd.read_csv(heatmap_csv_file)
    # Get the name of the CSV file
    heatmap_csv_name = heatmap_csv_file.split("/")[-1]
    heatmap_csv_res_atoms = heatmap_csv_name.split(".")[0]
    # Get the residues and atoms
    heatmap_csv_res = heatmap_csv_res_atoms.split(" ")[0]
    heatmap_csv_atoms = heatmap_csv_res_atoms.split(" ")[1]
    # Get the individual residues
    heatmap_csv_res_1 = heatmap_csv_res.split("-")[0]
    heatmap_csv_res_2 = heatmap_csv_res.split("-")[1]
    # Get the individual atoms
    heatmap_csv_atom_1 = heatmap_csv_atoms.split("-")[0]
    heatmap_csv_atom_2 = heatmap_csv_atoms.split("-")[1]

    # Select distance and angle
    hbonds_subset = csv_df[["distance", "angle"]].reset_index(drop=True)

    # Binning
    distance_bins = np.arange(2.0, 4.1, 0.1)  # Bins from 2 to 4 in increments of 0.1
    angle_bins = np.arange(0, 181, 10)  # Bins from 0 to 180 in increments of 10
    hbonds_subset["distance_bin"] = pd.cut(hbonds_subset["distance"], bins=distance_bins)
    hbonds_subset["angle_bin"] = pd.cut(hbonds_subset["angle"], bins=angle_bins)
    # Data
    heatmap_data = (hbonds_subset.groupby(["angle_bin", "distance_bin"]).size().unstack(fill_value=0))
    # Plot
    plt.figure(figsize=(6, 6))
    sns.heatmap(heatmap_data, cmap="gray_r", xticklabels=1, yticklabels=range(0, 181, 10), square=True)
    plt.xticks(np.arange(len(distance_bins)) + 0.5,[f"{bin_val:.1f}" for bin_val in distance_bins],rotation=0)
    plt.yticks(np.arange(len(angle_bins)) + 0.5, angle_bins, rotation=0)
    plt.xlabel("Distance (Å)")
    plt.ylabel("Angle (°)")
    # Set name and dir
    map_name = f"{heatmap_csv_res_1}-{heatmap_csv_res_2} {heatmap_csv_atom_1}-{heatmap_csv_atom_2}"
    map_dir = (
        "figure_4_heatmaps/RNA-RNA" if len(heatmap_csv_res_1) == 1 and len(heatmap_csv_res_2) == 1
        else "figure_4_heatmaps/RNA-PROT"
    )
    __safe_mkdir(map_dir)
    map_path = f"{map_dir}/{map_name}.png"
    # 2D histogram
    plt.figure(figsize=(6, 4.8))
    plt.hist2d(
        hbonds_subset["distance"],
        hbonds_subset["angle"],
        bins=[distance_bins, angle_bins],
        cmap="gray_r",
    )
    plt.xlabel("Distance (Å)")
    plt.ylabel("Angle (°)")
    plt.colorbar(label="Count")
    plt.title(f"{map_name} H-bond heatmap")

    plt.savefig(map_path, dpi=250)
    plt.close()

plt.close('all')





  heatmap_data = (hbonds_subset.groupby(["angle_bin", "distance_bin"]).size().unstack(fill_value=0))
  heatmap_data = (hbonds_subset.groupby(["angle_bin", "distance_bin"]).size().unstack(fill_value=0))
  heatmap_data = (hbonds_subset.groupby(["angle_bin", "distance_bin"]).size().unstack(fill_value=0))
  heatmap_data = (hbonds_subset.groupby(["angle_bin", "distance_bin"]).size().unstack(fill_value=0))
  heatmap_data = (hbonds_subset.groupby(["angle_bin", "distance_bin"]).size().unstack(fill_value=0))
  heatmap_data = (hbonds_subset.groupby(["angle_bin", "distance_bin"]).size().unstack(fill_value=0))
  heatmap_data = (hbonds_subset.groupby(["angle_bin", "distance_bin"]).size().unstack(fill_value=0))
  heatmap_data = (hbonds_subset.groupby(["angle_bin", "distance_bin"]).size().unstack(fill_value=0))
  heatmap_data = (hbonds_subset.groupby(["angle_bin", "distance_bin"]).size().unstack(fill_value=0))
  heatmap_data = (hbonds_subset.groupby(["angle_bin", "distance_bin"]).size().unstack(fill_