# ChimeraX Color Script Generator

Upload per-residue entropy (or any scalar) values, sequences, and metadata.
This notebook generates a `.cxc` ChimeraX color script **per sequence** and
downloads them to your machine.

## 0. Setup – Clone repo & import dependencies

In [None]:
import os, subprocess

# Clone the helper repository (once) and make it the working directory so
# that `analysis.chimerax_color_lib` can be imported by the next cell.
repo_dir = "sequence-cleaning"
repo_url = "https://github.com/espickle1/sequence-cleaning.git"

# This cell must be safe to re-run. After the first run the working directory
# *is* the checkout, so a relative `repo_dir` lookup would fail and trigger a
# second, nested clone. Detect that case first and do nothing.
already_inside_repo = os.path.basename(os.getcwd()) == repo_dir

if not already_inside_repo:
    if not os.path.isdir(repo_dir):
        # check=True surfaces a failed clone immediately instead of letting
        # the later import fail with a less obvious error.
        subprocess.run(["git", "clone", repo_url], check=True)
    os.chdir(repo_dir)

print(f"Working directory: {os.getcwd()}")

In [None]:
import numpy as np
import pandas as pd
from google.colab import files
from analysis.chimerax_color_lib import generate_chimerax_script, write_chimerax_script

## 1. Upload files

Upload three CSV files:
- **Entropy file** – must contain a `sequence_id` column and one or more columns of per-residue entropy values.
- **Sequences file** – must contain `sequence_id` and `sequence` columns.
- **Metadata file** – must contain a `sequence_id` column and should contain a `name` column, which is used as the label in each output file name.

In [None]:
# Prompt for the entropy CSV and load it into `df_entropy`.
print("Upload the ENTROPY file (.csv):")
entropy_upload = files.upload()

# Guard against an empty result (e.g. the upload dialog was dismissed) so the
# user gets an actionable message instead of a bare IndexError.
if not entropy_upload:
    raise ValueError("No entropy file was uploaded; re-run this cell and select a .csv file.")

# Only the first uploaded file is used.
entropy_filename = next(iter(entropy_upload))
df_entropy = pd.read_csv(entropy_filename)

# Every later step joins on `sequence_id`, so fail here with a clear message.
if "sequence_id" not in df_entropy.columns:
    raise ValueError(f"{entropy_filename} must contain a 'sequence_id' column.")

print(f"Loaded {entropy_filename}: {df_entropy.shape}")
df_entropy.head()

In [None]:
# Prompt for the sequences CSV and load it into `df_sequences`.
print("Upload the SEQUENCES file (.csv):")
seq_upload = files.upload()

# Guard against an empty result (e.g. the upload dialog was dismissed) so the
# user gets an actionable message instead of a bare IndexError.
if not seq_upload:
    raise ValueError("No sequences file was uploaded; re-run this cell and select a .csv file.")

# Only the first uploaded file is used.
seq_filename = next(iter(seq_upload))
df_sequences = pd.read_csv(seq_filename)

# The merge step selects exactly these two columns; report all that are
# missing at once rather than failing later with a KeyError.
missing_seq_columns = {"sequence_id", "sequence"} - set(df_sequences.columns)
if missing_seq_columns:
    raise ValueError(
        f"{seq_filename} is missing required column(s): {sorted(missing_seq_columns)}"
    )

print(f"Loaded {seq_filename}: {df_sequences.shape}")
df_sequences.head()

In [None]:
# Prompt for the metadata CSV and load it into `df_metadata`.
print("Upload the METADATA file (.csv):")
meta_upload = files.upload()

# Guard against an empty result (e.g. the upload dialog was dismissed) so the
# user gets an actionable message instead of a bare IndexError.
if not meta_upload:
    raise ValueError("No metadata file was uploaded; re-run this cell and select a .csv file.")

# Only the first uploaded file is used.
meta_filename = next(iter(meta_upload))
df_metadata = pd.read_csv(meta_filename)

# The merge step joins on `sequence_id`, so fail here with a clear message.
if "sequence_id" not in df_metadata.columns:
    raise ValueError(f"{meta_filename} must contain a 'sequence_id' column.")

print(f"Loaded {meta_filename}: {df_metadata.shape}")
df_metadata.head()

## 2. Merge files on `sequence_id`

In [None]:
# Combine the three uploads into one row per sequence.
#
# `name` is an optional metadata column: the generation loop reads it with
# `row.get("name", ...)` and falls back to the sequence id. Select it only
# when it exists so a metadata file without `name` does not raise a KeyError.
metadata_columns = ["sequence_id"]
if "name" in df_metadata.columns:
    metadata_columns.append("name")

df_merged = (
    df_entropy
    # inner: a row without a sequence is dropped.
    .merge(df_sequences[["sequence_id", "sequence"]], on="sequence_id", how="inner")
    # left: sequences without metadata are kept (their `name` becomes NaN).
    .merge(df_metadata[metadata_columns], on="sequence_id", how="left")
)
print(f"Merged rows: {len(df_merged)}")
df_merged.head()

## 3. Configure color mapping

Adjust these parameters as needed before generating the scripts.

In [None]:
# --- Configuration ---
# Which column(s) hold the per-residue entropy values?
# By default every column of the entropy file is treated as a per-residue
# value column, except the identifier/label columns listed below. Override
# `value_columns` by hand if your file needs a different selection.
NON_VALUE_COLUMNS = {"sequence_id", "sequence", "name"}
value_columns = [c for c in df_entropy.columns if c not in NON_VALUE_COLUMNS]

# Without value columns the generation step would be handed empty arrays;
# stop here with an explicit message instead.
if not value_columns:
    raise ValueError("No per-residue value columns were found in the entropy file.")
print(f"Value columns detected: {value_columns}")

# Colormap and scaling. These settings are forwarded unchanged as keyword
# arguments to `generate_chimerax_script` in the generation cell.
CMAP_NAME = "Greys"            # seaborn / matplotlib colormap
TRANSFORM_METHOD = "quantile"   # 'quantile', 'power', 'standard', 'robust', or 'none'
COLOR = True                    # passed as `color=`
COLOR_INVERT = False            # passed as `color_invert=`
TRANSPARENCY = False            # passed as `transparency=`
TRANSPARENCY_INVERT = False     # passed as `transparency_invert=`

## 4. Generate `.cxc` scripts (one per sequence)

In [None]:
import os

# Write one ChimeraX `.cxc` script per merged row into `output_dir` and
# remember every path in `generated_files` for the download cell.
output_dir = "cxc_output"
os.makedirs(output_dir, exist_ok=True)

generated_files = []

for _, row in df_merged.iterrows():
    seq_id = row["sequence_id"]

    # Use the metadata name as the file label, falling back to the sequence id.
    # The left merge leaves NaN for sequences without metadata, and NaN is
    # truthy, so a plain `name or seq_id` would yield the label "nan".
    # `row.get` returns None when the `name` column is absent altogether.
    name = row.get("name")
    label = seq_id if (pd.isna(name) or not name) else name

    # Collect per-residue values from all value columns into a single array
    values = row[value_columns].values.astype(float)

    script = generate_chimerax_script(
        values,
        cmap_name=CMAP_NAME,
        transform_method=TRANSFORM_METHOD,
        color=COLOR,
        color_invert=COLOR_INVERT,
        transparency=TRANSPARENCY,
        transparency_invert=TRANSPARENCY_INVERT,
    )

    # Spaces and slashes in the label would produce awkward or invalid paths.
    safe_label = str(label).replace(" ", "_").replace("/", "_")
    out_path = os.path.join(output_dir, f"{safe_label}_{seq_id}.cxc")
    write_chimerax_script(script, out_path)
    generated_files.append(out_path)
    print(f"  Created: {out_path}")

print(f"\nGenerated {len(generated_files)} .cxc files.")

## 5. Download `.cxc` files

In [None]:
# Hand each generated script to the browser, in the order it was created.
for cxc_path in generated_files:
    files.download(cxc_path)