In [1]:
import os
import matplotlib.pyplot as plt
import pandas as pd
from scripts.cellular_localization import run_deeploc2
from scripts.topology import (
    ensure_dir,
    get_transcripts_and_sequences,
    align_protein_sequences,
    run_deeptmhmm,
    generate_isoform_mapping,
    create_membrane_topology_objects
)
from scripts.expression import (
    getting_expression_data,
    create_expression_figure_objects
)
import json
import shelve

## Variables

# Output directory for the per-protein intermediate files
out_dir = "./files"
# Directory where the shelve databases used for plotting are written
out_dir_for_plots = "./app_v2/files_for_plots"

# Proteins we want to show
proteins = ["HER2", "CD20"]

# Email for alignment.
# Can be overridden with the ALIGNMENT_EMAIL environment variable so the
# notebook can be re-run by someone else without editing a personal address
# in the code; the original address remains the default.
email = os.environ.get("ALIGNMENT_EMAIL", "s242830@dtu.dk")

# Ensembl gene ID for each protein
proteins_ids = {
    "HER2": "ENSG00000141736",
    "CD20": "ENSG00000156738"
}

# Fail fast: a protein without a gene ID would otherwise only surface as a
# KeyError in the middle of the processing loop, after the proteins listed
# before it have already been processed.
missing_ids = [name for name in proteins if name not in proteins_ids]
if missing_ids:
    raise KeyError(f"No Ensembl gene ID configured for: {missing_ids}")

# Running the analysis for each protein

# The shelve databases below are created inside out_dir_for_plots.
# shelve.open does not create missing parent directories, so make sure the
# directory exists before any of the per-protein steps run.
os.makedirs(out_dir_for_plots, exist_ok=True)

for protein in proteins:

    print(f"Processing {protein}...")

    # All intermediate files for this protein go into its own sub-directory.
    protein_dir = out_dir + "/" + protein
    ensure_dir(protein_dir)

    # The steps below pass results to each other through files in protein_dir,
    # so their order matters.
    # NOTE(review): judging by the captured cell output, these helpers fetch
    # transcripts/protein sequences for the gene ID, submit a remote alignment
    # job, and run DeepTMHMM remotely — confirm against scripts/topology.py.
    transcripts_id = get_transcripts_and_sequences(proteins_ids[protein], protein_dir)
    align_protein_sequences(email, protein_dir)
    run_deeptmhmm(protein_dir)
    mapping = generate_isoform_mapping(protein_dir)

    membrane_topology_object = create_membrane_topology_objects(mapping, protein_dir)

    expression_df = getting_expression_data(transcripts_id, protein_dir)
    TCGA_GTEx_plotting_data = create_expression_figure_objects(protein_dir, expression_df, mapping)

    # Persist each result under the protein name, one shelve database per
    # result type. This is done inside the loop so results of proteins that
    # completed are kept even if a later protein fails.
    results_by_shelf = {
        "transcripts_to_isoforms_mapping": mapping,
        "membrane_topology_objects": membrane_topology_object,
        "TCGA_GTEx_plotting_data": TCGA_GTEx_plotting_data,
    }
    for shelf_name, result in results_by_shelf.items():
        with shelve.open(out_dir_for_plots + "/" + shelf_name) as db:
            db[protein] = result

  re.sub(r"^[^[]+[[]([^]]*)[]].*$", r"\1", query, flags=re.DOTALL))


Processing HER2...
Fetching transcripts for gene with Ensembl ID: ENSG00000141736...
Fetching protein sequence for Ensembl IDs: ENST00000584601 ENST00000584014 ENST00000578199 ENST00000445658 ENST00000584450 ENST00000269571 ENST00000578502 ENST00000578709 ENST00000582818 ENST00000580074 ENST00000863095 ENST00000863096 ENST00000863097 ENST00000863098 ENST00000863099 ENST00000863100 ENST00000863101 ENST00000863102 ENST00000863103 ENST00000938923 ENST00000938924 ENST00000938925 ENST00000959774 ENST00000959775
Submitting alignment job...
Successfully submitted. Job ID: clustalo-R20251219-123419-0287-79626582-p1m
Job status: QUEUED
Job status: FINISHED
Fetching alignment result...
2025-12-19 13:34:32,436 | INFO : Loaded project DTU/DeepTMHMM:1.0.44
Running DeepTMHMM...
2025-12-19 13:34:33,103 | INFO : View the result in your browser at: https://biolib.com/results/9dcd8e47-9a3c-4f8c-a86d-4c0c033d0267/
2025-12-19 13:34:34,536 | INFO : Cloud: The job has been queued. Please wait...
2025-12-19 