In [None]:
import os
import matplotlib.pyplot as plt
import pandas as pd
from scripts.topology import (
    ensure_dir,
    get_transcripts_and_sequences,
    align_protein_sequences,
    run_deeptmhmm,
    create_membrane_topology_objects
)
from scripts.expression import (
    getting_expression_data,
    create_expression_figure_objects
)
import json
import shelve

## Variables
# Everything a reader may want to tune before running the analysis lives here.

# Output directories: per-protein working files, and the files used for plots
out_dir = "./files"
out_dir_for_plots = "./app/files_for_plots"

# E-mail address handed to the alignment step
email = "s242830@dtu.dk"

# Ensembl gene ID of each protein
proteins_ids = dict(
    HER2="ENSG00000141736",
    CD20="ENSG00000156738",
    PROM1="ENSG00000007062",
)

# Proteins we want to show (each one must have an entry in proteins_ids)
proteins = [
    "HER2",
    "CD20",
    "PROM1",
]

# Running the analysis for each protein
#
# For every entry in `proteins` the loop:
#   1. fetches the gene's transcripts and protein sequences,
#   2. submits the alignment job and runs DeepTMHMM,
#   3. builds the membrane-topology and expression figure objects,
# and stores each result in a shelve file under `out_dir_for_plots`, keyed by
# the protein name. Results are written as soon as they are available, so the
# output of a finished step is kept even if a later step fails.

def _store_for_plots(shelf_name, key, value):
    """Store `value` under `key` in the shelve file `shelf_name` inside `out_dir_for_plots`."""
    with shelve.open(out_dir_for_plots + "/" + shelf_name) as db:
        db[key] = value


# Fail before any job is submitted if a requested protein has no gene ID,
# instead of discovering it only after the earlier proteins were processed.
missing_ids = [protein for protein in proteins if protein not in proteins_ids]
if missing_ids:
    raise KeyError(f"No Ensembl gene ID configured for: {', '.join(missing_ids)}")

# shelve.open creates the database file but not its parent directory, so the
# plot directory has to exist before the first result is stored.
os.makedirs(out_dir_for_plots, exist_ok=True)

for protein in proteins:

    print(f"Processing {protein}...")

    # Working directory holding every intermediate file of this protein
    protein_dir = out_dir + "/" + protein
    ensure_dir(protein_dir)

    # Sequence retrieval, alignment and topology prediction; the later steps
    # presumably read the files the earlier ones leave in protein_dir, so the
    # call order is kept as is.
    transcripts_id, mapping = get_transcripts_and_sequences(proteins_ids[protein], protein_dir)
    align_protein_sequences(email, protein_dir)
    run_deeptmhmm(protein_dir)

    _store_for_plots("transcripts_to_isoforms_mapping", protein, mapping)

    membrane_topology_object = create_membrane_topology_objects(mapping, protein_dir)
    _store_for_plots("membrane_topology_objects", protein, membrane_topology_object)

    expression_df = getting_expression_data(transcripts_id, protein_dir)
    TCGA_GTEx_plotting_data = create_expression_figure_objects(protein_dir, expression_df, mapping)
    _store_for_plots("TCGA_GTEx_plotting_data", protein, TCGA_GTEx_plotting_data)

Processing CD20...
Fetching transcripts for gene with Ensembl ID: ENSG00000156738...
Fetching protein sequence for Ensembl IDs: ENST00000532491 ENST00000532073 ENST00000345732 ENST00000534668 ENST00000528313 ENST00000533306 ENST00000674194 ENST00000389939 ENST00000904593 ENST00000904594 ENST00000966396
Submitting alignment job...
Successfully submitted. Job ID: clustalo-R20260121-141431-0177-2735020-p1m
Job status: RUNNING
Job status: FINISHED
Fetching alignment result...
2026-01-21 15:14:45,176 | INFO : Loaded project DTU/DeepTMHMM:1.0.50
Running DeepTMHMM...
2026-01-21 15:14:55,879 | INFO : View the result in your browser at: https://biolib.com/results/e9a1225b-0b8e-483a-9cb2-e524a0957669/
2026-01-21 15:14:57,416 | INFO : Cloud: The job has been queued. Please wait...
2026-01-21 15:15:00,084 | INFO : Cloud: The job has been queued. Please wait...
2026-01-21 15:15:03,416 | INFO : Cloud: The job has been queued. Please wait...
2026-01-21 15:15:07,750 | INFO : Cloud: The job has been qu