In [None]:
import os
import matplotlib.pyplot as plt
import pandas as pd
from scripts.topology import (
    ensure_dir,
    get_transcripts_and_sequences,
    align_protein_sequences,
    run_deeptmhmm,
    create_membrane_topology_objects
)
from scripts.expression import (
    getting_expression_data,
    create_expression_figure_objects
)
import json
import shelve

## Variables

# Directory handed to every pipeline step below as its working/output folder
out_dir = "./files"
# Directory holding the shelve databases that the loop below fills per protein
out_dir_for_plots = "./app/files_for_plots"

# Email passed to the alignment step
email = "s242830@dtu.dk"

# Ensembl gene ID of each protein we want to show
proteins_ids = dict(
    HER2="ENSG00000141736",
    CD20="ENSG00000156738",
    PROM1="ENSG00000007062",
)

# Proteins to process, in the order they are declared in `proteins_ids`
proteins = list(proteins_ids)

# Running the analysis for each protein.
#
# For every protein the loop (1) fetches its transcripts and sequences,
# (2) aligns the sequences and runs DeepTMHMM, (3) builds the membrane
# topology object, (4) builds the expression plotting data, and stores each
# result under the protein name in a shelve database in `out_dir_for_plots`.

# shelve.open() does not create missing parent directories, so make sure the
# plots directory exists before the first shelf is opened (a fresh checkout
# would otherwise fail on the first protein).
os.makedirs(out_dir_for_plots, exist_ok=True)

for protein in proteins:

    print(f"Processing {protein}...")
    ensure_dir(out_dir)
    # NOTE(review): every protein uses the same `out_dir`; presumably the
    # helpers overwrite the previous protein's intermediate files -- confirm.
    transcripts_id, mapping = get_transcripts_and_sequences(proteins_ids[protein], out_dir)
    align_protein_sequences(email, out_dir)
    run_deeptmhmm(out_dir)

    # Persist the mapping returned by get_transcripts_and_sequences
    with shelve.open(os.path.join(out_dir_for_plots, "transcripts_to_isoforms_mapping")) as db:
        db[protein] = mapping

    membrane_topology_object = create_membrane_topology_objects(mapping, out_dir)

    with shelve.open(os.path.join(out_dir_for_plots, "membrane_topology_objects")) as db:
        db[protein] = membrane_topology_object

    expression_df = getting_expression_data(transcripts_id, out_dir)
    TCGA_GTEx_plotting_data = create_expression_figure_objects(out_dir, expression_df, mapping)

    with shelve.open(os.path.join(out_dir_for_plots, "TCGA_GTEx_plotting_data")) as db:
        db[protein] = TCGA_GTEx_plotting_data

Processing PROM1...
Fetching transcripts for gene with Ensembl ID: ENSG00000007062...
Fetching protein sequence for Ensembl IDs: ENST00000447510 ENST00000505450 ENST00000508167 ENST00000508322 ENST00000508940 ENST00000510224 ENST00000514967 ENST00000539194 ENST00000540805 ENST00000675377 ENST00000675613 ENST00000888869 ENST00000888870 ENST00000888871 ENST00000888872 ENST00000888873 ENST00000888874 ENST00000888875 ENST00000888876 ENST00000888877 ENST00000888878 ENST00000888879 ENST00000888880 ENST00000888881 ENST00000938908 ENST00000938909 ENST00000938910 ENST00000938911 ENST00000938912 ENST00000971409 ENST00000971410 ENST00000971411
ENST00000447510
ENST00000505450
ENST00000508322
ENST00000508940
ENST00000514967
ENST00000539194
ENST00000540805
ENST00000675377
ENST00000675613
ENST00000888869
ENST00000888870
ENST00000888871
ENST00000888872
ENST00000938908
ENST00000938909
ENST00000938910
ENST00000971409
Submitting alignment job...
Successfully submitted. Job ID: clustalo-R20260128-124726-0