In [None]:
import os
import matplotlib.pyplot as plt
import pandas as pd
from scripts.topology import (
    ensure_dir,
    get_transcripts_and_sequences,
    align_protein_sequences,
    run_deeptmhmm,
    generate_isoform_mapping,
    create_membrane_topology_objects
)
from scripts.expression import (
    getting_expression_data,
    create_expression_figure_objects
)
import json
import shelve

## Variables

# Identifier (ENSG... accession) looked up for each protein; the insertion
# order of this table also defines the processing order below.
proteins_ids = dict(
    HER2="ENSG00000141736",
    CD20="ENSG00000156738",
    PROM1="ENSG00000007062",
)

# Proteins we want to show (every key of the table above, in order)
proteins = list(proteins_ids)

# Email handed to the alignment step
email = "s242830@dtu.dk"

# Output directories: one for the shelve files read by the plots,
# one for the per-protein working files
out_dir_for_plots = "./app_v2/files_for_plots"
out_dir = "./files"

# Running the analysis for each protein

# shelve.open() does not create missing parent directories, so make sure the
# folder holding the plot databases exists before the first write.
os.makedirs(out_dir_for_plots, exist_ok=True)

# NOTE(review): the [2:] slice limits this run to the entries of `proteins`
# from index 2 onwards (currently only the last one). Presumably left in to
# resume a partial run -- confirm, and drop the slice to process every protein.
for protein in proteins[2:]:

    print(f"Processing {protein}...")

    # Working directory for this protein, shared by every step below
    protein_dir = out_dir + "/" + protein
    ensure_dir(protein_dir)

    # Sequence retrieval, alignment, topology prediction and isoform mapping.
    # The steps only communicate through `protein_dir`, so their order matters.
    transcripts_id = get_transcripts_and_sequences(proteins_ids[protein], protein_dir)
    align_protein_sequences(email, protein_dir)
    run_deeptmhmm(protein_dir)
    mapping = generate_isoform_mapping(protein_dir)

    # Each result is stored under the protein name right after it is computed,
    # so earlier results are already on disk if a later step fails.
    with shelve.open(out_dir_for_plots + "/transcripts_to_isoforms_mapping") as db:
        db[protein] = mapping

    membrane_topology_object = create_membrane_topology_objects(mapping, protein_dir)

    with shelve.open(out_dir_for_plots + "/membrane_topology_objects") as db:
        db[protein] = membrane_topology_object

    # Expression data for the transcripts found above, then its plotting objects
    expression_df = getting_expression_data(transcripts_id, protein_dir)
    TCGA_GTEx_plotting_data = create_expression_figure_objects(protein_dir, expression_df, mapping)

    with shelve.open(out_dir_for_plots + "/TCGA_GTEx_plotting_data") as db:
        db[protein] = TCGA_GTEx_plotting_data

ModuleNotFoundError: No module named 'scripts.cellular_localization'