In [40]:
import sys
from pathlib import Path
import pandas as pd

# Locate this file on disk. Notebooks do not define __file__, so the
# resulting NameError is caught and the current working directory is used.
try:
    current_path = Path(__file__).resolve()
except NameError:
    current_path = Path.cwd()

# The project root is two levels above current_path when the path has at
# least three ancestors; otherwise current_path itself is used.
if len(current_path.parents) >= 3:
    project_root = current_path.parents[1]
else:
    project_root = current_path
local_path = project_root / 'back_end'

# Put both directories at the front of sys.path, skipping any that are
# already listed. local_path is handled last, so when both get inserted it
# ends up ahead of project_root.
for _import_dir in (str(project_root), str(local_path)):
    if _import_dir not in sys.path:
        sys.path.insert(0, _import_dir)

# Import project modules (will work if path is correct)
import src.utils.utils
import src.utils.logging_utils
import src.main as main
from src.plotting import *
import src.data_cleaning as data_cleaning

# Selects which dataset the notebook loads: the value is interpolated into the
# `multimer_size_{multimer_size}` data directory names and into the
# `multimer_id_to_json{multimer_size}.json` file name.
multimer_size = 5

# Load the filtered reaction tables for one multimer size
def download_data_dict(multimer_size):
    """Read the filtered reaction database CSVs into a dict of DataFrames.

    Despite the name, nothing is downloaded: every table is read from
    ``<project_root>/back_end/data/filtered_reaction_database/multimer_size_<multimer_size>/``
    with the first CSV column used as the index.

    Args:
        multimer_size: Value interpolated into the ``multimer_size_<...>``
            directory name.

    Returns:
        dict: Maps each table name (``combined_database``, ``context_history``,
        ``donor_history``, ``reaction_history``, ``ubiquitin_history``) to the
        DataFrame read from ``<table name>.csv``.
    """
    table_names = (
        'combined_database',
        'context_history',
        'donor_history',
        'reaction_history',
        'ubiquitin_history',
    )
    input_dir = (
        project_root
        / 'back_end'
        / 'data'
        / 'filtered_reaction_database'
        / f'multimer_size_{multimer_size}'
    )
    # Tables are read in the order listed above, which is also the key order
    # of the returned dict.
    return {
        table: pd.read_csv(input_dir / f'{table}.csv', index_col=0)
        for table in table_names
    }

# Read the filtered tables and expose each one as a notebook-level variable
data_dict = download_data_dict(multimer_size)
(
    combined_database,
    context_history,
    donor_history,
    reaction_history,
    ubiquitin_history,
) = (
    data_dict[table]
    for table in (
        'combined_database',
        'context_history',
        'donor_history',
        'reaction_history',
        'ubiquitin_history',
    )
)

# Load the unfiltered reaction tables for one multimer size
def download__all_data_dict(multimer_size):
    """Read the unfiltered reaction database CSVs into a dict of DataFrames.

    Despite the name, nothing is downloaded: every table is read from
    ``<project_root>/back_end/data/reaction_database/multimer_size_<multimer_size>/``
    with the first CSV column used as the index.

    Args:
        multimer_size: Value interpolated into the ``multimer_size_<...>``
            directory name.

    Returns:
        dict: Maps each table name (``context_history``, ``donor_history``,
        ``reaction_history``, ``ubiquitin_history``) to the DataFrame read
        from ``<table name>.csv``.
    """
    table_names = (
        'context_history',
        'donor_history',
        'reaction_history',
        'ubiquitin_history',
    )
    input_dir = (
        project_root
        / 'back_end'
        / 'data'
        / 'reaction_database'
        / f'multimer_size_{multimer_size}'
    )
    # Tables are read in the order listed above, which is also the key order
    # of the returned dict.
    return {
        table: pd.read_csv(input_dir / f'{table}.csv', index_col=0)
        for table in table_names
    }

# Read the unfiltered tables; data_dict is rebound to this second result and
# each table is exposed under an `all_`-prefixed notebook-level name
data_dict = download__all_data_dict(multimer_size)
(
    all_context_history,
    all_donor_history,
    all_reaction_history,
    all_ubiquitin_history,
) = (
    data_dict[table]
    for table in (
        'context_history',
        'donor_history',
        'reaction_history',
        'ubiquitin_history',
    )
)

import json

# Location of multimer_id_to_json<multimer_size>.json, derived from
# project_root instead of an absolute, user-specific path so the notebook
# also runs on other machines and checkouts.
# NOTE(review): assumes project_root is the repository root that contains both
# back_end/ and front_end/ — confirm.
file_path = project_root / 'front_end' / 'src' / 'data' / f'multimer_id_to_json{multimer_size}.json'

# Parse the JSON file; the encoding is given explicitly rather than relying on
# the platform default.
with open(file_path, 'r', encoding='utf-8') as f:
    multimers = json.load(f)

In [41]:
# Move each frame's index into an ordinary column, but only when that has not
# happened yet. An unconditional `x = x.reset_index()` is not safe to re-run:
# each extra run adds a spurious `index`/`level_0` column and eventually raises
# once those names are taken. After a reset the frame has a RangeIndex, which
# is used here as the "already done" marker.
# NOTE(review): relies on frames read with `index_col=0` not starting out with
# a RangeIndex, so the first run still performs the reset — confirm.
if not isinstance(all_ubiquitin_history.index, pd.RangeIndex):
    all_ubiquitin_history = all_ubiquitin_history.reset_index()
if not isinstance(all_context_history.index, pd.RangeIndex):
    all_context_history = all_context_history.reset_index()

In [None]:
# TODO: move this step into its own function in the run_file module
# (the original note named the file `run_file.pyxs`).
(
    multimered_ubiquitin_history,
    multimered_context_history,
) = data_cleaning.global_deprotection_dual(
    all_ubiquitin_history,
    all_context_history,
)

In [43]:
# For every entry in `multimers`, report how many rows of the history have
# that entry's value in the `final_multimer` column.
for multimer_id, multimer_value in multimers.items():
    is_match = multimered_ubiquitin_history['final_multimer'] == multimer_value
    num_of_reactions = len(multimered_ubiquitin_history[is_match])
    print(f"Multimer {multimer_id} has {num_of_reactions} reactions in the history.")

Multimer Ub5_5 has 128 reactions in the history.
Multimer Ub5_9 has 60 reactions in the history.
Multimer Ub5_13 has 44 reactions in the history.
Multimer Ub5_14 has 192 reactions in the history.
Multimer Ub5_22 has 64 reactions in the history.
Multimer Ub5_18 has 36 reactions in the history.
Multimer Ub5_21 has 64 reactions in the history.
Multimer Ub5_26 has 48 reactions in the history.
Multimer Ub5_24 has 64 reactions in the history.
Multimer Ub5_25 has 36 reactions in the history.
Multimer Ub5_27 has 60 reactions in the history.
Multimer Ub5_28 has 128 reactions in the history.
Multimer Ub5_23 has 44 reactions in the history.
Multimer Ub5_12 has 48 reactions in the history.
Multimer Ub5_32 has 96 reactions in the history.
Multimer Ub5_35 has 44 reactions in the history.
Multimer Ub5_36 has 48 reactions in the history.
Multimer Ub5_37 has 80 reactions in the history.
Multimer Ub5_31 has 48 reactions in the history.
Multimer Ub5_29 has 48 reactions in the history.
Multimer Ub5_30 has

In [None]:
# TODO: compare the per-multimer reaction counts above to the amount of branching / graph isomorphism