In [35]:
import sys
from pathlib import Path
import pandas as pd

# Handle paths for Jupyter (where __file__ is not defined)
try:
    current_path = Path(__file__).resolve()
except NameError:
    # __file__ is not defined in Jupyter; use cwd as fallback
    current_path = Path.cwd()

# Set up project paths
project_root = current_path.parents[1] if len(current_path.parents) >= 3 else current_path
local_path = project_root / 'back_end'

# Add project paths to sys.path if not already present
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))
if str(local_path) not in sys.path:
    sys.path.insert(0, str(local_path))

# Import project modules (will work if path is correct)
import src.utils.utils
import src.utils.logging_utils
import src.main as main
from src.plotting import *
import src.data_cleaning as data_cleaning

multimer_size = 5

# Function to load data
def download_data_dict(multimer_size):
    input_dir = project_root / 'back_end' / 'data' / 'filtered_reaction_database' / f'multimer_size_{multimer_size}'
    combined_database = pd.read_csv(input_dir / 'combined_database.csv', index_col=0)
    context_history = pd.read_csv(input_dir / 'context_history.csv', index_col=0)
    donor_history = pd.read_csv(input_dir / 'donor_history.csv', index_col=0)
    reaction_history = pd.read_csv(input_dir / 'reaction_history.csv', index_col=0)
    ubiquitin_history = pd.read_csv(input_dir / 'ubiquitin_history.csv', index_col=0)
    return {
        'combined_database': combined_database,
        'context_history': context_history,
        'donor_history': donor_history,
        'reaction_history': reaction_history,
        'ubiquitin_history': ubiquitin_history
    }

# Load the data
data_dict = download_data_dict(multimer_size)
combined_database = data_dict['combined_database']
context_history = data_dict['context_history']
donor_history = data_dict['donor_history']
reaction_history = data_dict['reaction_history']
ubiquitin_history = data_dict['ubiquitin_history']

# Function to load data
def download__all_data_dict(multimer_size):
    input_dir = project_root / 'back_end' / 'data' / 'reaction_database' / f'multimer_size_{multimer_size}'
    context_history = pd.read_csv(input_dir / 'context_history.csv', index_col=0)
    donor_history = pd.read_csv(input_dir / 'donor_history.csv', index_col=0)
    reaction_history = pd.read_csv(input_dir / 'reaction_history.csv', index_col=0)
    ubiquitin_history = pd.read_csv(input_dir / 'ubiquitin_history.csv', index_col=0)
    return {
        'context_history': context_history,
        'donor_history': donor_history,
        'reaction_history': reaction_history,
        'ubiquitin_history': ubiquitin_history
    }

# Load the data
data_dict = download__all_data_dict(multimer_size)
all_context_history = data_dict['context_history']
all_donor_history = data_dict['donor_history']
all_reaction_history = data_dict['reaction_history']
all_ubiquitin_history = data_dict['ubiquitin_history']

import json

# Replace with the actual path to your file
file_path = f"/Users/ekummelstedt/le_code_base/ubiquitinformatics/front_end/src/data/multimer_id_to_json{multimer_size}.json"

# Open and read the JSON file
with open(file_path, 'r') as f:
    multimers = json.load(f)

In [30]:
# Reset index if needed
all_ubiquitin_history = all_ubiquitin_history.reset_index()
all_context_history = all_context_history.reset_index()

In [31]:
# This should be a separate function in run_file.pyxs
multimered_ubiquitin_history, multimered_context_history = data_cleaning.global_deprotection_dual(all_ubiquitin_history, all_context_history)

In [32]:
def get_multimer_edges_by_lysines(context_data: dict, 
                                  lysine_ids: dict
                                  ) -> dict:
    """
    Extracts edges from the context data where the source or target lysine ID is in the specified set.
    
    Parameters:
        context_data (dict): Dictionary containing multimer contexts.
        lysine_ids (set): Set of lysine IDs to filter edges by.
    
    Returns:
        dict: Dictionary with keys as multimer IDs and values as lists of edges matching the lysine IDs.
    """

    # DONE reveal all the edges for trimers 
    def reveal_edges(context_):
        """
        Gives the ID of the linkages from the context.
        """
        edges = context_['conjugated_lysines']
        return edges

    reveal_all_edges = {key: reveal_edges(value) for key, value in context_data.items()}

    # DONE only select edges with K63 and K48 linkages
    # Keep only entries where all edge labels are either K63 or K48
    def filter_edges_by_labels(reveal_all_edges, labels=lysine_ids):
        """
        Filters the edges in the reveal_all_edges dictionary to keep only those
        where all edge labels are in the specified set of labels.
        
        Parameters:
            reveal_all_edges (dict): Dictionary containing edges with their labels.
            labels (set): Set of labels to filter by (default is {'K63', 'K48'}).
        
        Returns:
            dict: Filtered dictionary with edges that match the criteria.
        """
        return {
            key: edges
            for key, edges in reveal_all_edges.items()
            if all(edge[1] in labels for edge in edges)
        } 

    return filter_edges_by_labels(reveal_all_edges)

In [33]:
# DONE reveal all the edges for trimers 
def reveal_edges(context_):
    """
    Gives the ID of the linkages from the context.
    """
    edges = context_['conjugated_lysines']
    return edges

json_counting = {}

for i in range(len(multimers.keys())):
    num_of_reactions = len(multimered_ubiquitin_history[multimered_ubiquitin_history['final_multimer'] == multimers[f'Ub{multimer_size}_{i+1}']])
    ubi_DAG, ubi_context = main.iterate_through_ubiquitin(multimers[f'Ub{multimer_size}_{i+1}'])
    json_counting[f'Ub{multimer_size}_{i+1}'] = {
        num_of_reactions: 'num_of_reactions', 
        'ubiDAG_edges': str(reveal_edges(ubi_context))
        }
json_counting

{'Ub4_1': {32: 'num_of_reactions',
  'ubiDAG_edges': "[[1, 'K48', 2], [2, 'K48', 3], [3, 'K48', 4]]"},
 'Ub4_2': {12: 'num_of_reactions',
  'ubiDAG_edges': "[[1, 'K48', 2], [2, 'K48', 3], [3, 'K63', 4]]"},
 'Ub4_3': {40: 'num_of_reactions',
  'ubiDAG_edges': "[[1, 'K48', 2], [2, 'K63', 3], [2, 'K48', 4]]"},
 'Ub4_4': {64: 'num_of_reactions',
  'ubiDAG_edges': "[[1, 'K63', 2], [1, 'K48', 3], [3, 'K48', 4]]"},
 'Ub4_5': {16: 'num_of_reactions',
  'ubiDAG_edges': "[[1, 'K48', 2], [2, 'K63', 3], [3, 'K48', 4]]"},
 'Ub4_6': {24: 'num_of_reactions',
  'ubiDAG_edges': "[[1, 'K48', 2], [2, 'K63', 3], [3, 'K63', 4]]"},
 'Ub4_7': {36: 'num_of_reactions',
  'ubiDAG_edges': "[[1, 'K63', 2], [1, 'K48', 3], [3, 'K63', 4]]"},
 'Ub4_8': {36: 'num_of_reactions',
  'ubiDAG_edges': "[[1, 'K63', 2], [2, 'K48', 3], [1, 'K48', 4]]"},
 'Ub4_9': {64: 'num_of_reactions',
  'ubiDAG_edges': "[[1, 'K63', 2], [2, 'K63', 3], [1, 'K48', 4]]"},
 'Ub4_10': {24: 'num_of_reactions',
  'ubiDAG_edges': "[[1, 'K63', 2], [2

In [34]:
# Compare to amount of branching/graph isomorphism 