In [1]:

import sys
from pathlib import Path
import pandas as pd

# Work out where we are on disk. A Jupyter kernel does not define __file__,
# so in that case the current working directory stands in for the file path.
try:
    current_path = Path(__file__).resolve()
except NameError:
    current_path = Path.cwd()

# The project root is taken to be the second ancestor of current_path; when
# the path has fewer than three ancestors, current_path itself is used.
if len(current_path.parents) >= 3:
    project_root = current_path.parents[1]
else:
    project_root = current_path
local_path = project_root / 'back_end'

# Prepend both directories to sys.path (skipping any already listed) so the
# project's `src` package can be imported. local_path is inserted last and
# therefore ends up ahead of project_root.
for search_dir in (str(project_root), str(local_path)):
    if search_dir not in sys.path:
        sys.path.insert(0, search_dir)

# Import project modules (will work if path is correct)
import src.utils.utils
import src.utils.logging_utils
import src.main as main
import src.plotting as plotting

# Linkage records for the multimer of interest; every record carries a
# "from" index, a "to" index and a "linkage" label.
jsonOutput = [
    {"from": 0, "to": 2, "linkage": "K63"},
    {"from": 2, "to": 4, "linkage": "K48"},
    {"from": 4, "to": 7, "linkage": "K48"},
    {"from": 7, "to": 11, "linkage": "K48"},
]

# Multimer size: one more than the number of linkage records.
multimer_size = 1 + len(jsonOutput)

# Function to load data
def download_data_dict(multimer_size, root=None):
    """Read the filtered reaction-database CSV tables for one multimer size.

    Nothing is downloaded: the files are read from
    ``<root>/back_end/data/filtered_reaction_database/multimer_size_<multimer_size>/``.

    Parameters
    ----------
    multimer_size : int
        Selects the ``multimer_size_<multimer_size>`` sub-directory.
    root : str or pathlib.Path, optional
        Project root directory. When omitted, the module-level
        ``project_root`` is used, which keeps existing one-argument calls
        working unchanged.

    Returns
    -------
    dict
        Maps each table name (``combined_database``, ``context_history``,
        ``donor_history``, ``reaction_history``, ``ubiquitin_history``) to the
        DataFrame read from ``<table name>.csv``, with the first CSV column
        used as the index.

    Raises
    ------
    FileNotFoundError
        Propagated from ``pandas.read_csv`` when one of the CSV files is
        missing.
    """
    # Resolve the root lazily so the notebook global is only needed when the
    # caller does not supply a directory explicitly.
    base_dir = project_root if root is None else Path(root)
    input_dir = (
        base_dir / 'back_end' / 'data' / 'filtered_reaction_database'
        / f'multimer_size_{multimer_size}'
    )

    # One entry per CSV file; the order here is the key order of the result.
    table_names = (
        'combined_database',
        'context_history',
        'donor_history',
        'reaction_history',
        'ubiquitin_history',
    )
    return {
        table_name: pd.read_csv(input_dir / f'{table_name}.csv', index_col=0)
        for table_name in table_names
    }

# Read all tables for the selected multimer size, then bind each table to a
# top-level name of its own.
data_dict = download_data_dict(multimer_size)
(
    combined_database,
    context_history,
    donor_history,
    reaction_history,
    ubiquitin_history,
) = (
    data_dict[table_name]
    for table_name in (
        'combined_database',
        'context_history',
        'donor_history',
        'reaction_history',
        'ubiquitin_history',
    )
)

In [4]:
# Show the project root resolved by the path setup in the first cell.
project_root

PosixPath('/Users/ekummelstedt/le_code_base/ubiquitinformatics')

In [None]:
# Show the ubiquitin_history table returned by download_data_dict.
ubiquitin_history

Unnamed: 0,index,multimer_id,used_in_synthesis,initial_acceptor,dimer_formation,dimer_deprotection,trimer_formation,trimer_deprotection,tetramer_formation,tetramer_deprotection,pentamer_formation,final_multimer
0,47,Ub5_5,1,"{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_..."
1,71,Ub5_5,0,"{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_..."
2,91,Ub5_5,0,"{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_..."
3,111,Ub5_9,1,"{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_..."
4,127,Ub5_13,1,"{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_..."
...,...,...,...,...,...,...,...,...,...,...,...,...
109,2127,Ub5_17,0,"{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_..."
110,2143,Ub5_15,1,"{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_..."
111,2147,Ub5_16,1,"{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_..."
112,2163,Ub5_19,1,"{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_...","{'protein': '1ubq', 'chain_number': 1, 'FASTA_..."


In [7]:
import pandas as pd
import json
from pathlib import Path

def generate_final_multimer_map(multimer_size: int, root_dir=None):
    """Export a ``multimer_id -> final_multimer`` mapping as a JSON file.

    Reads
    ``<root>/back_end/data/filtered_reaction_database/multimer_size_<multimer_size>/ubiquitin_history.csv``
    and writes
    ``<root>/front_end/public/multimer_id_to_json<multimer_size>.json``.

    Parameters
    ----------
    multimer_size : int
        Selects the input sub-directory and is embedded in the output file
        name.
    root_dir : str or pathlib.Path, optional
        Project root directory. When omitted, the original hard-coded
        location is used so that existing one-argument calls keep working;
        pass it explicitly on any other machine.

    Returns
    -------
    None
        The result is the file written to disk; its path is printed.

    Raises
    ------
    FileNotFoundError
        Propagated from ``pandas.read_csv`` when the input CSV is missing.
    KeyError
        When the CSV lacks a ``multimer_id`` or ``final_multimer`` column.
    """
    # Legacy default: machine-specific path kept only for backward compatibility.
    if root_dir is None:
        project_root = Path("/Users/ekummelstedt/le_code_base/ubiquitinformatics")
    else:
        project_root = Path(root_dir)
    input_path = project_root / 'back_end' / 'data' / 'filtered_reaction_database' / f'multimer_size_{multimer_size}' / 'ubiquitin_history.csv'
    output_path = project_root / 'front_end' / 'public' / f'multimer_id_to_json{multimer_size}.json'

    # Load the ubiquitin_history DataFrame
    df = pd.read_csv(input_path, index_col=0)

    # Create a dictionary mapping from multimer_id to final_multimer.
    # When a multimer_id occurs on several rows, the last row's value is the
    # one kept in the dict.
    # NOTE(review): final_multimer is written exactly as read from the CSV
    # (a string per row, not a nested JSON object) — confirm the consumer of
    # this file expects that.
    mapping = df.set_index('multimer_id')['final_multimer'].to_dict()

    # Save to JSON. The input is read first, so a wrong root fails before any
    # directory is created.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(mapping, f, indent=2)

    print(f"Mapping file saved to: {output_path}")

In [8]:
# Write the multimer_id -> final_multimer JSON mapping for multimer size 5.
generate_final_multimer_map(5)

Mapping file saved to: /Users/ekummelstedt/le_code_base/ubiquitinformatics/front_end/public/multimer_id_to_json5.json
