In [None]:
# Install the pip-side dependencies used by this notebook.
# The inputs you may want to edit (sequence_1, sequence_2, filename) are assigned in the
# "Step 2" prediction cell further down, not in this cell.
# The Colab runtime may report a crash from an expected restart during installation of a library

# Output is verbose as written. To silence it, place `%%capture` on the very FIRST line
# of this cell: IPython only recognises a cell magic there, so simply un-commenting the
# line below (where it sits after other lines) would not work.
# %%capture

!pip install ImmuneBuilder 
!pip install -q condacolab
!pip install py3Dmol # Added for visualization later

In [None]:
# Standard library first, then the third-party bootstrap helper.
import sys  # not used in this cell itself; imported here as in the original notebook

import condacolab

# Bootstrap a Mambaforge distribution in the Colab runtime so that the following
# cell can install conda packages with mamba/conda.
condacolab.install_mambaforge()

**Important:** If the output of the cell above requests you to restart the runtime, please do so now by clicking "Runtime" -> "Restart runtime" in the menu bar.

In [None]:
# Install the conda-side dependencies (openmm, pdbfixer, anarci) with the mamba/conda
# tooling set up by the condacolab cell above.
# Output is verbose as written. To silence it, place `%%capture` on the very FIRST line
# of this cell: IPython only recognises a cell magic there, so simply un-commenting the
# line below (where it sits after other lines) would not work.
# %%capture

!mamba install -y openmm pdbfixer # install openmm (toolkit for molecular simulation; refine prediction) and pdbfixer
!conda install -y -c bioconda anarci # install anarci module from bioconda distribution

## Step 2: Define Sequences and Predict Structure

In [None]:
# Step 2: Run the model (adapted from Appendix C)

# It's good practice to ensure ImmuneBuilder is installed before importing,
# though the %%capture in the previous cells might hide late install messages.
# Consider removing the !pip install from here if the first cell handles it reliably.
# For now, keeping it as per Appendix C's structure.
# %%capture
!pip install -q ImmuneBuilder 

protein_type = "TCR" # Explicitly TCR for this notebook
from anarci import number # github.com/oxpig/ANARCI; aligns sequence to canonical protein
from ImmuneBuilder import TCRBuilder2 # prediction of 3d structure

# Select model
predictor = TCRBuilder2() # Using TCRBuilder2 as per Appendix C

# Inspect that TCR sequences are annotated as TCR alpha and beta chains
# Sequence data from www.rcsb.org/structure/5d2l (accessed on 10 March 2024)
# As provided in Appendix C
sequence_1 = 'MILNVEQSPQSLHVQEGDSTNFTCSFPSSNFYALHWYRWETAKSP''EALFVMTLNGDEKKKGRISATLNTKEGYSYLYIKGSQPEDSATYLCAFITGNQFYF''GTGTSLTVIPNIQNPDPAVYQLRDSKSSDKSVCLFTDFDSQTNVSQSKDSDVYITDK''CVLDMRSMDFKSNSAVAWSNKSDFACANAFNNSIIPEDTFFPSPESS'
sequence_2 = 'MGAGVSQSPSNKVTEKGKDVELRCDPISGHTALYWYRQRLGQGLE''FLIYFQGNSAPDKSGLPSDRFSAERTGESVSTLTIQRTQQEDSAVYLCASSQTQLWET''QYFGPGTRLLVLEDLKNVFPPEVAVFEPSEAEISHTQKATLVCLATGFYPDHVELSW''WVNGKEVHSGVCTDPQPLKEQPALNDSRYALSSRLRVSATFWQNPRNHFRCQVQF''YGLSENDEWTQDRAKPVTQIVSAEAWGRAD'

sequence_1 = "".join(sequence_1.split()) # Remove whitespace
sequence_2 = "".join(sequence_2.split()) # Remove whitespace
filename = 'tcr_structure_appendix_c.pdb' # Output file name

# Anarci will reject the sequence if it is not an expected match to the immunoprotein
# Ensure sys.path is set up if anarci was installed in a custom conda env
# (The mamba install should handle this, but good to be mindful)
import sys
if f"/usr/local/lib/python{sys.version_info.major}.{sys.version_info.minor}/site-packages/" not in sys.path:
    sys.path.insert(0, f"/usr/local/lib/python{sys.version_info.major}.{sys.version_info.minor}/site-packages/")

print("Numbering chain 1 with ANARCI...")
numbered_chain1, chain1_type = number(sequence_1)
if chain1_type:
    print(f"Chain 1 identified as: {chain1_type}")
else:
    print("Chain 1 could not be typed by ANARCI or is not a valid TCR/Ig chain.")

print("\nNumbering chain 2 with ANARCI...")
numbered_chain2, chain2_type = number(sequence_2)
if chain2_type:
    print(f"Chain 2 identified as: {chain2_type}")
else:
    print("Chain 2 could not be typed by ANARCI or is not a valid TCR/Ig chain.")

input_chains = dict() 
if chain1_type: # Use the type returned by ANARCI as the key
    input_chains[chain1_type] = sequence_1 
if chain2_type:
    input_chains[chain2_type] = sequence_2

if not input_chains:
    print("\nERROR: No valid TCR chains were identified by ANARCI. Cannot proceed with prediction.")
elif len(input_chains) < 2 and protein_type == "TCR": # TCRs usually need two chains
    print(f"\nWARNING: Only {len(input_chains)} chain(s) identified for TCR prediction. Ensure both Alpha and Beta chains are provided and correctly typed by ANARCI.")
    # Potentially allow prediction if at least one chain is valid, though TCRBuilder2 might require both.
    # For now, proceed if at least one chain is valid, user can debug if prediction fails.
    if chain1_type and not chain2_type:
      print("Proceeding with only chain 1.")
    elif not chain1_type and chain2_type:
      print("Proceeding with only chain 2.")


if input_chains:
    print(f"\nInput for predictor: {input_chains.keys()}")
    try:
        predictor.predict(input_chains).save(filename)
        print(f"\nPrediction complete. Structure saved to {filename}")
    except Exception as e:
        print(f"\nERROR during prediction: {e}")
else:
    print("\nPrediction skipped due to ANARCI chain identification issues.")

## Step 3: Visualize Predicted Structure

In [None]:
# Step 3: Visualize the prediction
#
# Relies on `filename` assigned in the prediction cell above.

import os # To check if the file exists

import py3Dmol

# Check if the PDB file was created
if os.path.exists(filename):
    print(f"Visualizing PDB file: {filename}")

    # Read the structure through a context manager so the file handle is closed
    # as soon as the text has been loaded.
    with open(filename, 'r') as pdb_file:
        pdb_text = pdb_file.read()

    # Setup the 3Dmol viewer and add the PDB model
    view = py3Dmol.view(width=600, height=400)
    view.addModel(pdb_text, 'pdb')

    # Cartoon representation coloured per chain. 'chain' is one of 3Dmol.js's built-in
    # colour schemes; the previously used 'chainHetView' is not a built-in scheme name.
    view.setStyle({'cartoon': {'colorscheme': 'chain'}})

    # For explicit per-chain colours instead, select by chain ID, e.g.
    #    view.setStyle({'chain': 'A'}, {'cartoon': {'color': 'blue'}})
    #    view.setStyle({'chain': 'B'}, {'cartoon': {'color': 'green'}})
    # (adjust the chain letters to those present in the saved PDB file).

    view.setBackgroundColor('white')
    view.zoomTo()
    view.show()
else:
    print(f"PDB file {filename} not found. Skipping visualization. Please check for errors in the prediction step.")

## Step 4: Download PDB File

In [None]:
# Step 4: Download the predicted structure
#
# Relies on `filename` assigned in the prediction cell; only works inside Google Colab.

import os

from google.colab import files

# Handle the missing-file case first, then offer the download.
if not os.path.exists(filename):
    print(f"PDB file {filename} not found. Skipping download.")
else:
    print(f"Offering {filename} for download...")
    files.download(filename)