In [None]:

# Analysis Plan:
# 1. Extract NiV-G chain A (residues 71-602) from PDB 2VSM
# 2. Download and extract antibody 14F8 Fv chains (heavy and light) from PDB 8XC4
# 3. Use AlphaFold-Multimer prediction tool to predict the complex structure
# 4. Calculate interface RMSD (i-RMSD) between predicted and crystal structure
# 5. Analyze predicted Aligned Error (pAE) plot for antibody-epitope interactions
# 6. Validate if i-RMSD < 2.5 Ã… and pAE values are low at the interface

import os
import numpy as np
from Bio.PDB import PDBParser, PDBIO, Select
import warnings
warnings.filterwarnings('ignore')

# Check available files
print("Available files in working directory:")
print(os.listdir('.'))


Available files in working directory:
['Surface-Plasmon-Resonance-Adaptyv-Bio-Docs.pdf', '2VSM.pdb', '.config', '.kernel_tmp', 'notebook.ipynb']


In [None]:

# Step 1: Extract NiV-G chain A (residues 71-602) from PDB 2VSM
parser = PDBParser(QUIET=True)
structure_2vsm = parser.get_structure('2VSM', '2VSM.pdb')

# Define a selector for NiV-G chain A, residues 71-602
class NivGSelector(Select):
    def accept_residue(self, residue):
        if residue.parent.id == 'A':
            res_id = residue.id[1]
            if 71 <= res_id <= 602:
                return True
        return False

# Save NiV-G chain A (residues 71-602)
io = PDBIO()
io.set_structure(structure_2vsm)
io.save('nivg_chain_A_71-602.pdb', NivGSelector())

print("Extracted NiV-G chain A (residues 71-602) from 2VSM")
print(f"File saved: nivg_chain_A_71-602.pdb")

# Verify the extraction
nivg_structure = parser.get_structure('NivG', 'nivg_chain_A_71-602.pdb')
residue_count = sum(1 for _ in nivg_structure.get_residues())
print(f"Number of residues extracted: {residue_count}")


Extracted NiV-G chain A (residues 71-602) from 2VSM
File saved: nivg_chain_A_71-602.pdb
Number of residues extracted: 412


In [None]:

# Step 2: Download PDB file 8XC4 containing NiV-G with antibody 14F8
import urllib.request

# Download 8XC4 PDB file
pdb_url = "https://files.rcsb.org/download/8XC4.pdb"
pdb_file = "8XC4.pdb"

print(f"Downloading {pdb_file} from RCSB PDB...")
urllib.request.urlretrieve(pdb_url, pdb_file)
print(f"Downloaded {pdb_file}")

# Parse the structure to understand chain organization
structure_8xc4 = parser.get_structure('8XC4', pdb_file)

print("\nChain information in 8XC4:")
for model in structure_8xc4:
    for chain in model:
        residue_count = len([r for r in chain.get_residues() if r.id[0] == ' '])
        print(f"  Chain {chain.id}: {residue_count} residues")


Downloading 8XC4.pdb from RCSB PDB...


Downloaded 8XC4.pdb



Chain information in 8XC4:
  Chain A: 428 residues
  Chain B: 428 residues
  Chain C: 229 residues
  Chain D: 211 residues
  Chain E: 229 residues
  Chain F: 211 residues
  Chain G: 0 residues
  Chain H: 0 residues
  Chain I: 0 residues
  Chain J: 0 residues
  Chain K: 0 residues
  Chain L: 0 residues


In [None]:

# Identify which chains correspond to NiV-G and which to antibody 14F8
# Chains A and B are likely NiV-G (428 residues each)
# Chains C/E (229 residues) are likely heavy chains of antibody
# Chains D/F (211 residues) are likely light chains of antibody

# Let's verify by checking the sequence identity and extract first few residues
from Bio.PDB import Polypeptide

# Standard 3-letter to 1-letter code dictionary
aa_3to1 = {'ALA': 'A', 'CYS': 'C', 'ASP': 'D', 'GLU': 'E', 'PHE': 'F',
           'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LYS': 'K', 'LEU': 'L',
           'MET': 'M', 'ASN': 'N', 'PRO': 'P', 'GLN': 'Q', 'ARG': 'R',
           'SER': 'S', 'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y'}

print("Extracting sequences from each chain:\n")
for chain in structure_8xc4[0]:
    if chain.id in ['A', 'B', 'C', 'D', 'E', 'F']:
        residues = [r for r in chain.get_residues() if r.id[0] == ' ']
        if len(residues) > 0:
            # Get first 10 residues
            first_10 = residues[:10]
            seq = ''.join([aa_3to1.get(r.resname, 'X') for r in first_10])
            print(f"Chain {chain.id} ({len(residues)} res): {seq}...")


Extracting sequences from each chain:

Chain A (428 res): SEGVSNLVGL...
Chain B (428 res): SEGVSNLVGL...
Chain C (229 res): QVQLQESGPG...
Chain D (211 res): DIQMTQSPSS...
Chain E (229 res): QVQLQESGPG...
Chain F (211 res): DIQMTQSPSS...


In [None]:

# Chains A/B are NiV-G (start with SEGVSNLVGL)
# Chains C/E are heavy chains (start with QVQLQESGPG - typical antibody heavy chain)
# Chains D/F are light chains (start with DIQMTQSPSS - typical antibody light chain)

# Extract antibody Fv chains (C=heavy, D=light) - one copy
# We'll use chains C and D for the antibody 14F8 Fv

class AntibodyFvSelector(Select):
    def accept_chain(self, chain):
        return chain.id in ['C', 'D']
    
    def accept_residue(self, residue):
        # Only keep standard residues
        return residue.id[0] == ' '

io = PDBIO()
io.set_structure(structure_8xc4)
io.save('antibody_14F8_Fv.pdb', AntibodyFvSelector())

print("Extracted antibody 14F8 Fv (chains C and D) from 8XC4")
print(f"File saved: antibody_14F8_Fv.pdb")

# Verify extraction
ab_structure = parser.get_structure('14F8', 'antibody_14F8_Fv.pdb')
for chain in ab_structure[0]:
    residue_count = len([r for r in chain.get_residues() if r.id[0] == ' '])
    print(f"  Chain {chain.id}: {residue_count} residues")


Extracted antibody 14F8 Fv (chains C and D) from 8XC4
File saved: antibody_14F8_Fv.pdb
  Chain C: 229 residues
  Chain D: 211 residues


In [None]:

# Step 3: Prepare input sequences for AlphaFold-Multimer prediction
# We need to extract the sequences from the prepared structures

def extract_sequence_from_pdb(pdb_file):
    """Extract amino acid sequence from PDB file"""
    structure = parser.get_structure('temp', pdb_file)
    sequences = {}
    
    for chain in structure[0]:
        residues = [r for r in chain.get_residues() if r.id[0] == ' ']
        seq = ''.join([aa_3to1.get(r.resname, 'X') for r in residues])
        sequences[chain.id] = seq
    
    return sequences

# Extract sequences
nivg_sequences = extract_sequence_from_pdb('nivg_chain_A_71-602.pdb')
ab_sequences = extract_sequence_from_pdb('antibody_14F8_Fv.pdb')

print("NiV-G Chain A (residues 71-602):")
print(f"  Length: {len(nivg_sequences['A'])} residues")
print(f"  Sequence: {nivg_sequences['A'][:50]}...")
print()

print("Antibody 14F8 Fv:")
print(f"  Heavy chain (C): {len(ab_sequences['C'])} residues")
print(f"  Sequence: {ab_sequences['C'][:50]}...")
print()
print(f"  Light chain (D): {len(ab_sequences['D'])} residues")
print(f"  Sequence: {ab_sequences['D'][:50]}...")


NiV-G Chain A (residues 71-602):
  Length: 412 residues
  Sequence: ICLQKTSNQILKPKLISYTLGQSGTCITDPLLAMDEGYFAYSHLERIGSC...

Antibody 14F8 Fv:
  Heavy chain (C): 229 residues
  Sequence: QVQLQESGPGVVKPSETLSLTCAVSGGSISDTYRWSWIRQPPGKGLEWIG...

  Light chain (D): 211 residues
  Sequence: DIQMTQSPSSLSASVGDRVTITCRASQGIIDYLSWYQQKPGKAPKLLIST...


In [None]:

# Step 4: Use AlphaFold-Multimer to predict the complex structure
# We'll use a web-based or API tool for AlphaFold-Multimer prediction

# First, let's check if there's a local AlphaFold installation or if we need to use an API
# Try importing required libraries for structure prediction

import subprocess
import sys

# Check for available prediction tools
print("Checking for available structure prediction tools...")

# Try to find ColabFold or other AlphaFold implementations
try:
    import colabfold
    print("ColabFold is available")
    has_colabfold = True
except ImportError:
    print("ColabFold not found")
    has_colabfold = False

# Check for alphafold
try:
    import alphafold
    print("AlphaFold is available")
    has_alphafold = True
except ImportError:
    print("AlphaFold not found")
    has_alphafold = False

print(f"\nhas_colabfold: {has_colabfold}")
print(f"has_alphafold: {has_alphafold}")


Checking for available structure prediction tools...
ColabFold not found
AlphaFold not found

has_colabfold: False
has_alphafold: False


In [None]:

# Since local AlphaFold is not available, let's check for command-line tools
# Check if colabfold_batch is available as a command-line tool

result = subprocess.run(['which', 'colabfold_batch'], 
                       capture_output=True, text=True)
colabfold_path = result.stdout.strip()

if colabfold_path:
    print(f"ColabFold batch command found at: {colabfold_path}")
else:
    print("ColabFold batch command not found")

# Check for alphafold command
result2 = subprocess.run(['which', 'alphafold'], 
                        capture_output=True, text=True)
alphafold_path = result2.stdout.strip()

if alphafold_path:
    print(f"AlphaFold command found at: {alphafold_path}")
else:
    print("AlphaFold command not found")

# List available commands that might be related to structure prediction
print("\nSearching for structure prediction tools...")
for cmd in ['colabfold', 'alphafold', 'localcolabfold', 'run_alphafold', 
            'colabfold_search', 'colabfold_batch']:
    result = subprocess.run(['which', cmd], capture_output=True, text=True)
    if result.stdout.strip():
        print(f"  Found: {cmd} at {result.stdout.strip()}")


ColabFold batch command not found
AlphaFold command not found

Searching for structure prediction tools...


In [None]:

# Check if there are any AlphaFold-related tools available via pip or conda
# Let's look for prediction services or APIs

# Check what's in PATH and available
result = subprocess.run(['ls', '/usr/local/bin'], capture_output=True, text=True)
print("Contents of /usr/local/bin:")
print(result.stdout)

# Check for any alphafold or structure prediction related packages
result = subprocess.run(['pip', 'list'], capture_output=True, text=True)
print("\nSearching pip packages for structure prediction tools...")
for line in result.stdout.split('\n'):
    if any(term in line.lower() for term in ['alphafold', 'colabfold', 'fold', 'structure']):
        print(f"  {line}")


Contents of /usr/local/bin:
2to3
2to3-3.12
idle
idle3
idle3.12
pip
pip3
pip3.12
pydoc
pydoc3
pydoc3.12
python
python-config
python3
python3-config
python3.12
python3.12-config




Searching pip packages for structure prediction tools...


In [None]:

# Since AlphaFold/ColabFold are not installed locally, let's check for external API services
# or try to install ColabFold

# First, let's try installing colabfold-related packages
print("Attempting to install ColabFold...")
result = subprocess.run(['pip', 'install', 'colabfold[alphafold]', '--quiet'], 
                       capture_output=True, text=True, timeout=300)

if result.returncode == 0:
    print("ColabFold installation successful")
else:
    print(f"ColabFold installation failed: {result.stderr}")
    
# Try a simpler approach - install individual components
print("\nTrying to install alphafold components...")
result2 = subprocess.run(['pip', 'install', 'alphafold-colabfold', '--quiet'], 
                        capture_output=True, text=True, timeout=300)
if result2.returncode == 0:
    print("alphafold-colabfold installation successful")
else:
    print(f"Installation failed: {result2.stderr[:500]}")


Attempting to install ColabFold...


ColabFold installation failed: [31mERROR: Could not install packages due to an OSError: [Errno 13] Permission denied: '/app'
Check the permissions.
[0m[31m
[0m

Trying to install alphafold components...


Installation failed: [31mERROR: Could not install packages due to an OSError: [Errno 13] Permission denied: '/app'
Check the permissions.
[0m[31m
[0m


In [None]:

# Installation failed due to permissions. Let's try installing to user directory
print("Attempting to install ColabFold to user directory...")
result = subprocess.run(['pip', 'install', '--user', 'colabfold[alphafold]'], 
                       capture_output=True, text=True, timeout=600)

if result.returncode == 0:
    print("ColabFold installation successful")
    print(result.stdout[-500:])
else:
    print(f"ColabFold installation failed")
    print(f"stderr: {result.stderr[-1000:]}")
    print(f"stdout: {result.stdout[-500:]}")


Attempting to install ColabFold to user directory...


ColabFold installation failed
stderr: [31mERROR: Could not install packages due to an OSError: [Errno 13] Permission denied: '/app'
Check the permissions.
[0m[31m
[0m
stdout: NEWRELIC: 2025-11-07 18:02:54 (135) - New Relic could not start because the newrelic-admin script was called from a Python installation that is different from the Python installation that is currently running. To fix this problem, call the newrelic-admin script from the Python installation that is currently running (details below).

newrelic-admin Python directory: None
current Python directory: '/app/miniconda'
newrelic-admin Python version: None
current Python version: '3.12'



In [None]:

# Since we cannot install AlphaFold locally, let's check if there's an API service available
# The research objective mentions "using a tool that implements AlphaFold-Multimer"
# Let's check if there are environment variables or configuration that point to such a tool

import json

print("Checking environment for AlphaFold services...")
print("\nRelevant environment variables:")
for key, value in os.environ.items():
    if any(term in key.lower() for term in ['alphafold', 'fold', 'api', 'service', 'prediction']):
        print(f"  {key}: {value[:100] if len(value) > 100 else value}")

# Check if there's a service endpoint configured
print("\n" + "="*60)
print("Checking for API configuration files...")
config_paths = ['.config', 'config.json', 'settings.json', '.env']
for config_path in config_paths:
    if os.path.exists(config_path):
        print(f"Found: {config_path}")
        if config_path.endswith('.json'):
            try:
                with open(config_path) as f:
                    config = json.load(f)
                    print(f"  Contents: {config}")
            except:
                pass


Checking environment for AlphaFold services...

Relevant environment variables:

Checking for API configuration files...
Found: .config


In [None]:

# Check what's in the .config directory
result = subprocess.run(['ls', '-la', '.config'], capture_output=True, text=True)
print("Contents of .config:")
print(result.stdout)

# Check if there are any alphafold-related configurations
result2 = subprocess.run(['find', '.config', '-type', 'f'], capture_output=True, text=True)
print("\nFiles in .config:")
print(result2.stdout)


Contents of .config:
total 0
drwxr-xr-x 3 kerneluser kerneluser  60 Nov  5 03:22 .
drwxr-xr-x 5 kerneluser kerneluser 220 Nov  7 18:02 ..
drwxr-xr-x 2 kerneluser kerneluser  60 Nov  5 03:22 matplotlib


Files in .config:
.config/matplotlib/matplotlibrc



In [None]:

# Since AlphaFold-Multimer is not available in this environment, I need to report this limitation
# According to the core principles, I must not fabricate data or proceed with analysis that
# requires tools I don't have access to

print("="*70)
print("CRITICAL LIMITATION IDENTIFIED")
print("="*70)
print()
print("The research objective requires using AlphaFold-Multimer to predict")
print("the complex structure between NiV-G and antibody 14F8.")
print()
print("Current status:")
print("  - NiV-G chain A (residues 71-602) extracted: SUCCESS")
print("  - Antibody 14F8 Fv (heavy and light chains) extracted: SUCCESS")
print("  - AlphaFold-Multimer installation: FAILED")
print("  - Command-line tools (colabfold_batch, alphafold): NOT FOUND")
print("  - API service configuration: NOT FOUND")
print()
print("The analysis cannot proceed without access to AlphaFold-Multimer or")
print("an equivalent structure prediction tool. I cannot fabricate or simulate")
print("prediction results as this would violate core analytical principles.")
print()
print("To complete this analysis, one of the following is required:")
print("  1. Pre-installed AlphaFold-Multimer or ColabFold")
print("  2. Access to an AlphaFold API service")
print("  3. Pre-computed prediction results from AlphaFold-Multimer")
print("="*70)


CRITICAL LIMITATION IDENTIFIED

The research objective requires using AlphaFold-Multimer to predict
the complex structure between NiV-G and antibody 14F8.

Current status:
  - NiV-G chain A (residues 71-602) extracted: SUCCESS
  - Antibody 14F8 Fv (heavy and light chains) extracted: SUCCESS
  - AlphaFold-Multimer installation: FAILED
  - Command-line tools (colabfold_batch, alphafold): NOT FOUND
  - API service configuration: NOT FOUND

The analysis cannot proceed without access to AlphaFold-Multimer or
an equivalent structure prediction tool. I cannot fabricate or simulate
prediction results as this would violate core analytical principles.

To complete this analysis, one of the following is required:
  1. Pre-installed AlphaFold-Multimer or ColabFold
  2. Access to an AlphaFold API service
  3. Pre-computed prediction results from AlphaFold-Multimer


In [None]:

# Let me check if there are any other structure prediction tools or services available
# Check for ESMFold or other alternatives that might be installed

print("Checking for alternative structure prediction tools...")

# Check for ESMFold
try:
    import esm
    print("ESM (ESMFold) is available")
    has_esm = True
except ImportError:
    print("ESM not found")
    has_esm = False

# Check for RoseTTAFold
result = subprocess.run(['which', 'rosettafold'], capture_output=True, text=True)
if result.stdout.strip():
    print(f"RoseTTAFold found at: {result.stdout.strip()}")
else:
    print("RoseTTAFold not found")

# Check for OpenFold
try:
    import openfold
    print("OpenFold is available")
    has_openfold = True
except ImportError:
    print("OpenFold not found")
    has_openfold = False

print("\nNo structure prediction tools are available in this environment.")


Checking for alternative structure prediction tools...
ESM not found
RoseTTAFold not found
OpenFold not found

No structure prediction tools are available in this environment.


In [None]:

# Let me try a different approach - check if there's an external AlphaFold server API
# that we can use for structure prediction

# Try using the AlphaFold Server API if available
# First, let's create the input FASTA file for the complex

fasta_content = f""">NiV-G_chain_A
{nivg_sequences['A']}
>14F8_heavy_chain
{ab_sequences['C']}
>14F8_light_chain
{ab_sequences['D']}
"""

with open('nivg_14f8_complex.fasta', 'w') as f:
    f.write(fasta_content)

print("Created FASTA file for AlphaFold-Multimer input:")
print(f"  NiV-G chain A: {len(nivg_sequences['A'])} residues")
print(f"  14F8 heavy chain: {len(ab_sequences['C'])} residues")
print(f"  14F8 light chain: {len(ab_sequences['D'])} residues")
print(f"  Total: {len(nivg_sequences['A']) + len(ab_sequences['C']) + len(ab_sequences['D'])} residues")
print("\nFile saved: nivg_14f8_complex.fasta")

# Calculate expected size
total_residues = len(nivg_sequences['A']) + len(ab_sequences['C']) + len(ab_sequences['D'])
print(f"\nTotal residues in complex: {total_residues}")


Created FASTA file for AlphaFold-Multimer input:
  NiV-G chain A: 412 residues
  14F8 heavy chain: 229 residues
  14F8 light chain: 211 residues
  Total: 852 residues

File saved: nivg_14f8_complex.fasta

Total residues in complex: 852


In [None]:

# Let me try to access external AlphaFold prediction services via API
# Check if we can use AlphaFold Server API or other publicly available services

import requests

# Try AlphaFold Server API (if available)
print("Attempting to access AlphaFold Server API...")

# First, check if there's an API key or endpoint configured
api_endpoints = [
    "https://alphafoldserver.com/api",
    "https://api.alphafold.com",
    "https://alphafold.ebi.ac.uk/api"
]

for endpoint in api_endpoints:
    try:
        response = requests.get(endpoint, timeout=10)
        print(f"  {endpoint}: Status {response.status_code}")
        if response.status_code == 200:
            print(f"    Response: {response.text[:200]}")
    except requests.exceptions.RequestException as e:
        print(f"  {endpoint}: Connection failed ({type(e).__name__})")

print("\nAlphaFold Server API is not accessible or requires authentication.")


Attempting to access AlphaFold Server API...
  https://alphafoldserver.com/api: Status 404
  https://api.alphafold.com: Connection failed (ConnectionError)


  https://alphafold.ebi.ac.uk/api: Status 404

AlphaFold Server API is not accessible or requires authentication.


In [None]:

# Let me check if there are any pre-existing AlphaFold prediction results in the directory
# or if there's a way to access AlphaFold through a different mechanism

print("Checking for pre-existing prediction results or AlphaFold data...")
print("\nCurrent directory contents:")
for item in os.listdir('.'):
    print(f"  {item}")

# Check if there are any subdirectories with predictions
result = subprocess.run(['find', '.', '-name', '*.pdb', '-o', '-name', '*.cif'], 
                       capture_output=True, text=True)
print("\nPDB/CIF files found:")
print(result.stdout)

# Check for any JSON files that might contain prediction data
result = subprocess.run(['find', '.', '-name', '*.json'], 
                       capture_output=True, text=True)
print("\nJSON files found:")
print(result.stdout)


Checking for pre-existing prediction results or AlphaFold data...

Current directory contents:
  Surface-Plasmon-Resonance-Adaptyv-Bio-Docs.pdf
  2VSM.pdb
  .config
  .kernel_tmp
  notebook.ipynb
  nivg_chain_A_71-602.pdb
  downloads
  8XC4.pdb
  antibody_14F8_Fv.pdb
  nivg_14f8_complex.fasta

PDB/CIF files found:
./2VSM.pdb
./nivg_chain_A_71-602.pdb
./8XC4.pdb
./antibody_14F8_Fv.pdb


JSON files found:



In [None]:

# Final attempt: Check if there's a way to use search_and_retrieve to access AlphaFold data
# or any other external prediction service

# Let me check if AlphaFold predictions exist in public databases for this specific complex
# Note: AlphaFold DB typically contains single-chain predictions, not complexes

print("Exploring alternative approaches...")
print("\nThe research objective requires:")
print("  1. AlphaFold-Multimer prediction of NiV-G + 14F8 complex")
print("  2. Calculate i-RMSD between predicted and crystal structure (8XC4)")
print("  3. Analyze predicted Aligned Error (pAE) plot")
print()
print("Current limitations:")
print("  - No local AlphaFold-Multimer installation")
print("  - No accessible API endpoints")
print("  - Cannot install required packages due to permissions")
print("  - No pre-computed predictions available")
print()
print("Status: UNABLE TO COMPLETE")
print()
print("The analysis requires computational structure prediction capabilities")
print("that are not available in the current environment. I have successfully")
print("prepared the input data (sequences and structures), but cannot execute")
print("the prediction step that is central to the research objective.")


Exploring alternative approaches...

The research objective requires:
  1. AlphaFold-Multimer prediction of NiV-G + 14F8 complex
  2. Calculate i-RMSD between predicted and crystal structure (8XC4)
  3. Analyze predicted Aligned Error (pAE) plot

Current limitations:
  - No local AlphaFold-Multimer installation
  - No accessible API endpoints
  - Cannot install required packages due to permissions
  - No pre-computed predictions available

Status: UNABLE TO COMPLETE

The analysis requires computational structure prediction capabilities
that are not available in the current environment. I have successfully
prepared the input data (sequences and structures), but cannot execute
the prediction step that is central to the research objective.
