## Hydrophobic-Polar (HP) Lattice Model: Monte Carlo Sampling

Let's perform an initial MCMC (Markov chain Monte Carlo) sampling on an HP model, so that we can later do approximate counting of proteins.

Amino acids and their H (Hydrophobic) / P (Polar/Neutral) classification:

| Amino acid    | Letter | H or P |
|---------------|--------|--------|
| Alanine       | A      | H      |
| Arginine      | R      | P      |
| Asparagine    | N      | P      |
| Aspartate     | D      | P      |
| Cysteine      | C      | H      |
| Glutamine     | Q      | P      |
| Glutamate     | E      | P      |
| Glycine       | G      | P      |
| Histidine     | H      | P      |
| Isoleucine    | I      | H      |
| Leucine       | L      | H      |
| Lysine        | K      | P      |
| Methionine    | M      | H      |
| Phenylalanine | F      | H      |
| Proline       | P      | P      |
| Serine        | S      | P      |
| Threonine     | T      | P      |
| Tryptophan    | W      | H      |
| Tyrosine      | Y      | H      |
| Valine        | V      | H      |

Source: [Amino acids & HP](https://www.alfa-chemistry.com/resources/hydrophobicity-index-table-of-common-amino-acids.html)


In [1]:
import plotly.graph_objects as go
import numpy as np

In [4]:
# One-letter amino-acid code -> HP class: 'H' (hydrophobic) or 'P' (polar/neutral).
# Built from the two residue groups; hydrophobic letters come first, then polar.
HP_MAP = {
    **dict.fromkeys('ACILMFWYV', 'H'),
    **dict.fromkeys('RNDQEGHKPST', 'P'),
}

def generate_3d_saw(sequence):
    """Generate 3D self-avoiding walk coordinates for the sequence.

    The first residue is placed at the origin of a cubic lattice. Every
    following residue is placed on the first unoccupied nearest-neighbour
    site of the previous residue, trying the six axis directions in a fixed
    order (+x, -x, +y, -y, +z, -z). The choice is deterministic, not random.

    Args:
        sequence: Indexable, sliceable sequence of one-letter amino-acid
            codes (typically a ``str``). Letters missing from ``HP_MAP`` are
            classified as ``'P'``.

    Returns:
        A ``(coords, hp_types)`` tuple of two lists with one entry per
        residue: ``coords`` holds ``(x, y, z)`` integer tuples and
        ``hp_types`` holds ``'H'`` or ``'P'``. Both lists are empty for an
        empty sequence.
    """
    # An empty chain has no residue to put at the origin.
    if len(sequence) == 0:
        return [], []

    # Unit steps along the lattice axes, listed in the order they are tried.
    directions = (
        (1, 0, 0),   # right
        (-1, 0, 0),  # left
        (0, 1, 0),   # forward
        (0, -1, 0),  # backward
        (0, 0, 1),   # up
        (0, 0, -1),  # down
    )

    coords = [(0, 0, 0)]
    visited = {(0, 0, 0)}
    hp_types = [HP_MAP.get(sequence[0], 'P')]

    for aa in sequence[1:]:
        x, y, z = coords[-1]

        # Take the first free neighbour in try-order.
        for dx, dy, dz in directions:
            new_pos = (x + dx, y + dy, z + dz)
            if new_pos not in visited:
                break
        else:
            # All six neighbours are occupied. Keep the historical
            # best-effort behaviour of stepping +x anyway.
            # NOTE: that site is occupied, so the walk is no longer
            # self-avoiding if this branch ever runs.
            new_pos = (x + 1, y, z)

        coords.append(new_pos)
        visited.add(new_pos)
        hp_types.append(HP_MAP.get(aa, 'P'))

    return coords, hp_types

def plot_hp_lattice_3d(sequence):
    """Show the lattice walk of ``sequence`` as an interactive 3D Plotly figure.

    The coordinates come from ``generate_3d_saw``. Hydrophobic residues are
    drawn as red markers, polar residues as blue markers, and a gray line
    joins all residues in chain order. After displaying the figure, a short
    summary of the sequence is printed.

    Args:
        sequence: Sequence of one-letter amino-acid codes.
    """
    coords, hp_types = generate_3d_saw(sequence)

    fig = go.Figure()

    # (HP class, marker style, legend label), in drawing order.
    residue_styles = (
        ('H', dict(size=12, color='red'), 'Hydrophobic (H)'),
        ('P', dict(size=10, color='blue', opacity=0.8), 'Polar (P)'),
    )
    for kind, marker, label in residue_styles:
        members = [i for i, hp in enumerate(hp_types) if hp == kind]
        if not members:
            # No residue of this class: skip the trace entirely.
            continue
        xs, ys, zs = zip(*(coords[i] for i in members))
        hover = [f'Residue {i+1}: {sequence[i]} ({kind})' for i in members]
        fig.add_trace(go.Scatter3d(
            x=xs, y=ys, z=zs,
            mode='markers',
            marker=marker,
            name=label,
            text=hover,
        ))

    # Backbone: one polyline through every residue in chain order.
    backbone_x, backbone_y, backbone_z = zip(*coords)
    backbone = go.Scatter3d(
        x=backbone_x, y=backbone_y, z=backbone_z,
        mode='lines',
        line=dict(color='gray', width=4),
        name='Protein Backbone',
    )
    fig.add_trace(backbone)

    # Axis titles, cubic aspect ratio and an oblique default camera.
    scene = dict(
        xaxis_title='X',
        yaxis_title='Y',
        zaxis_title='Z',
        aspectmode='cube',
        camera=dict(eye=dict(x=1.5, y=1.5, z=1.5)),
    )
    fig.update_layout(
        title=f'3D HP Lattice Model: {sequence}',
        scene=scene,
        margin=dict(l=0, r=0, b=0, t=40),
        showlegend=True,
    )

    fig.show()

    # Summary statistics for the plotted chain.
    n_hydrophobic = hp_types.count('H')
    n_polar = hp_types.count('P')
    n_distinct_sites = len(set(coords))
    print(f"Sequence: {sequence}")
    print(f"Length: {len(sequence)} residues")
    print(f"H residues: {n_hydrophobic}")
    print(f"P residues: {n_polar}")
    print(f"Unique positions: {n_distinct_sites}")

# Example usage: runs only when this code executes as the main module
# (which includes running the cell in a notebook), not when it is imported.
if __name__ == "__main__":
    # Test sequence - you can replace with your own.
    # It contains each of the 20 amino-acid letters from HP_MAP exactly once.
    test_sequence = "ACDEFGHIKLMNPQRSTVWY"
    
    # Build the lattice walk, show the 3D figure and print the summary
    plot_hp_lattice_3d(test_sequence)

Sequence: ACDEFGHIKLMNPQRSTVWY
Length: 20 residues
H residues: 9
P residues: 11
Unique positions: 20
