<a href="https://colab.research.google.com/github/hongqin/AI4Health/blob/main/RL-protein-folding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

https://chatgpt.com/share/67a171cb-cc18-800c-a95f-a6077b3a9ebe

In [None]:
# Import necessary libraries
import numpy as np  # For numerical computations
import random  # For random action selection in early training
import gym  # For environment creation
from gym import spaces  # For defining action and observation spaces
import tensorflow as tf  # For building and training neural networks
from tensorflow.keras import Sequential, Input  # For defining a neural network model
from tensorflow.keras.layers import Dense, Flatten  # Dense & Flatten layers for deep learning model
from collections import deque  # For experience replay memory
import matplotlib.pyplot as plt  # For visualization

# ========================
# 1. Custom Environment: Protein Folding Simulation
# ========================
class ProteinFoldingEnv(gym.Env):
    """A simplified 2D grid environment for simulating protein folding.

    The chain starts at the centre cell of a ``size`` x ``size`` grid and is
    extended by one cell per successful step. Observations are
    ``(size, size)`` float32 arrays in which every cell visited by the chain
    is 1 and every other cell is 0, matching ``observation_space``.

    Actions: 0 = up (row - 1), 1 = down (row + 1), 2 = left (col - 1),
    3 = right (col + 1).

    This class follows the classic Gym API: ``reset()`` returns only the
    observation and ``step()`` returns ``(obs, reward, done, info)``.
    """

    def __init__(self, size=5):
        """Create the environment.

        Args:
            size: Side length of the square grid (5x5 by default). It is also
                the number of path entries at which an episode ends.
        """
        super().__init__()

        # Side length of the grid.
        self.size = size

        # Four possible movements: Up, Down, Left, Right.
        self.action_space = spaces.Discrete(4)

        # Binary occupancy grid (0: empty, 1: occupied).
        self.observation_space = spaces.Box(low=0, high=1, shape=(size, size), dtype=np.float32)

        # The chain starts at the centre of the grid.
        self.position = (size // 2, size // 2)

        # Ordered list of cells the chain has occupied so far.
        self.path = [self.position]

        # Occupancy grid kept in sync with ``path`` by reset() and step().
        self.grid = self._get_state()

    def reset(self):
        """Reset the chain to the centre cell and return the initial observation."""
        self.position = (self.size // 2, self.size // 2)
        self.path = [self.position]
        self.grid = self._get_state()
        # Hand out a copy so callers cannot mutate the internal grid.
        return self.grid.copy()

    def step(self, action):
        """Apply ``action`` and return ``(observation, reward, done, info)``.

        A move that stays inside the grid extends the path and is rewarded
        with the negative of the current energy. A move that would leave the
        grid is rejected (position and path are unchanged) and costs -10.

        Args:
            action: One of 0 (up), 1 (down), 2 (left), 3 (right).

        Returns:
            Tuple of the new observation, the reward, a flag that is True once
            the path holds at least ``size`` entries, and an empty info dict.

        Raises:
            ValueError: If ``action`` is not one of the four valid actions.
        """
        x, y = self.position

        if action == 0:  # Up
            new_pos = (x - 1, y)
        elif action == 1:  # Down
            new_pos = (x + 1, y)
        elif action == 2:  # Left
            new_pos = (x, y - 1)
        elif action == 3:  # Right
            new_pos = (x, y + 1)
        else:
            # Fail loudly instead of hitting an unbound ``new_pos`` below.
            raise ValueError(
                f"Invalid action {action!r}; expected 0 (up), 1 (down), 2 (left) or 3 (right)"
            )

        row, col = new_pos
        if 0 <= row < self.size and 0 <= col < self.size:
            self.position = new_pos
            self.path.append(new_pos)
            self.grid[new_pos] = 1
            reward = -self._compute_energy()  # Lower energy is better
        else:
            reward = -10  # Large penalty for moving out of bounds

        # Rejected moves do not grow the path, so they never end the episode
        # by themselves; callers are expected to cap the number of steps.
        done = len(self.path) >= self.size
        return self._get_state(), reward, done, {}

    def _compute_energy(self):
        """Return the summed Euclidean distance between consecutive path cells."""
        return sum(
            np.linalg.norm(np.subtract(current, previous))
            for previous, current in zip(self.path, self.path[1:])
        )

    def _get_state(self):
        """Return a fresh float32 occupancy grid built from the current path."""
        state = np.zeros((self.size, self.size), dtype=np.float32)
        for pos in self.path:
            state[pos] = 1  # Mark cells visited by the chain
        return state

    def render(self):
        """Display the current occupancy grid with matplotlib."""
        plt.imshow(self._get_state(), cmap="Blues")
        plt.show()

# ========================
# 2. Reinforcement Learning Agent (DQN)
# ========================
class DQNAgent:
    """A Deep Q-Network (DQN) agent to learn protein folding strategies.

    The agent keeps a bounded replay memory of transitions, selects actions
    epsilon-greedily from a small fully connected Q-network, and trains that
    network on random minibatches drawn from the memory.
    """

    def __init__(self, state_shape, action_size):
        """Create the agent.

        Args:
            state_shape: Shape of a single observation, e.g. ``(size, size)``.
            action_size: Number of discrete actions the network scores.
        """
        self.state_shape = state_shape
        self.action_size = action_size

        # Experience replay buffer; the oldest transitions are dropped first.
        self.memory = deque(maxlen=2000)

        # Q-learning parameters
        self.gamma = 0.95  # Discount factor for future rewards
        self.epsilon = 1.0  # Initial exploration probability (random actions)
        self.epsilon_min = 0.01  # Minimum exploration probability
        self.epsilon_decay = 0.995  # Multiplicative decay applied per replay() call
        self.learning_rate = 0.001  # Learning rate for the neural network

        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the Q-network (MSE loss, Adam optimizer)."""
        model = Sequential([
            Input(shape=self.state_shape),
            Flatten(),  # Flatten the 2D grid into a 1D vector
            Dense(24, activation="relu"),
            Dense(24, activation="relu"),
            Dense(self.action_size, activation="linear"),  # One Q-value per action
        ])
        model.compile(loss="mse", optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate))
        return model

    def act(self, state):
        """Choose an action with an epsilon-greedy strategy.

        Args:
            state: A single observation that can be reshaped to ``state_shape``.

        Returns:
            The selected action index as a plain ``int``.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)  # Explore
        q_values = self.model.predict(state.reshape(1, *self.state_shape), verbose=0)
        return int(np.argmax(q_values[0]))  # Exploit

    def remember(self, state, action, reward, next_state, done):
        """Store one transition in the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, batch_size=32):
        """Train the Q-network on one random minibatch, then decay epsilon.

        All sampled transitions are evaluated and fitted in batched calls
        instead of one predict/fit round-trip per transition.

        Args:
            batch_size: Maximum number of transitions to sample.
        """
        sample_size = min(len(self.memory), batch_size)
        if sample_size > 0:
            minibatch = random.sample(self.memory, sample_size)

            states = np.stack(
                [np.reshape(t[0], self.state_shape) for t in minibatch]
            ).astype(np.float32)
            next_states = np.stack(
                [np.reshape(t[3], self.state_shape) for t in minibatch]
            ).astype(np.float32)
            actions = np.array([t[1] for t in minibatch], dtype=np.intp)
            rewards = np.array([t[2] for t in minibatch], dtype=np.float32)
            dones = np.array([t[4] for t in minibatch], dtype=bool)

            # Start from the network's own predictions so only the Q-value of
            # the action actually taken contributes to the loss.
            q_targets = self.model.predict(states, verbose=0)
            q_next = self.model.predict(next_states, verbose=0)

            # Terminal transitions do not bootstrap from the next state.
            bootstrapped = rewards + self.gamma * q_next.max(axis=1)
            q_targets[np.arange(sample_size), actions] = np.where(dones, rewards, bootstrapped)

            self.model.fit(states, q_targets, batch_size=sample_size, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay  # Reduce exploration over time

# ========================
# 3. Training the Agent
# ========================
# Build the 5x5 environment and an agent sized to its observations/actions.
env = ProteinFoldingEnv(size=5)
agent = DQNAgent(
    state_shape=(env.size, env.size),
    action_size=env.action_space.n,
)
episodes = 500  # Number of training episodes

for e in range(episodes):
    state = env.reset()
    total_reward = 0

    # Cap each episode at twice the grid size, since moves rejected for
    # leaving the grid never advance the episode towards ``done``.
    for _ in range(2 * env.size):
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)

        # Record the transition before advancing to the next state.
        agent.remember(state, action, reward, next_state, done)
        total_reward += reward
        state = next_state

        if done:
            break

    # One round of experience replay (and one epsilon decay) per episode.
    agent.replay()

    # Report progress every 50th episode.
    if not e % 50:
        print(f"Episode {e}, Reward: {total_reward}, Epsilon: {agent.epsilon:.2f}")

print("Training complete!")

# ========================
# 4. Visualizing the Learned Folding Path
# ========================
# Start a fresh episode and feed the agent the observation returned by
# reset(); otherwise the first action would be chosen from the stale final
# state of the last training episode.
# NOTE: agent.act() is still epsilon-greedy, so a move is random with
# probability agent.epsilon.
state = env.reset()
for _ in range(env.size * 2):
    action = agent.act(state)
    state, _, done, _ = env.step(action)
    env.render()  # Show the occupancy grid after every step
    if done:
        break


Episode 0, Reward: -10.0, Epsilon: 0.99
