<a href="https://colab.research.google.com/github/hongqin/AI4Health/blob/main/RL-protein-folding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

https://chatgpt.com/share/67a171cb-cc18-800c-a95f-a6077b3a9ebe

In [None]:
import numpy as np
import random
import gym
from gym import spaces
import tensorflow as tf
from tensorflow.keras import Sequential, Input
from tensorflow.keras.layers import Dense, Flatten
from collections import deque
import matplotlib.pyplot as plt

# Custom Protein Folding Environment
class ProteinFoldingEnv(gym.Env):
    """Toy lattice environment in which a chain is grown on a square grid.

    The chain starts at the grid centre. Each action tries to move the chain
    head one cell up, down, left or right. A move that stays on the grid is
    appended to the path; a move that would leave the grid is rejected and
    penalised. The episode ends once the path holds ``size`` positions.

    Observations are ``(size, size)`` float32 arrays with 1 at every cell the
    path has visited and 0 elsewhere.
    """

    # (row, column) offset applied to the head for each discrete action id.
    _MOVES = {
        0: (-1, 0),  # Up
        1: (1, 0),   # Down
        2: (0, -1),  # Left
        3: (0, 1),   # Right
    }

    def __init__(self, size=5):
        """Create a ``size`` x ``size`` grid with the chain head at the centre."""
        super().__init__()
        self.size = size
        self.grid = np.zeros((size, size))
        self.position = (size // 2, size // 2)
        self.path = [self.position]
        self.action_space = spaces.Discrete(len(self._MOVES))
        self.observation_space = spaces.Box(low=0, high=1, shape=(size, size), dtype=np.float32)

    def reset(self):
        """Restart the chain at the grid centre and return the initial observation."""
        self.grid = np.zeros((self.size, self.size))
        self.position = (self.size // 2, self.size // 2)
        self.path = [self.position]
        return self._get_state()

    def step(self, action):
        """Apply one move and return ``(observation, reward, done, info)``.

        Args:
            action: 0 (up), 1 (down), 2 (left) or 3 (right).

        Returns:
            The occupancy grid after the move, the reward (negative path
            energy for an on-grid move, -10 for an off-grid move), whether
            the path has reached ``size`` positions, and an empty info dict.

        Raises:
            ValueError: if ``action`` is not one of the four known moves.
        """
        try:
            d_row, d_col = self._MOVES[int(action)]
        except (KeyError, TypeError, ValueError):
            # Previously an unknown action fell through the if/elif chain and
            # crashed later with an UnboundLocalError on ``new_pos``.
            raise ValueError(
                f"invalid action {action!r}; expected one of {sorted(self._MOVES)}"
            ) from None

        row, col = self.position
        new_pos = (row + d_row, col + d_col)

        if 0 <= new_pos[0] < self.size and 0 <= new_pos[1] < self.size:
            self.position = new_pos
            self.path.append(new_pos)
            reward = -self._compute_energy()
        else:
            # Off-grid moves are rejected: the head stays put and the path
            # does not grow, so they never bring the episode closer to ending.
            reward = -10

        done = len(self.path) >= self.size
        return self._get_state(), reward, done, {}

    def _compute_energy(self):
        """Return the summed Euclidean distance between consecutive path positions."""
        return sum(
            np.linalg.norm(np.array(current) - np.array(previous))
            for previous, current in zip(self.path, self.path[1:])
        )

    def _get_state(self):
        """Return the occupancy grid (1 where the path has been) as float32."""
        # float32 matches the dtype declared in ``observation_space``.
        state = np.zeros((self.size, self.size), dtype=np.float32)
        for pos in self.path:
            state[pos] = 1
        return state

    def render(self):
        """Show the current occupancy grid with matplotlib."""
        plt.imshow(self._get_state(), cmap="Blues")
        plt.show()

# Deep Q-Network (DQN) Agent
class DQNAgent:
    """Epsilon-greedy Deep Q-Network agent with a bounded replay memory.

    Args:
        state_shape: shape of a single observation, e.g. ``(size, size)``.
        action_size: number of discrete actions.
        memory_size: maximum number of transitions kept for replay.
        gamma: discount factor applied to the bootstrapped next-state value.
        epsilon: initial probability of taking a random action.
        epsilon_min: value at or below which epsilon is no longer decayed.
        epsilon_decay: multiplicative decay applied to epsilon per replay call.
        learning_rate: learning rate of the Adam optimizer.
    """

    def __init__(self, state_shape, action_size, memory_size=2000, gamma=0.95,
                 epsilon=1.0, epsilon_min=0.01, epsilon_decay=0.995,
                 learning_rate=0.001):
        self.state_shape = state_shape
        self.action_size = action_size
        self.memory = deque(maxlen=memory_size)
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.learning_rate = learning_rate
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the Q-network: flatten -> 24 -> 24 -> one output per action."""
        model = Sequential([
            Input(shape=self.state_shape),
            Flatten(),
            Dense(24, activation="relu"),
            Dense(24, activation="relu"),
            Dense(self.action_size, activation="linear"),
        ])
        model.compile(loss="mse", optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate))
        return model

    def act(self, state):
        """Return an action id: random with probability epsilon, otherwise greedy."""
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        q_values = self.model.predict(state.reshape(1, *self.state_shape), verbose=0)
        return int(np.argmax(q_values[0]))

    def remember(self, state, action, reward, next_state, done):
        """Store one transition in the replay memory (oldest entries are evicted)."""
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, batch_size=32):
        """Train on a random minibatch from memory, then decay epsilon.

        At most ``batch_size`` transitions are sampled. Q-values for the whole
        minibatch are predicted in two batched calls and the network is
        fitted once, rather than issuing two predictions and one fit per
        sample. The update is therefore a single gradient step over the
        minibatch.

        Epsilon is decayed on every call, including when the memory is empty.
        """
        minibatch = random.sample(self.memory, min(len(self.memory), batch_size))

        if minibatch:
            states, actions, rewards, next_states, dones = zip(*minibatch)
            states = np.stack(states).reshape(-1, *self.state_shape)
            next_states = np.stack(next_states).reshape(-1, *self.state_shape)
            actions = np.asarray(actions, dtype=np.int64)
            rewards = np.asarray(rewards, dtype=np.float32)
            dones = np.asarray(dones, dtype=bool)

            next_q = self.model.predict(next_states, verbose=0)
            targets = self.model.predict(states, verbose=0)

            # Terminal transitions get the bare reward; the rest bootstrap
            # from the best predicted next-state value.
            bootstrapped = rewards + self.gamma * next_q.max(axis=1)
            # Only the taken action's Q-value is moved towards its target;
            # the other outputs keep their predicted values (zero error).
            targets[np.arange(len(minibatch)), actions] = np.where(dones, rewards, bootstrapped)

            self.model.fit(states, targets, batch_size=len(minibatch), epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

# Training: run epsilon-greedy episodes on a 5x5 grid, replaying once per episode.
env = ProteinFoldingEnv(size=5)
state_shape = (env.size,) * 2
action_size = env.action_space.n
agent = DQNAgent(state_shape=state_shape, action_size=action_size)
episodes = 500

for e in range(episodes):
    state, total_reward = env.reset(), 0
    # Each episode is capped at twice the grid size in steps.
    for _ in range(2 * env.size):
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        agent.remember(state, action, reward, next_state, done)
        state, total_reward = next_state, total_reward + reward
        if done:
            break
    agent.replay()
    # Progress report every 50th episode.
    if not e % 50:
        print(f"Episode {e}, Reward: {total_reward}, Epsilon: {agent.epsilon:.2f}")

print("Training complete!")

# Visualization: roll out one episode with the trained agent, rendering each step.
# Keep the observation returned by reset(); discarding it made the first
# action depend on the stale `state` left over from the last training episode.
# NOTE: agent.act() is still epsilon-greedy here, so an occasional random move
# may appear in the rollout.
state = env.reset()
for _ in range(env.size * 2):
    action = agent.act(state)
    state, _, done, _ = env.step(action)
    env.render()
    if done:
        break


Episode 0, Reward: -10.0, Epsilon: 0.99
Episode 50, Reward: -10.0, Epsilon: 0.77
