In [1]:
!pip install torch



In [2]:
!pip install pandas



In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# import gym
# from gym import spaces
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pad_sequence
import torch.optim as optim
from torch.nn.functional import one_hot
# from rdkit import Chem
# from rdkit.Chem import Crippen
import random

In [4]:
state_size = 4
standard_amino_acids = ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V']
action_space = np.arange(0, 20)

In [5]:
class PolicyNetwork(nn.Module):
    def __init__(self, state_size, action_space):
        super(PolicyNetwork, self).__init__()
        self.fc1 = nn.Linear(state_size, 128)
        self.fc2 = nn.Linear(128, len(action_space))

    def forward(self, state):
        x = torch.relu(self.fc1(state))
        action_probs = torch.softmax(self.fc2(x), dim=0)
        return action_probs
    #needs layers to improve peptide assembly

In [6]:
policy_net = PolicyNetwork(state_size, action_space)
policy_net #gives 20 probabilities for the 20 standard amino acids.

PolicyNetwork(
  (fc1): Linear(in_features=4, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=20, bias=True)
)

In [7]:
# s = [-1,-1,-1,0]
# se = []
# for i in range(3):
#     se.append(s)
#     for j in range(len(s)):
#         if s[j] == -1:
#             s[j] = 9
#             break
#     s[3] = s[3] + 1
    
# print(se)

In [8]:
learning_rate = 0.001
num_episodes = 500
len_peptide = 3
max_steps = len_peptide
gamma = 0.99

In [9]:
# Define your reward function
def compute_discounted_rewards(rewards, gamma):
    discounted_rewards = [rewards[-1]]
    for i in range(len(rewards) - 2, -1, -1):
        discounted_rewards.insert(0, rewards[i] + gamma * discounted_rewards[0])
    return discounted_rewards

In [10]:
# Define your training loop
def train():
    # Set up your optimizer
    optimizer = optim.Adam(policy_net.parameters(), lr= learning_rate) 

    # Training loop
    for episode in range(num_episodes):
        # Initialize the state
        state = [-1,-1,-1,0] 

        # Lists to store the trajectory
        states = []
        actions = []
        rewards = []

        # Collect trajectory by interacting with the environment
        for step in range(max_steps):
        # Convert the state to a PyTorch tensor
            
            state_tensor = torch.tensor(state, dtype=torch.float32)
            # Forward pass to get action probabilities
            
            action_probs = policy_net(state_tensor)

            # Sample an action from the action probabilities
            action = torch.multinomial(action_probs, 1).item()

            # Execute the action and observe the next state and reward 
            next_state = action
            reward = action_probs[action].item()
                    
            # Store the trajectory
            states.append(state)
            actions.append(action)
            rewards.append(reward)

            # Update the state
            for i in range(len(state)):
                if state[i] == -1:
                    state[i] = next_state
                    break
            state[3] = state[3] + 1

        # Compute discounted rewards
        discounted_rewards = compute_discounted_rewards(rewards, gamma)

        # Convert trajectory to tensors
        state_tensor = torch.tensor(states, dtype=torch.float32)
        action_tensor = torch.tensor(actions, dtype=torch.int64)
        reward_tensor = torch.tensor(discounted_rewards, dtype=torch.float32)

        # Compute the loss
        action_probs = policy_net(state_tensor)
        selected_action_probs = action_probs.gather(1, action_tensor.unsqueeze(1)).squeeze()
        loss = -torch.mean(torch.log(selected_action_probs) * reward_tensor)

        # Update the policy network
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print training progress
        print(f"Episode: {episode}, Loss: {loss}")

In [11]:
train()

Episode: 0, Loss: 0.5430271625518799
Episode: 1, Loss: 0.9528985023498535
Episode: 2, Loss: 0.9086907505989075
Episode: 3, Loss: 0.18107479810714722
Episode: 4, Loss: 0.6095492839813232
Episode: 5, Loss: 0.11210090667009354
Episode: 6, Loss: 0.4600731134414673
Episode: 7, Loss: 0.2432212233543396
Episode: 8, Loss: 0.10943662375211716
Episode: 9, Loss: 0.8772689700126648
Episode: 10, Loss: 0.7673117518424988
Episode: 11, Loss: 0.18759524822235107
Episode: 12, Loss: 0.6430943608283997
Episode: 13, Loss: 0.13941650092601776
Episode: 14, Loss: 0.8355398178100586
Episode: 15, Loss: 0.1703762412071228
Episode: 16, Loss: 0.11849549412727356
Episode: 17, Loss: 0.2464107722043991
Episode: 18, Loss: 0.11423813551664352
Episode: 19, Loss: 0.3126469552516937
Episode: 20, Loss: 0.39979493618011475
Episode: 21, Loss: 0.248043492436409
Episode: 22, Loss: 0.07659133523702621
Episode: 23, Loss: 0.8324288725852966
Episode: 24, Loss: 0.149540513753891
Episode: 25, Loss: 0.26571550965309143
Episode: 26, L