# Main script for running the molecule environment

In [1]:
import gym
import import_ipynb
from gym import wrappers, logger
from MoleculeAgent import MoleculeAgent

importing Jupyter notebook from MoleculeAgent.ipynb


In [2]:

def main(episode_count=5, max_steps=10):
    """
    Run the prototype training loop for the drug-design molecule environment.

    Instantiates the ``gym_molecule:molecule-v0`` environment and a
    ``MoleculeAgent`` built from the environment's action space, then runs
    the requested number of episodes. At every step the agent's action and
    the tuple returned by the environment are printed and the environment
    is rendered.

    Args:
        episode_count: Number of episodes to run. Defaults to 5.
        max_steps: Maximum number of steps per episode. Defaults to 10. An
            episode stops earlier if the environment reports ``done``.
    """
    env = gym.make("gym_molecule:molecule-v0")

    # Guarantee the environment is released even if the agent or a step raises.
    try:
        # Create a new Molecule Agent object.
        agent = MoleculeAgent(env.action_space)

        print("The prototype optimisation goal is to reach the same number of atoms, bonds and conformers as the desired molecule.")
        print()
        # The optimisation goal that is hardcoded in the environment is Chem.MolFromSmiles("C=C-C-C=C-C-O").

        for episode in range(episode_count):
            print("EPISODE:", episode)

            # The env resets; the feedback signals are reset with it so that the
            # first action of this episode is not driven by the previous
            # episode's final reward / done flag.
            ob = env.reset()
            reward = 0
            done = False

            for _step in range(max_steps):
                # The agent 'chooses' its action based on the tuple from the current env state.
                action = agent.act(ob, reward, done)
                print("Agent's chosen action: ", action)

                # The new tuple is returned once the env has considered the agent's action.
                ob, reward, done, info = env.step(action)
                print("Env new state:", ob, "Reward to agent:", reward, "Done:", done, "Info: ", info)

                # The env renders.
                env.render()

                print()

                # Once the env reaches a terminating state the episode is over;
                # stepping further without a reset is not meaningful.
                if done:
                    break
    finally:
        env.close()
    
    
# Start the training loop only when this file is executed directly,
# not when it is imported from another module or notebook.
if __name__ == '__main__':
    main()

Optimisation goal:
C=C-C-C=C-C-O
A: 7
B: 6
C: 0

The prototype optimisation goal is to reach the same number of atoms, bonds and conformers as the desired molecule.

EPISODE: 0
Env original state: 
[2 2 0]

RL Policy implemented.
Agent's chosen action:  2
... Checking molecular validity.
... Calculating molecular valency.
... Calculating reward based on chemical validity and molecule valency.
Env new state: [2 3 0] Reward to agent: 1 Done: False Info:  {}

RL Policy implemented.
Agent's chosen action:  2
... Checking molecular validity.
... Calculating molecular valency.
... Calculating reward based on chemical validity and molecule valency.
Env new state: [2 4 0] Reward to agent: 1 Done: False Info:  {}

RL Policy implemented.
Agent's chosen action:  3
... Checking molecular validity.
... Calculating molecular valency.
... Calculating reward based on chemical validity and molecule valency.
Env new state: [2 5 0] Reward to agent: 1 Done: False Info:  {}

RL Policy implemented.
Agent's 