In [2]:
# Enable IPython's autoreload extension in mode 2 so that imported modules
# (including the local project modules used below) are reloaded before each
# cell runs, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
from environment import RNAInvEnvironment, make_vec_env, Monitor
from RNA_helper import get_puzzle
import torch as th
from models import EmbeddinsFeatureExtractor
from stable_baselines3.common import logger

In [4]:
# Puzzle indices noted by the author (presumably the ones of interest for
# these experiments — confirm): 1, 41, 84, 92, 97
puzzle_idx=84
objective_structure, sequence, puzzle_name = get_puzzle(idx=puzzle_idx, return_name=True, verbose=False)
# Display the length of the objective structure for the selected puzzle.
len(objective_structure)

389

In [5]:
# Run configuration. model_name combines the lower-cased puzzle name (spaces
# replaced by underscores) with the two network sizes and is reused for log,
# model and sequence file paths further down.
max_steps, features_dim, EMBEDDING_DIM = 1, 512, 3
model_name = '_'.join([
    puzzle_name.lower().replace(' ', '_'),
    str(features_dim),
    str(EMBEDDING_DIM),
])
print(model_name)

snowflake_necklace_(_or_v2.0_)_512_3


In [6]:
# Keyword arguments handed to RNAInvEnvironment whenever an environment is
# created in this notebook.
env_kwargs = dict(
    objective_structure=objective_structure,
    max_steps=max_steps,
    tuple_obs_space=True,
    metric_type='total_distance',
    sequences_file=f'solved_puzzles/{model_name}.txt',
)

In [7]:
# Number of environment copies requested from make_vec_env.
# NOTE(review): make_vec_env is imported from the project's `environment`
# module here, not directly from stable_baselines3 — confirm its semantics.
n_envs=12
env = make_vec_env(RNAInvEnvironment, n_envs=n_envs, env_kwargs=env_kwargs)
# Single (non-vectorised) environment alternative, kept for reference:
# env = RNAInvEnvironment(objective_structure=objective_structure, max_steps=max_steps, tuple_obs_space=True)



In [8]:
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.policies import ActorCriticPolicy

In [9]:
# Embedding smoke test: push one batch of reset observations through the
# feature extractor and display the input and output shapes.

# Pick the GPU when one is available and fall back to the CPU otherwise, so
# the cell does not crash on machines without CUDA.
device = th.device('cuda' if th.cuda.is_available() else 'cpu')

obs = env.reset()
efe = EmbeddinsFeatureExtractor(env.observation_space, EMBEDDING_DIM=EMBEDDING_DIM, features_dim=features_dim).to(device)
out = efe(th.as_tensor(obs).to(device))
obs.shape, out.shape

((12, 389), torch.Size([12, 512]))

In [10]:
# th.as_tensor(env.observation_space.sample())

In [11]:
# Policy configuration: use the custom embedding feature extractor with the
# sizes chosen in the configuration cell.
policy_kwargs = {
    'features_extractor_class': EmbeddinsFeatureExtractor,
    'features_extractor_kwargs': {
        'EMBEDDING_DIM': EMBEDDING_DIM,
        'features_dim': features_dim,
    },
}

In [12]:
# PPO agent trained on the vectorised environment `env`, with the custom
# feature extractor supplied through policy_kwargs. Training metrics go to
# the 'tensorboard_logs' directory.
model = PPO(
    policy=ActorCriticPolicy,
    env=env,
    n_steps=512,
    gamma=0.99,
    policy_kwargs=policy_kwargs,
    tensorboard_log='tensorboard_logs',
    verbose=1,
)

Using cuda device


In [13]:
# log_path = f"logs/{model_name}"
# # set up logger
# new_logger = logger.configure(log_path, ["stdout", "csv", "log", "tensorboard", "json"])
# model.set_logger(new_logger)

In [14]:
# eval_env = make_vec_env(
#     RNAInvEnvironment, n_envs=1,
#     env_kwargs={'objective_structure': objective_structure, 'max_steps': max_steps, 'tuple_obs_space': True}
# )

# Evaluation environment (a single copy) built with the same env_kwargs as
# the training environment. monitor_dir / monitor_kwargs are presumably
# forwarded to a Monitor wrapper that records the listed info keys — confirm
# against the project's make_vec_env.
eval_env = make_vec_env(
    RNAInvEnvironment,
    n_envs=1,
    env_kwargs=env_kwargs,
    monitor_dir=f'logs/{model_name}',
    monitor_kwargs=dict(
        info_keywords=(
            'free_energy',
            'structure_distance',
            'energy_to_objective',
            'energy_reward',
            'distance_reward',
        ),
    ),
)

In [15]:
# Evaluation callback passed to model.learn: runs 1024 deterministic
# evaluation episodes on eval_env every 5 * 512 callback calls and saves the
# best model under models/<model_name>.
eval_callback = EvalCallback(
    eval_env=eval_env,
    n_eval_episodes=1024,
    eval_freq=5 * 512,
    deterministic=True,
    best_model_save_path=f'models/{model_name}',
    verbose=1,
)

In [None]:
%%time
# Main training run (timed by the %%time cell magic, which must stay on the
# first line of the cell). The TensorBoard run is named after model_name and
# eval_callback is invoked during training.
model.learn(
    total_timesteps=1_000_000,
    tb_log_name=model_name,
    callback=[eval_callback]
)

Logging to tensorboard_logs/snowflake_necklace_(_or_v2.0_)_512_3_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 0.29     |
| time/              |          |
|    fps             | 5        |
|    iterations      | 1        |
|    time_elapsed    | 1127     |
|    total_timesteps | 6144     |
---------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | 0.294     |
| time/                   |           |
|    fps                  | 5         |
|    iterations           | 2         |
|    time_elapsed         | 2394      |
|    total_timesteps      | 12288     |
| train/                  |           |
|    approx_kl            | 3.6588523 |
|    clip_fraction        | 0.807     |
|    clip_range           | 0.2       |
|    entropy_loss         | -439      |
|    explained_variance   | -151 

In [68]:
from evaluation import evaluate_policy

In [None]:
# Scratch cell: an unfinished `Monitor(` call was left here. The fragment is
# kept as a comment because, on its own, it is a SyntaxError when the cell is
# executed.
# Monitor(

In [142]:
# Scratch experiment: two one-element tuples holding the same key, used in
# the following cell to see how such tuples combine.
info_keywords = ('free_energy',)
reset_keywords = ('free_energy',)

In [143]:
# Tuple concatenation does not de-duplicate: a key present in both tuples
# appears twice in the result.
info_keywords + reset_keywords

('free_energy', 'free_energy')

In [None]:
# Reference note (pasted dict fragment, presumably from the environment's
# step `info` dict — confirm against the environment source). Kept as
# comments because bare `'key': value` lines are not valid Python statements.
#     'free_energy': energy,
#     'folding_struc': current_structure,
#     'structure_distance': new_objective_distance,
#     'energy_to_objective': new_energy,
#     'energy_reward': energy_reward,
#     'distance_reward': distance_reward

In [147]:
# Monitor(filename='logs/test')


# Rebinds eval_env: same environment arguments as before, but with
# monitor_dir='logs' and 'folding_struc' added to the requested info keys.
eval_env = make_vec_env(
    RNAInvEnvironment,
    n_envs=1,
    env_kwargs=env_kwargs,
    monitor_dir='logs',
    monitor_kwargs=dict(
        info_keywords=(
            'free_energy',
            'folding_struc',
            'structure_distance',
            'energy_to_objective',
            'energy_reward',
            'distance_reward',
        ),
    ),
)
# eval_env = RNAInvEnvironment(**env_kwargs)

In [148]:
# Evaluate the current model for a single stochastic (non-deterministic)
# episode. evaluate_policy comes from the project's `evaluation` module and
# is unpacked into three values here (reward mean, reward std, episode length).
mean_reward, std_reward, mean_length = evaluate_policy(model, eval_env, n_eval_episodes=1, deterministic=False)

In [146]:
# Display the evaluation results.
# NOTE(review): this cell's execution count is lower than that of the
# evaluation cell above, so the saved output may come from an earlier run.
mean_reward, std_reward, mean_length

(211.800003, 0.0, 1.0)

In [99]:
# Hard-coded sample dictionary (presumably a step `info` dict pasted from an
# evaluation run — confirm) used below to inspect which keys are available.
d = {'free_energy': -280.5, 'folding_struc': '(((((((((((((((.((.((.((((((((((.(((...))).)))))))).((.((..((.((((..(.(((.((((.(((..(((.(((((.((.((((((.((.((((..((.((.....)).)).)))).)).)))))).)).))))).)))..))).)))).))).)..)).)).))..)).)).((((.(.((........)).).)))).(((((((((.((....)).)))))))))....(((((.(((((((((.(((....((.(((....(.((..(..((.(((((.(((.....))).))))).))..)..)).)......))).))....))).))))))))).))))).)).)).)).)))))))))))))))', 'structure_distance': 0.37630662020905925, 'energy_to_objective': -172.6999969482422, 'energy_reward': 200.89999771118164, 'distance_reward': 0.04878048780487809, 'episode': {'r': 200.899998, 'l': 1, 't': 795.51385}, 'terminal_observation': [0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 3, 3, 0, 0, 3, 0, 3, 3, 1, 0, 0, 3, 0, 0, 3, 0, 1, 1, 3, 0, 0, 3, 0, 0, 3, 0, 3, 1, 2, 0, 0, 3, 0, 0, 3, 0, 0, 2, 2, 0, 0, 3, 0, 0, 3, 0, 1, 0, 2, 0, 0, 3, 0, 0, 0, 3, 3, 3, 0, 0, 3, 3, 0, 2, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 3, 0, 0, 3, 0, 0, 1, 1, 0, 0, 3, 0, 0, 3, 0, 1, 0, 2, 0, 0, 3, 0, 0, 3, 0, 1, 0, 0, 0, 0, 3, 0, 0, 3, 0, 3, 0, 3, 0, 0, 3, 0, 0, 3, 0, 2, 1, 3, 0, 0, 3, 0, 0, 3, 3, 3, 3, 0, 0, 3, 3, 0, 2, 0, 3, 0, 0, 0, 3, 0, 0, 0, 0, 3, 3, 0, 0, 3, 0, 2, 2, 1, 2, 0, 3, 0, 0, 3, 0, 2, 1, 1, 0, 0, 3, 0, 0, 3, 0, 1, 0, 3, 2, 0, 3, 0, 0, 3, 0, 0, 2, 0, 2, 0, 3, 0, 0, 0, 0, 3, 0, 1, 2, 3, 3, 0, 0, 0, 3, 3, 3, 0, 0, 3, 3, 0, 1, 1, 3, 0, 0, 0, 3, 0, 0, 0, 3, 3, 3, 0, 0, 3, 0, 0, 0, 0, 0, 0, 3, 0, 0, 3, 0, 3, 1, 0, 0, 0, 3, 0, 0, 3, 0, 1, 2, 3, 0, 0, 3, 0, 0, 3, 0, 0, 2, 3, 0, 0, 3, 0, 0, 3, 0, 2, 0, 2, 0, 0, 3, 0, 0, 0, 0, 3, 3, 0, 0, 3, 3, 0, 1, 3, 0, 0, 0, 0, 3, 0, 0, 0, 3, 3, 3, 0, 0, 3, 0, 2, 3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 2, 1, 2, 3, 3, 0, 0, 3, 0, 2, 2, 2, 0, 0, 3, 0, 0, 3, 0, 3, 1, 3, 2, 0, 3, 0, 0, 3, 0, 3, 1, 1, 0, 0, 3, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3]}

In [100]:
# List the keys present in the sample dictionary `d`.
d.keys()

dict_keys(['free_energy', 'folding_struc', 'structure_distance', 'energy_to_objective', 'energy_reward', 'distance_reward', 'episode', 'terminal_observation'])

In [34]:
import RNA
# Four candidate nucleotide sequences. Only sol_3 is folded in this cell;
# sol_1, sol_2 and sol_4 are defined but not used here.
sol_1 = 'CUGCUUGGUUUGGGCCCUUUCUUUCCCCGCCCUAUACGGGGGUAAUCGUGGUCAAGGGGGUCGGUGGAUUGACCGAACUUUGAUUCACGGUUAUUUCGAUGUGGGGCAAUAAGUAG'
sol_2 = 'CGUUCGGCCAUGUCCUCGAAAAAUUGACGCUAAACGGCAUACCGGCGAUUUUUAGGCUACUCACCGAAAAGGGUGGCGCUUGACAGAUUGUUGGUGUUACGUUUGGCACCCGAAUG'
sol_3 = 'UUGACCCUGGGCGUGGUGUGGGUGAACGAGCAGUGUCUGAAUAUUUUAGUCCACCCUUGCUGGGGCCUCUAUUCUAUAGGGUGAGGUUGGAAUAUUUUAAUAUUGUUGUGGGUCGA'
sol_4 = 'GGUGGAGAAUAUCGCCUAGCCCUAACGAGCGCGCAUAUAUAGGCUUUUGACGGUGUGCAGCAGAGUGGACCCUCUGAUACGCUAGUUGGAGGCUUGUUAGUGUGUGUAACUCCACC'

# Target structure, in dot-bracket notation, that the folded sequence is compared against.
objective_structure_simple_1 = '(((((((....(((...........)))((((((((..(((((((((((((((((((...(((((......))))).)))))).)))))))))))))..))))))))..)))))))'

# RNA.fold is unpacked into (structure, energy); `RNA` is presumably the
# ViennaRNA Python binding — confirm. The last line displays whether the
# folded structure of sol_3 equals the target exactly.
sec_struct, energy = RNA.fold(sol_3)
sec_struct == objective_structure_simple_1

True

In [136]:
# Compare the last folded structure with the notebook-level objective.
# NOTE(review): the saved result depends on which puzzle `objective_structure`
# referred to when this cell was run — re-check after a fresh Run All.
sec_struct == objective_structure

True

In [None]:
# Optional extra training pass, followed by a checkpoint of the resulting model.
model.learn(total_timesteps=25000)
# Save under the run's own directory (the same models/<model_name> folder the
# evaluation callback uses for its best model). The previous "ppo_cartpole"
# file name was a tutorial leftover that did not describe this model.
model.save(f'models/{model_name}/final_model')