In [None]:
import warnings
warnings.filterwarnings('ignore',category=FutureWarning)

import numpy as np
import gym
import sys
from tqdm import tqdm
import time

# pip install .
import rlfuzz as rf

# pip install tensorflow
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Activation, Flatten, Input, Concatenate
from tensorflow.keras.optimizers import Adam

# pip install keras-rl2
from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess

In [None]:
# Gym id of the fuzzing environment; also used later to build the weights-file
# path and to create the evaluation environment.
ENV_NAME = 'FuzzBase64-v0'
# Training environment instance (the id is presumably registered by
# `import rlfuzz` in the import cell -- TODO confirm).
env = gym.make(ENV_NAME)

In [None]:
# Reconfigure the environment first: the `.n` attribute read below is taken
# from the action space as it is *after* this call (presumably a Discrete
# space -- TODO confirm against rlfuzz).
env.setDiscreteEnv()

# Read each space once, echo it, and keep the sizes for the network cells.
nb_actions = env.action_space.n
observation_shape = env.observation_space.shape
print(nb_actions)
print(observation_shape)
nb_observation = observation_shape[0]

In [None]:
# --- Actor -------------------------------------------------------------------
# Input: one observation with a leading axis of length 1 (presumably the
# window_length=1 axis that the agent's SequentialMemory adds -- TODO confirm).
# Output: nb_actions values squashed to [-1, 1] by tanh.
actor_input = Input(shape=(1,) + env.observation_space.shape, name='actor_observation_input')
f_actor_input = Flatten()(actor_input)
x = Dense(1024, activation='relu')(f_actor_input)
x = Dense(64, activation='relu')(x)
y = Dense(nb_actions, activation='tanh')(x)
actor = Model(inputs=actor_input, outputs=y, name='Actor')
actor.summary()

# --- Critic ------------------------------------------------------------------
# Inputs: the action vector and the (flattened) observation, concatenated.
# `shape` must be a tuple; the previous `(env.action_space.n)` was just a
# parenthesised int. `(nb_actions,)` also keeps this input in lock-step with
# the actor's output width defined above.
critic_action_input = Input(shape=(nb_actions,), name='critic_action_input')
critic_observation_input = Input(shape=(1,) + env.observation_space.shape, name='critic_observation_input')
f_critic_observation_input = Flatten()(critic_observation_input)
x = Concatenate()([critic_action_input, f_critic_observation_input])
x = Dense(1024, activation='relu')(x)
x = Dense(64, activation='relu')(x)
# NOTE(review): sigmoid bounds the predicted Q-value to (0, 1). With gamma=.99
# discounted returns can exceed that range unless rewards are very small;
# a linear output is the usual choice for a DDPG critic -- confirm intent.
y = Dense(1, activation='sigmoid')(x)
critic = Model(inputs=[critic_action_input, critic_observation_input], outputs=y, name='Critic')
critic.summary()

In [None]:
# DDPG agent used for training.
#  - memory: replay buffer of up to 100000 transitions, window_length=1
#    (matches the leading `1` axis of the network observation inputs).
#  - nb_steps_warmup_*: number of initial steps before the critic / actor
#    start being trained.
#  - random_process: Ornstein-Uhlenbeck noise, one component per action output.
#  - target_model_update=1e-3: a value < 1 is presumably interpreted by
#    keras-rl as a soft-update rate for the target networks -- TODO confirm.
agent = DDPGAgent(nb_actions=nb_actions,
                  actor=actor,
                  critic=critic,
                  critic_action_input=critic_action_input,
                  memory=SequentialMemory(limit=100000, window_length=1),
                  nb_steps_warmup_critic=1000,
                  nb_steps_warmup_actor=1000,
                  random_process=OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3),
                  gamma=.99,
                  target_model_update=1e-3
                 )
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras optimizers warn about or reject.
agent.compile(Adam(learning_rate=.001, clipnorm=1.), metrics=['mae'])

In [None]:
# Train the agent on `env` for 1500 steps. The returned object's `.history`
# dict is what the plotting cell passes to show_graghs().
history = agent.fit(env, nb_steps=1500, visualize=False, verbose=1) # Runs nb_steps steps; per the original note, done=True is forced after nb_max_episode_steps steps (that argument is not passed in this call).

# Optional persistence, currently disabled.
# NOTE(review): the evaluation cell at the end of the notebook loads
# '../model/ddpg_{}_weights.h5f'; that file is only written by the
# save_weights line below, so a fresh "Run All" needs it enabled (or a file
# left over from an earlier run).
# import pandas as pd
# pd.DataFrame(history.history).to_csv('../logs/rl_ddpg_{}_history.csv'.format(ENV_NAME))
# agent.save_weights('../model/ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

In [None]:
from collections import Counter

# Frequency of every value recorded in env.mutate_history.
print(Counter(env.mutate_history))
# Every entry of env.virgin_count is unpacked as a 2-item pair; tally the
# first components, then the second components.
print(Counter(first for first, _ in env.virgin_count))
print(Counter(second for _, second in env.virgin_count))

In [None]:
# Plotting dependencies used by show_graghs() below.
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default styling to all subsequent matplotlib figures.
sns.set()

def _mark_extremes(values, include_min=True):
    """Draw dashed red guide lines, labelled with their value, at the max
    (and optionally the min) of ``values`` on the current axes."""
    if len(values) == 0:
        # Nothing recorded: max()/min() would raise on an empty sequence.
        return
    levels = [max(values)]
    if include_min:
        levels.append(min(values))
    for level in levels:
        plt.axhline(y=level, color='r', linewidth=1, linestyle='--')
        plt.text(0, level, str(level), fontdict={'size': 8, 'color': 'r'})


def show_graghs(env, history):
    """Plot a 2x2 overview of a run recorded on ``env``.

    Panels (matplotlib subplot codes):
      221 - ``env.input_len_history`` per step, with a max guide line.
      222 - ``env.transition_count`` per step, with max/min guide lines.
      224 - ``env.reward_history`` per step, with max/min guide lines.
      223 - horizontal bar chart of how often each value occurs in
            ``env.mutate_history``.

    Also prints the mean of the most recent (at most 1000) entries of
    ``env.transition_count``.

    Args:
        env: object exposing the four history sequences above plus an integer
            ``mutate_size`` (number of y-ticks on the bar chart).
        history: dict-like training log. When it contains an ``'nb_steps'``
            entry, each value is drawn as a vertical marker on panels 221 and
            222 (presumably the step index at which each episode ended --
            TODO confirm against keras-rl). May be empty or None.
    """
    from collections import Counter

    # Display names for mutator ids 0..8, in id order.
    mutator_names = ['EraseBytes', 'InsertByte', 'InsertRepeatedBytes', 'ChangeByte', 'ChangeBit',
                     'ShuffleBytes', 'ChangeASCIIInteger', 'ChangeBinaryInteger', 'CopyPart']

    # Tolerate a missing/empty history or one without an 'nb_steps' entry
    # instead of raising KeyError.
    if history is not None and len(history) > 0 and 'nb_steps' in history:
        step_marks = list(history['nb_steps'])
    else:
        step_marks = []

    plt.figure(figsize=(20,8))

    # --- 221: input length per step ------------------------------------------
    lengths = env.input_len_history
    plt.subplot(221)
    plt.plot(lengths, marker='o', markersize=2, linewidth=1)
    plt.xlabel('step')
    plt.ylabel('length')
    _mark_extremes(lengths, include_min=False)
    for n in step_marks:
        plt.axvline(x=n, color='r', linewidth=1, linestyle='--')
        plt.text(n, 0, str(n), fontdict={'size': 8, 'color': 'r'})

    # --- 222: transition count per step --------------------------------------
    transitions = env.transition_count
    plt.subplot(222)
    plt.plot(transitions, marker='o', markersize=2, linewidth=1)
    plt.xlabel('step')
    plt.ylabel('transition_count')
    _mark_extremes(transitions)
    for n in step_marks:
        plt.axvline(x=n, color='r', linewidth=1, linestyle='--')
    # Average over the entries actually present: dividing by a fixed 1000
    # under-reports the mean whenever fewer than 1000 steps were recorded.
    recent = transitions[-1000:]
    if len(recent) > 0:
        print('[+] Avg of last {} steps: {}'.format(len(recent), sum(recent) / len(recent)))

    # --- 224: reward per step ------------------------------------------------
    rewards = env.reward_history
    plt.subplot(224)
    plt.plot(rewards, linewidth=1)
    plt.xlabel('step')
    plt.ylabel('reward_history')
    _mark_extremes(rewards)

    # --- 223: mutator usage --------------------------------------------------
    usage = Counter(env.mutate_history)
    plt.subplot(223)
    plt.barh(list(usage.keys()), list(usage.values()))
    # Keep the number of tick labels equal to the number of ticks even when
    # env.mutate_size differs from the nine known names (matplotlib rejects a
    # length mismatch): truncate, or pad with the bare index.
    n_mutators = env.mutate_size
    tick_labels = mutator_names[:n_mutators] + [str(i) for i in range(len(mutator_names), n_mutators)]
    plt.yticks(range(n_mutators), tick_labels)
    plt.xlabel('step')
    # plt.ylabel('action')

#     plt.savefig('../logs/rl_ddpg_{}.png'.format(ENV_NAME))

In [None]:
# Plot the training run: `history.history` is the dict form of the object
# returned by agent.fit() above.
show_graghs(env, history.history)

In [None]:
# Load the trained model into a fresh agent and time a test run.
# NOTE(review): `actor` and `critic` are the same model objects the training
# agent was built on, so their in-memory weights are overwritten by
# load_weights() below rather than living in independent networks.
newAgent = DDPGAgent(nb_actions=nb_actions,
                  actor=actor,
                  critic=critic,
                  critic_action_input=critic_action_input,
                  memory=SequentialMemory(limit=100000, window_length=1),
                  nb_steps_warmup_critic=2000,
                  nb_steps_warmup_actor=2000,
                  random_process=OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3),
                  gamma=.99,
                  target_model_update=1e-3
                 )
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
newAgent.compile(Adam(learning_rate=.001, clipnorm=1.), metrics=['mae'])
# NOTE(review): this file is only produced by the agent.save_weights(...) line
# that is commented out in the training cell; it must exist before this runs.
newAgent.load_weights('../model/ddpg_{}_weights.h5f'.format(ENV_NAME))

newEnv = gym.make(ENV_NAME)
# Configure the evaluation env the same way as the training env: nb_actions
# (and therefore the actor's output width) was read from the action space
# after setDiscreteEnv() was called on `env`.
newEnv.setDiscreteEnv()
# perf_counter() is monotonic, so the elapsed time cannot be skewed by
# wall-clock adjustments.
start = time.perf_counter()
newHistory = newAgent.test(newEnv, visualize=False, nb_max_episode_steps=5000)
end = time.perf_counter()
print('[+] {} min(s)'.format((end - start) / 60))