In [1]:
import numpy as np
import pandas as pd
import scipy
from gym import error, spaces, utils
from collections import OrderedDict
from sklearn.externals import joblib as jb
from random  import randint
import time

## Let's set up everything

In [2]:
important_features=['source_app_bytes','remote_app_bytes','tcp_packets','remote_app_packets','vulume_bytes','source_app_packets']
n_states=len(important_features)
n_actions=scipy.math.factorial(n_states)
e = 0.9 # greedy factor
a= 0.1  # learning rate
g= 0.9 # discount factor
max_episodes = 13 # maximum episodes
fresh_time= 0.3 # fresh time for one move
adversarial_data=pd.read_csv("../Datasets/Colcom2018Cleaned.csv",header=0,sep=';')

In [3]:
adversarial_data.drop(adversarial_data.index,inplace=True)

In [4]:
adversarial_data.info

<bound method DataFrame.info of Empty DataFrame
Columns: [name, tcp_packets, dist_port_tcp, external_ips, vulume_bytes, udp_packets, source_app_packets, remote_app_packets, source_app_bytes, remote_app_bytes, dns_query_times, type]
Index: []>

In [5]:
adversarial_data = pd.DataFrame(np.zeros((1,len(list(adversarial_data.columns.values)))),columns=list(adversarial_data.columns.values)).append(adversarial_data, ignore_index=True).copy()
adversarial_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 12 columns):
name                  1 non-null float64
tcp_packets           1 non-null float64
dist_port_tcp         1 non-null float64
external_ips          1 non-null float64
vulume_bytes          1 non-null float64
udp_packets           1 non-null float64
source_app_packets    1 non-null float64
remote_app_packets    1 non-null float64
source_app_bytes      1 non-null float64
remote_app_bytes      1 non-null float64
dns_query_times       1 non-null float64
type                  1 non-null float64
dtypes: float64(12)
memory usage: 176.0 bytes


In [6]:
adversarial_data

Unnamed: 0,name,tcp_packets,dist_port_tcp,external_ips,vulume_bytes,udp_packets,source_app_packets,remote_app_packets,source_app_bytes,remote_app_bytes,dns_query_times,type
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
adversarial_data= adversarial_data.drop('type',axis=1).copy()
adversarial_data= adversarial_data.drop('name',axis=1).copy()

In [8]:
adversarial_data

Unnamed: 0,tcp_packets,dist_port_tcp,external_ips,vulume_bytes,udp_packets,source_app_packets,remote_app_packets,source_app_bytes,remote_app_bytes,dns_query_times
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
adversarial_data.shape

(1, 10)

## Let's build the Q-table

In [10]:
def build_q_table(n_states, actions):
    table = pd.DataFrame(
    np.zeros((actions, n_states)), # q_table initial values
    columns = important_features # Important features
    )
    return table

build_q_table(n_states, n_actions).info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 720 entries, 0 to 719
Data columns (total 6 columns):
source_app_bytes      720 non-null float64
remote_app_bytes      720 non-null float64
tcp_packets           720 non-null float64
remote_app_packets    720 non-null float64
vulume_bytes          720 non-null float64
source_app_packets    720 non-null float64
dtypes: float64(6)
memory usage: 33.8 KB


## Let's choose the action

In [11]:
def choose_action(state, q_table):
    # This is how to choose an action
    state_actions = q_table.iloc[state, :]
    if (np.random.uniform() > e) or ((state_actions == 0).all()):  # act non-greedy or state-action have no value
        action_name = np.random.choice(important_features)
    else:   # act greedy
        action_name = state_actions.idxmax()   # replace argmax to idxmax as argmax means a different function in newer version of pandas
    return action_name

choose_action(4,build_q_table(n_states, n_actions))

'tcp_packets'

## Alright, let's upload the best model to attack

In [12]:
model=jb.load('rfDefense.sav')
print(model)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=50, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=250, n_jobs=None,
            oob_score=False, random_state=45, verbose=0, warm_start=False)


## Feedback...

In [13]:
def get_env_feedback(S, A):
    # This is how agent will interact with the environment
    pred = model.predict(A)
    print(pred)
    if pred == 'benign':    # it is detected as benign
        if S == n_states - 2:
            R = 2
            S_ = 'terminal'            
        else:
            S_ = S + 1
            R = 1
    else:
        R = 0
        if S == 0:
            S_ = S  # reach the defense
        else:
            S_ = S - 1
    return S_, R   
    
#get_env_feedback(1, complete_permissions)

In [22]:
def update_env(S, episode, step_counter):
    # This is how environment be updated
    env_list = ['-']*(n_states-1) + ['D']   # '---------T' our environment
    if S == 'terminal':
        interaction = 'Episode %s: total_steps = %s' % (episode+1, step_counter)
        print('\r{}'.format(interaction), end='')
        time.sleep(2)
        print('\r                                ', end='')
    else:
        if n_states - S == 2:
            env_list[S] = 'M'
            interaction = ''.join(env_list)
            print('\r{}'.format(interaction), end='')
            time.sleep(fresh_time)
        else:            
            env_list[S] = 'B'
            interaction = ''.join(env_list)
            print('\r{}'.format(interaction), end='')
            time.sleep(fresh_time)

In [23]:
def rl():
    q_table = build_q_table(n_states, n_actions)
   
    for episode in range(max_episodes):
        adv_data = adversarial_data.copy()
        step_counter = 0
        S = 0
        is_terminated = False
        update_env(S, episode, step_counter)
        while not is_terminated:                
            adv_data.loc[0,'source_app_bytes'] = randint(0,9589585)
            adv_data.loc[0,'remote_app_bytes'] = randint(69,727621)
            adv_data.loc[0,'tcp_packets'] = randint(0,4704)
            adv_data.loc[0,'remote_app_packets'] = randint(0,6690)    
            adv_data.loc[0,'vulume_bytes'] = randint(0,726286)
            adv_data.loc[0,'source_app_packets'] = randint(1,4721)
            S_, R = get_env_feedback(S, adv_data)  # take action & get next state and reward
            q_predict = q_table.loc[S, important_features]
            if S_ != 'terminal':
                q_target = R + g * q_table.iloc[S_, :].max()   # next state is not terminal
            else:
                q_target = R     # next state is terminal
                is_terminated = True    # terminate this episode  
 
            q_table.loc[S, important_features] += a * (q_target - q_predict)  # update
            S = S_  # move to next state  
            update_env(S, episode, step_counter+1)
            step_counter += 1
    return q_table, adv_data

In [24]:
# let's start it 
q_table, a_permissions = rl()
print('\r\nQ-table:\n')
print(q_table)

B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign']
B----D['benign

KeyboardInterrupt: 

In [None]:
if model.predict(adv_) == 'malicious' :
    print("Malware developed")
else:
    print("Benign app")