In [165]:
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from tqdm import tqdm
from collections import deque 
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

In [None]:
import plotly.express as px

In [166]:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

In [167]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  0


In [262]:
X,y = make_classification(5000,150,random_state=50,weights=[0.9])
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=50)

In [263]:
lr = LogisticRegression()
lr.fit(X_train,y_train)
pred_prob = lr.predict_proba(X_test)

In [264]:
def rew(state):
    y_pred = np.where(pred_prob[:,1]>state,1,0)
    return f1_score(y_test, y_pred)

In [265]:
fig = px.line(x = np.arange(0,1,0.001), y = [rew(t) for t in np.arange(0,1,0.001)])
fig.show()

In [266]:
 actions = {0 : -0.1, 1 : -0.01, 2 : 0.01, 3 : 0.1}
 len(actions)

4

In [267]:
def step(action, state):
    t1 = state[0]
    r1 = state[1]
    t2 = state[2]
    r2 = state[3]

    ns = t1 + actions[action]
    rs = rew(ns)

    rls = [ns,r1,r2]

    if rs == np.max(rls):
        r = 10
    elif rs > r1:
        r = 5
    elif rs > r2:
        r = 5
    elif rs == r1 == r2:
        r = 0
    elif rs == np.min(rls):
        r = -5
    else:
        r=-10

    next_state = [ns,rs,t1,r1]

    if ns == t2:
        terminated = True
    else:
        terminated = False
        
    return r, np.array(next_state), terminated

In [268]:
%run -i ./DQN_Class.py
gamma=0.5
epsilon=.8
numberEpisodes= 120
LearningQDeep=DeepQLearning(gamma,epsilon,numberEpisodes)


In [269]:
LearningQDeep.actionDimension

4

In [270]:
LearningQDeep.epsilon

0.8

In [271]:
%%time
LearningQDeep.trainingEpisodes()

Simulating episode 0
	 Max of rewards 10
	 Min state 0.028629303912181304
	 Max state 0.6218487394957982
Simulating episode 1
	 Max of rewards 5
	 Min state 0.5
	 Max state 0.7535114656176038
Simulating episode 2
	 Max of rewards 5
	 Min state 0.4974093264248704
	 Max state 0.8111975319386461
Simulating episode 3
	 Max of rewards 5
	 Min state 0.2994011976047904
	 Max state 0.940163310677717
Simulating episode 4
	 Max of rewards 0
	 Min state 0.5
	 Max state 0.701243256448701
Simulating episode 5
	 Max of rewards 5
	 Min state 0.5
	 Max state 0.7861658107800165
Simulating episode 6
	 Max of rewards 5
	 Min state 0.5
	 Max state 0.698601999939188
Simulating episode 7
	 Max of rewards 10
	 Min state 0.08747842922015923
	 Max state 0.6218487394957982
Simulating episode 8
	 Max of rewards 10
	 Min state 0.2759933065583441
	 Max state 0.6218487394957982
Simulating episode 9
	 Max of rewards 10
	 Min state 0.3200839306799015
	 Max state 0.6451612903225806
Simulating episode 10
	 Max of rewar

In [126]:
LearningQDeep.mainNetwork.save('Trained_model.h5')

In [272]:
loaded_model = LearningQDeep.mainNetwork

In [273]:
actions_str = {0 : '--', 1 : '-', 2 : '+', 3 : '++'}

In [274]:
# def rew(state):
#     y_pred = np.where(pred_prob[:,1]>state,1,0)
#     return precision_score(y_test, y_pred)

In [287]:
terminalState=False

# s0 = 0.5
# r0 = rew(s0)
s0 = np.random.uniform(0,1)
r0 = rew(s0)
si = np.random.uniform(0,1)
ri = rew(si)
currentState = np.array([si,ri,s0,r0])

# currentState = np.array([0.49])
terminated = False
# print(currentState)

while not terminated:
# for _ in range(5):
    # get the Q-value (1 by 2 vector)
    Qvalues=loaded_model.predict(currentState.reshape(1,4),verbose=0)
    # select the action that gives the max Qvalue
    action=np.random.choice(np.where(Qvalues[0,:]==np.max(Qvalues[0,:]))[0])
    # print(action)

    # if you want random actions for comparison
    #action = env.action_space.sample()
    # apply the action
    (r, currentState, terminated) = step(action, currentState)

    if r<0:
        terminated = True
    # if currentState<=0:
        # terminated = True

    print(actions_str[action], r,currentState)
    # sum the rewards
    # sumObtainedRewards+=currentReward

-- 5 [0.86575625 0.90243902 0.96575625 0.95652174]
+ 5 [0.87575625 0.92307692 0.86575625 0.90243902]
-- 5 [0.77575625 0.91071429 0.87575625 0.92307692]
+ -10 [0.78575625 0.90740741 0.77575625 0.91071429]


In [283]:
def rew(state):
    y_pred = np.where(pred_prob[:,1]>state,1,0)
    return precision_score(y_test, y_pred)

In [284]:
fig = px.line(x = np.arange(0,1,0.001), y = [rew(t) for t in np.arange(0,1,0.001)])
fig.show()

In [277]:
np.max([rew(t) for t in np.arange(0,1,0.001)])

0.6451612903225806

In [140]:
currentState

array([0.63094663])