In [243]:
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from tqdm import tqdm
from collections import deque 
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

In [274]:
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [275]:
import plotly.express as px

In [276]:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

In [277]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  0


In [278]:
X,y = make_classification(15_000,500,random_state=50,weights=[0.9])
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=50)

In [279]:
lr = LogisticRegression()
lr.fit(X_train,y_train)
pred_prob = lr.predict_proba(X_test)

In [373]:
def rew(state):
    y_pred = np.where(pred_prob[:,1]>state,1,0)
    return f1_score(y_test, y_pred)

In [374]:
fig = px.line(x = np.arange(0,1,0.001), y = [rew(t) for t in np.arange(0,1,0.001)])
fig.show()

In [375]:
 actions = {0 : -0.1, 1 : -0.01, 2 : 0.01, 3 : 0.1}
 len(actions)

4

In [376]:
def round_closest(s):
    sr = round(s,3)*100

    return (np.floor(sr) + round(round(abs(np.floor(sr) - sr)*10)/5)*5/10)/100

In [418]:
def step(action, state):
    s = state[0]
    rw = state[0]

    ns = s[0] + actions[action]
    rs = rew(ns)

    next_state = [[ns, s[0]],[rs, rw[0]]]

    if (ns<0) or (ns>1):
        r = -20
        terminated = True
        return r, np.array(next_state), terminated


    if rs > rw[0]: 
        r = 10
    else:
        r = -5


    # if (np.max(rw) > rs) and (np.max(rw) - rs > 0.005):
    #     terminated = True
    #     return r, np.array(next_state), terminated
    
    if np.any([ns == si for si in s]):
        terminated = True
    else:
        terminated = False

        
    return r, np.array(next_state), terminated

In [419]:
%run -i ./DQN_Class.py
gamma=0.5
epsilon=.8
numberEpisodes= 120
LearningQDeep=DeepQLearning(gamma,epsilon,numberEpisodes)


In [420]:
LearningQDeep.actionDimension

4

In [421]:
LearningQDeep.epsilon

0.8

In [422]:
LearningQDeep.mainNetwork.summary()

Model: "sequential_44"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_132 (Dense)           (None, 2, 128)            384       
                                                                 
 dense_133 (Dense)           (None, 2, 56)             7224      
                                                                 
 flatten_42 (Flatten)        (None, 112)               0         
                                                                 
 dense_134 (Dense)           (None, 4)                 452       
                                                                 
Total params: 8,060
Trainable params: 8,060
Non-trainable params: 0
_________________________________________________________________


In [423]:
%%time
LearningQDeep.trainingEpisodes()

Simulating episode 0
	 Max of rewards -20
	 Min state 0.825
	 Max state 0.855
Simulating episode 1
	 Max of rewards -30
	 Min state 0.5800000000000001
	 Max state 0.87
Simulating episode 2
	 Max of rewards 40
	 Min state 0.41000000000000003
	 Max state 0.71
Simulating episode 3
	 Max of rewards -15
	 Min state 0.785
	 Max state 0.805
Simulating episode 4
	 Max of rewards -45
	 Min state 0.6399999999999999
	 Max state 0.7999999999999999
Simulating episode 5
	 Max of rewards -20
	 Min state 0.995
	 Max state 0.995
Simulating episode 6
	 Max of rewards 30
	 Min state 0.14499999999999996
	 Max state 0.345
Simulating episode 7
	 Max of rewards -20
	 Min state 0.075
	 Max state 0.075
Simulating episode 8
	 Max of rewards -20
	 Min state 0.625
	 Max state 0.835
Simulating episode 9
	 Max of rewards 40
	 Min state 0.33999999999999997
	 Max state 0.46
Simulating episode 10
	 Max of rewards 50
	 Min state 0.22999999999999995
	 Max state 0.36
Simulating episode 11
	 Max of rewards 30
	 Min state 

In [181]:
LearningQDeep.mainNetwork.save('Trained_model.h5')

In [424]:
loaded_model = LearningQDeep.mainNetwork

In [425]:
actions_str = {0 : '--', 1 : '-', 2 : '+', 3 : '++'}

In [426]:
# def rew(state):
#     y_pred = np.where(pred_prob[:,1]>state,1,0)
#     return precision_score(y_test, y_pred)

In [431]:
s0 = [np.random.uniform(1,0),np.random.uniform(1,0)]
s0 = [round_closest(s) for s in s0]
r0 = [rew(s) for s in s0]
currentState = np.array([s0,r0])
# currentState = np.array([[s0[0],r0[0]],[s0[1],r0[1]]])
terminated = False
print(currentState)
while not terminated:
    Qvalues=loaded_model.predict(currentState.reshape(1,2,2),verbose=0)
    action=np.random.choice(np.where(Qvalues[0,:]==np.max(Qvalues[0,:]))[0])
    (r, currentState, terminated) = step(action, currentState)
    print(actions_str[action], r,currentState)

[[0.07       0.06      ]
 [0.54928391 0.53646677]]
+ 10 [[0.08       0.07      ]
 [0.56445993 0.07      ]]
++ 10 [[0.18       0.08      ]
 [0.64502165 0.08      ]]
++ 10 [[0.28      0.18     ]
 [0.6902439 0.18     ]]
- 10 [[0.27       0.28      ]
 [0.68848485 0.28      ]]
- 10 [[0.26       0.27      ]
 [0.68667467 0.27      ]]
- 10 [[0.25       0.26      ]
 [0.68408551 0.26      ]]
- 10 [[0.24       0.25      ]
 [0.67920094 0.25      ]]
- 10 [[0.23       0.24      ]
 [0.68451688 0.24      ]]
- 10 [[0.22      0.23     ]
 [0.6766055 0.23     ]]
- 10 [[0.21       0.22      ]
 [0.67272727 0.22      ]]
- 10 [[0.2        0.21      ]
 [0.66591676 0.21      ]]
-- 10 [[0.1        0.2       ]
 [0.58118081 0.2       ]]
- 10 [[0.09       0.1       ]
 [0.57322552 0.1       ]]
++ 10 [[0.19       0.09      ]
 [0.65711136 0.09      ]]
++ 10 [[0.29       0.19      ]
 [0.68888889 0.19      ]]
- 10 [[0.28      0.29     ]
 [0.6902439 0.29     ]]
- 10 [[0.27       0.28      ]
 [0.68848485 0.28      ]]
- 10

KeyboardInterrupt: 

In [430]:
def rew(state):
    y_pred = np.where(pred_prob[:,1]>state,1,0)
    return f1_score(y_test, y_pred)

In [429]:
fig = px.line(x = np.arange(0,1,0.01), y = [rew(t) for t in np.arange(0,1,0.01)])
fig.show()