In [1]:
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from tqdm import tqdm
from collections import deque 
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

In [2]:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

In [3]:
# Sanity check: report how many GPU devices TensorFlow can see.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [4]:
# Synthetic, imbalanced binary problem: 1000 samples with 150 features;
# weights=[0.9] assigns roughly 90% of the samples to class 0.
X, y = make_classification(
    n_samples=1000,
    n_features=150,
    weights=[0.9],
    random_state=50,
)
# Seeded hold-out split using train_test_split's default proportions.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=50)

In [5]:
# Baseline classifier; its test-set probabilities are thresholded by rew().
lr = LogisticRegression().fit(X_train, y_train)
# One column per class; column 1 is the score compared against the threshold.
pred_prob = lr.predict_proba(X_test)

In [6]:
def rew(state):
    """Return the F1 score obtained when ``state`` is used as the decision threshold.

    Test samples whose score in column 1 of the module-level ``pred_prob``
    is strictly greater than ``state`` are labelled 1, all others 0, and the
    resulting labels are scored against the module-level ``y_test``.

    Parameters
    ----------
    state : float
        Threshold compared against ``pred_prob[:, 1]``.

    Returns
    -------
    float
        ``f1_score(y_test, thresholded_predictions)``.
    """
    thresholded = (pred_prob[:, 1] > state).astype(int)
    return f1_score(y_test, thresholded)

In [7]:
 # Discrete action set: action index -> amount added to the current threshold
 # (four decrements, a no-op, and four increments of matching magnitude).
 actions = dict(enumerate([-0.4, -0.1, -0.01, -0.001, 0, 0.001, 0.01, 0.1, 0.4]))
 # Display the number of available actions.
 len(actions)

9

In [8]:
# def step(action, state):
#     state = state[0]
    
#     r = rew(state)
   
#     next_state = state + actions[action]

#     return r, np.array([next_state])

In [9]:
def step(action, state):
    """Apply ``action`` to the current threshold and score the new threshold.

    Parameters
    ----------
    action : int
        Key into the module-level ``actions`` dict; the mapped value is
        added to the current threshold.
    state : indexable
        Container whose first element is the current threshold.

    Returns
    -------
    tuple
        ``(reward, next_state)`` where ``reward`` is ``rew`` evaluated at the
        new threshold and ``next_state`` is a one-element NumPy array holding
        that new threshold.
    """
    moved_threshold = state[0] + actions[action]
    # The reward is computed on the threshold reached *after* the move.
    return rew(moved_threshold), np.array([moved_threshold])

In [10]:
# def step(action, state):
#     state = state[0]

#     r_state = rew(state)
#     next_state = state + actions[action]
#     r_next = rew(next_state)

#     if r_next > r_state:
#         r = 2
#     elif r_next == r_state:
#         r = 0
#     else:
#         r = -1
        
#     return r, np.array([next_state])

In [11]:
# Execute DQN_Class.py inside this notebook's namespace (-i): the script can
# see the names defined above, and whatever it defines becomes available here.
%run -i ./DQN_Class.py
# presumably the discount factor of the Q-learning update — confirm in DQN_Class.py
gamma=0.5
# presumably the exploration rate of an epsilon-greedy policy — confirm in DQN_Class.py
epsilon=.8
# number of episodes handed to the agent
numberEpisodes= 110
# DeepQLearning is not defined in this notebook; presumably it comes from DQN_Class.py
LearningQDeep=DeepQLearning(gamma,epsilon,numberEpisodes)


In [12]:
# Display the epsilon value currently stored on the agent.
LearningQDeep.epsilon

0.8

In [13]:
%%time
# Run the agent's episode loop; %%time reports how long the whole cell takes.
LearningQDeep.trainingEpisodes()

Simulating episode 0
	 Max of rewards 0.4615384615384615
	 Min state 0.7749029087518017
	 Max state 0.9749029087518016
Simulating episode 1
	 Max of rewards 0.4827586206896552
	 Min state 0.020088000313616468
	 Max state 0.5300880003136165
Simulating episode 2
	 Max of rewards 0.4571428571428571
	 Min state 0.2204887902900764
	 Max state 0.2234887902900764
Simulating episode 3
	 Max of rewards 0.4571428571428571
	 Min state 0.03191183115516505
	 Max state 0.24291183115516507
Simulating episode 4
	 Max of rewards 0.4571428571428571
	 Min state 0.22605224744537167
	 Max state 0.23005224744537167
Simulating episode 5
	 Max of rewards 0.4814814814814815
	 Min state 0.6886771572314004
	 Max state 0.7886771572314004
Simulating episode 6
	 Max of rewards 0.0
	 Min state 0.7696958044817527
	 Max state 0.7696958044817527
Simulating episode 7
	 Max of rewards 0.49122807017543857
	 Min state 0.18456743756248203
	 Max state 0.6855674375624821
Simulating episode 8
	 Max of rewards 0.399999999999999

In [17]:
# Write the agent's main network to 'Trained_model.h5' in the working directory
# (presumably a Keras model, in which case the .h5 suffix selects HDF5 — confirm).
LearningQDeep.mainNetwork.save('Trained_model.h5')

In [14]:
# NOTE: despite the name, nothing is read from disk here — this is a reference
# to the in-memory network held by the agent, not a reload of 'Trained_model.h5'.
loaded_model = LearningQDeep.mainNetwork

In [15]:
# Printable symbol per action index: more '-' / '+' signs mean a larger
# decrease / increase of the threshold, and '0' leaves it unchanged.
actions_str = dict(enumerate(['----', '---', '--', '-', '0', '+', '++', '+++', '++++']))

In [26]:
def rew(state):
    """Return the precision obtained when ``state`` is used as the decision threshold.

    NOTE: this rebinds the name ``rew``. Once this cell has run, the F1-based
    definition from earlier in the notebook is replaced, and ``step`` (which
    looks ``rew`` up at call time) reports precision instead of F1.

    Parameters
    ----------
    state : float
        Threshold compared against column 1 of the module-level ``pred_prob``;
        strictly greater scores are labelled 1, all others 0.

    Returns
    -------
    float
        ``precision_score(y_test, thresholded_predictions)``.
    """
    positive_scores = pred_prob[:, 1]
    y_pred = (positive_scores > state).astype(int)
    return precision_score(y_test, y_pred)

In [27]:
# Greedy roll-out of the trained network: starting from threshold 1, pick the
# highest-valued action for at most 15 steps and print each transition.
# (For a random start, use np.array([np.random.uniform(0, 1)]) instead.)
currentState = np.array([1])
terminated = False
for _ in range(15):
    # Q-values for the current state: a single row that is indexed by action
    # (presumably one entry per key of `actions` — confirm in DQN_Class.py).
    Qvalues = loaded_model.predict(currentState.reshape(1, 1))
    # Greedy action; ties between equally good actions are broken at random.
    best_actions = np.where(Qvalues[0, :] == np.max(Qvalues[0, :]))[0]
    action = np.random.choice(best_actions)

    # Apply the action: move the threshold and score the new one.
    r, currentState = step(action, currentState)

    print(actions_str[action], r, currentState)

    # A negative reward ends the roll-out. The metric-based rewards returned
    # by rew() are never negative, so this only triggers if step() is swapped
    # for a variant that can return negative rewards.
    if r < 0:
        terminated = True
        break

---- 0.5 [0.6]
- 0.48148148148148145 [0.599]
- 0.48148148148148145 [0.598]
- 0.48148148148148145 [0.597]
- 0.48148148148148145 [0.596]
+ 0.48148148148148145 [0.597]
- 0.48148148148148145 [0.596]
+ 0.48148148148148145 [0.597]
- 0.48148148148148145 [0.596]
+ 0.48148148148148145 [0.597]
- 0.48148148148148145 [0.596]
+ 0.48148148148148145 [0.597]
- 0.48148148148148145 [0.596]
+ 0.48148148148148145 [0.597]
- 0.48148148148148145 [0.596]


In [28]:
# Display the threshold the roll-out above ended on.
currentState

array([0.596])

In [29]:
import plotly.express as px

In [30]:
# Sweep the decision threshold over [0, 1) in steps of 0.001 and plot the value
# of rew() at each point. The grid is built once and reused for both axes.
thresholds = np.arange(0, 1, 0.001)
fig = px.line(
    x=thresholds,
    y=[rew(t) for t in thresholds],
    labels={"x": "decision threshold", "y": "rew(threshold)"},
    title="Reward as a function of the decision threshold",
)
fig.show()