In [1]:
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from tqdm import tqdm
from collections import deque 
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

In [2]:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

In [3]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [4]:
X,y = make_classification(1000,150,random_state=50,weights=[0.9])
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=50)

In [5]:
pd.Series(y).value_counts()

0    896
1    104
dtype: int64

In [6]:
def rew(state):
    lr = LogisticRegression(C=state)
    lr.fit(X_train,y_train)
    y_pred = lr.predict(X_test)
    return f1_score(y_test, y_pred)

In [7]:
 actions = {0 : -10, 1 : -1, 2 : -0.1, 3 : -0.01, 4 : 0, 5 : 0.01,  6 : 0.1, 7 : 1, 8: 10}
 len(actions)

9

In [8]:
def step(action, state):
    state = state[0]
    
    r = rew(state)
   
    next_state = state + actions[action]

    return r, np.array([next_state])

In [9]:
%run -i ./DQN_Class.py
gamma=0.5
epsilon=.8
numberEpisodes= 1000
LearningQDeep=DeepQLearning(gamma,epsilon,numberEpisodes)


In [10]:
LearningQDeep.epsilon

0.8

In [11]:
%%time
LearningQDeep.trainingEpisodes()

Simulating episode 0
	 Max of rewards 0.4444444444444445
	 Min state 2.9067755144418808
	 Max state 5.00677551444188
Simulating episode 1
	 Max of rewards 0.4444444444444445
	 Min state 2.529136753544159
	 Max state 33.72913675354416
Simulating episode 2
	 Max of rewards 0.4444444444444445
	 Min state 28.600107078224237
	 Max state 48.60010707822424
Simulating episode 3
	 Max of rewards 0.4444444444444445
	 Min state 39.28772324801027
	 Max state 70.18772324801027
Simulating episode 4
	 Max of rewards 0.4444444444444445
	 Min state 38.45372763206694
	 Max state 89.35372763206695
Simulating episode 5
	 Max of rewards 0.4444444444444445
	 Min state 15.853190632850461
	 Max state 25.853190632850463
Simulating episode 6
	 Max of rewards 0.4444444444444445
	 Min state 37.57835977015366
	 Max state 38.57835977015366
Simulating episode 7
	 Max of rewards 0.4444444444444445
	 Min state 24.779620188645495
	 Max state 65.6796201886455
Simulating episode 8
	 Fist train of main network...
	 Max of

In [12]:
LearningQDeep.mainNetwork.save('Trained_model.h5')

In [13]:
loaded_model = LearningQDeep.mainNetwork

In [14]:
actions_str = {0 : '----', 1 : '---', 2 : '--', 3 : '-', 4 : '0', 5 : '+', 6 : '++',  7 : '+++',8: '++++'}

In [17]:
terminalState=False
currentState = np.array([5])
terminated = False
# print(currentState)
while not terminated:
    # get the Q-value (1 by 2 vector)
    Qvalues=loaded_model.predict(currentState.reshape(1,1))
    # select the action that gives the max Qvalue
    action=np.random.choice(np.where(Qvalues[0,:]==np.max(Qvalues[0,:]))[0])
    # print(action)

    # if you want random actions for comparison
    #action = env.action_space.sample()
    # apply the action
    r, currentState = step(action, currentState)
    if currentState<=0:
        terminated = True

    print(actions_str[action], r,currentState)
    # sum the rewards
    # sumObtainedRewards+=currentReward

+ 0.4444444444444445 [5.01]
+ 0.4444444444444445 [5.02]
+ 0.4444444444444445 [5.03]
+ 0.4444444444444445 [5.04]
+ 0.4444444444444445 [5.05]
+ 0.4444444444444445 [5.06]
+ 0.4444444444444445 [5.07]
+ 0.4444444444444445 [5.08]
+ 0.4444444444444445 [5.09]
+ 0.4444444444444445 [5.1]
+ 0.4444444444444445 [5.11]
+ 0.4444444444444445 [5.12]
+ 0.4444444444444445 [5.13]
+ 0.4444444444444445 [5.14]
+ 0.4444444444444445 [5.15]
+ 0.4444444444444445 [5.16]
+ 0.4444444444444445 [5.17]
+ 0.4444444444444445 [5.18]
+ 0.4444444444444445 [5.19]
+ 0.4444444444444445 [5.2]
+ 0.4444444444444445 [5.21]
+ 0.4444444444444445 [5.22]
+ 0.4444444444444445 [5.23]
+ 0.4444444444444445 [5.24]
+ 0.4444444444444445 [5.25]
+ 0.4444444444444445 [5.26]
+ 0.4444444444444445 [5.27]
+ 0.4444444444444445 [5.28]
+ 0.4444444444444445 [5.29]
+ 0.4444444444444445 [5.3]
+ 0.4444444444444445 [5.31]
+ 0.4444444444444445 [5.32]
+ 0.4444444444444445 [5.33]
+ 0.4444444444444445 [5.34]
+ 0.4444444444444445 [5.35]
+ 0.4444444444444445 [5

KeyboardInterrupt: 

In [None]:
rew(0.1)