In [1]:
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from tqdm import tqdm
from collections import deque 
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

In [2]:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

In [3]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [4]:
X,y = make_classification(1000,150,random_state=50,weights=[0.9])
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=50)

In [5]:
pd.Series(y).value_counts()

0    896
1    104
dtype: int64

In [6]:
def rew(state):
    lr = LogisticRegression(C=state)
    lr.fit(X_train,y_train)
    y_pred = lr.predict(X_test)
    return f1_score(y_test, y_pred)

In [7]:
 actions = {0 : -10, 1 : -1, 2 : -0.1, 3 : -0.01, 4 : 0, 5 : 0.01,  6 : 0.1, 7 : 1, 8: 10}
 len(actions)

9

In [13]:
def step(action, state):
    state = state[0]
    next_state = state + actions[action]
    if next_state<0:
        r = -1
    else:
        r = rew(next_state)
    return r, np.array([next_state])

In [14]:
%run -i ./DQN_Class.py
gamma=0.5
epsilon=.8
numberEpisodes= 1000
LearningQDeep=DeepQLearning(gamma,epsilon,numberEpisodes)


In [15]:
LearningQDeep.epsilon

0.8

In [16]:
%%time
LearningQDeep.trainingEpisodes()

Simulating episode 0
	 Max of rewards -1
	 Min state 1
	 Max state 1
Simulating episode 1
	 Max of rewards 0.49122807017543857
	 Min state 0.9
	 Max state 10.9
Simulating episode 2
	 Max of rewards 0.4827586206896552
	 Min state 0.09000000000000008
	 Max state 136.43
Simulating episode 3
	 Max of rewards 0.4444444444444445
	 Min state 1.0
	 Max state 41.0
Simulating episode 4
	 Max of rewards -1
	 Min state 1
	 Max state 1
Simulating episode 5
	 Max of rewards 0.49122807017543857
	 Min state 0.09999999999999964
	 Max state 40.1
Simulating episode 6
	 Max of rewards 0.0
	 Min state 1
	 Max state 1
Simulating episode 7
	 Max of rewards 0.4444444444444445
	 Min state 1.0
	 Max state 31.91
Simulating episode 8
	 Max of rewards 0.4827586206896552
	 Min state 1.0
	 Max state 41.11
Simulating episode 9
	 Max of rewards 0.0
	 Min state 1
	 Max state 1
Simulating episode 10
	 Max of rewards 0.4827586206896552
	 Min state 1
	 Max state 1
Simulating episode 11
	 Fist train of main network...
	 Ma

In [17]:
LearningQDeep.mainNetwork.save('Trained_model.h5')

In [18]:
loaded_model = LearningQDeep.mainNetwork

In [19]:
actions_str = {0 : '----', 1 : '---', 2 : '--', 3 : '-', 4 : '0', 5 : '+', 6 : '++',  7 : '+++',8: '++++'}

In [21]:
terminalState=False
currentState = np.array([50])
terminated = False
# print(currentState)
while not terminated:
    # get the Q-value (1 by 2 vector)
    Qvalues=loaded_model.predict(currentState.reshape(1,1))
    # select the action that gives the max Qvalue
    action=np.random.choice(np.where(Qvalues[0,:]==np.max(Qvalues[0,:]))[0])
    # print(action)

    # if you want random actions for comparison
    #action = env.action_space.sample()
    # apply the action
    r, currentState = step(action, currentState)
    if currentState<=0:
        terminated = True

    print(actions_str[action], r,currentState)
    # sum the rewards
    # sumObtainedRewards+=currentReward

---- 0.4444444444444445 [40]
---- 0.4444444444444445 [30]
---- 0.4444444444444445 [20]
---- 0.4444444444444445 [10]
---- 0.0 [0]


In [None]:
rew(0.1)