# Deep Reinforcement Learning Agent

In [None]:
# Install dependencies via the IPython %pip magic (affects the running kernel's environment).
%pip install tensorflow-cpu
%pip install gym
%pip install keras
# NOTE(review): the model in this notebook is built with `tensorflow.keras` while the
# agent classes are imported from `rl.*`; the TensorFlow 2 port of keras-rl is published
# as `keras-rl2` -- confirm which package matches the installed TensorFlow version.
%pip install keras-rl

## Umgebung definieren
Wir erstellen eine einfache Umgebung (Environment) in OpenAI Gym, die den Benutzernamen und das Passwort als Observation liefert.

In [None]:
# Imports
import gym
from gym import spaces
import numpy as np

In [None]:
class LoginEnv(gym.Env):
    def __init__(self):
        super(LoginEnv, self).__init__()
        
        # Zustandseigenschaften: Richtiges Passwort (boolean), Zeit zwischen Loginversuchen (date), Falsches Passwort Zähler (int), letzte Aktion (int)
        self.observation_space = spaces.Box(low=0, high=1, shape=(4,), dtype=np.float32)
        
        # Aktionen: 0 = Nicht sperren, 1 = 30s sperren, 2 = 1m sperren, 3 = 3min sperren, 4 = Dauerhaft sperren
        self.action_space = spaces.Discrete(5)
        
        # Interne Zustandsvariablen
        self.incorrect_password = False
        self.time_between_attempts = np.random.randint(0, 3600) # 1 sec to 1h (in seconds)
        self.incorrect_password_count = 0 
        self.last_action = 0
    
    def reset(self):
        self.incorrect_password = np.random.choice([True, False])
        self.time_between_attempts = np.random.randint(0, 3600) # 1 sec bis 1h (in Sekunden)
        self.incorrect_password_count = np.random.randint(0, 11)
        self.last_action = np.random.randint(1, 3) if self.incorrect_password_count > 0 else 0
        return np.array([
            self.incorrect_password,
            self.time_between_attempts,
            self.incorrect_password_count,
            self.last_action
        ])

    def step(self, action):
        reward = 0
        done = False
        
        if action == 0:  # Nicht sperren
            if not self.incorrect_password:
                reward = 1
                done = True
            elif self.time_between_attempts <= 3 or self.incorrect_password_count >= 10:
                reward = -1
                done = True
            else:
                reward = 0
        elif action == 1:  # 30s sperren
            if not self.incorrect_password:
                reward = -1
                done = True
            elif self.time_between_attempts <= 3 or (3 < self.incorrect_password_count <= 6):
                reward = 1
                done = True
            else:
                reward = 0
        elif action == 2:  # 1m sperren
            if not self.incorrect_password:
                reward = -1
                done = True
            elif (6 < self.incorrect_password_count <= 9):
                reward = 1
                done = True
            else:
                reward = 0
        elif action == 3:  # 3min sperren
            if not self.incorrect_password:
                reward = -1
                done = True
            elif (9 < self.incorrect_password_count < 10):
                reward = 1
                done = True
            else:
                reward = 0
        elif action == 4:  # Dauerhaft sperren
            if self.incorrect_password_count >= 10:
                reward = 1
                done = True
            else:
                reward = -1
                done = True

        return reward, done



## Umgebung testen

In [None]:
env = LoginEnv()

num_episodes = 10
for episode in range(num_episodes):
    state = env.reset()
    done = False
    total_reward = 0
    
    while not done:
        action = env.action_space.sample()
        reward, done = env.step(action)
        total_reward += reward
        print(f'State: {state}, Reward: {reward}')

    
    print(f'Episode {episode + 1}: Total Reward = {total_reward}')

## Deep Learning Modell definieren

In [None]:
# Imports
import tensorflow as tf
from tensorflow import keras


In [None]:
def build_model(state_shape=(4,), nb_actions=5, window_length=1):
    """Build and compile the Q-network used by the DQN agent.

    The agent in this notebook stores experience in a
    ``SequentialMemory(window_length=1)``; keras-rl stacks ``window_length``
    observations per sample, so the network receives batches shaped
    ``(batch, window_length, *state_shape)``.  The input is therefore
    declared with the window axis and flattened before the dense layers.

    Args:
        state_shape: shape of a single observation (default: 4 features).
        nb_actions: number of discrete actions, i.e. output neurons.
        window_length: number of stacked observations per sample; must
            match the ``window_length`` of the agent's memory.

    Returns:
        A compiled ``keras.Sequential`` model mapping states to one
        Q-value per action.
    """
    model = keras.models.Sequential([
        # Input carries the window axis expected from the replay memory.
        keras.layers.Input(shape=(window_length,) + tuple(state_shape)),
        keras.layers.Flatten(),
        keras.layers.Dense(64, activation='relu'),  # first hidden layer
        keras.layers.Dense(64, activation='relu'),  # second hidden layer
        keras.layers.Dense(nb_actions, activation='linear'),  # one linear output per action
    ])

    # Mean squared error loss with Adam.
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss='mse')

    return model

In [None]:
# Instantiate the network and print its layer / parameter summary.
model = build_model()
model.summary()

## Deep Reinforcement Learning Agenten definieren
Hier definieren wir ein einfaches neuronales Netzwerk mit TensorFlow, das die Umgebung steuert.

In [None]:
# Imports
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [None]:
def build_agent(model, actions):
    """Assemble a DQN agent around ``model``.

    Args:
        model: Keras network that outputs one value per action.
        actions: number of discrete actions, passed on as ``nb_actions``.

    Returns:
        An uncompiled ``DQNAgent`` configured with a ``BoltzmannQPolicy``,
        a ``SequentialMemory`` (limit 50000, window length 1), 10 warm-up
        steps and ``target_model_update=1e-2``.
    """
    agent_config = dict(
        model=model,
        nb_actions=actions,
        policy=BoltzmannQPolicy(),
        memory=SequentialMemory(limit=50000, window_length=1),
        nb_steps_warmup=10,
        target_model_update=1e-2,
    )
    return DQNAgent(**agent_config)

In [None]:
# Build the agent for the environment's action count, compile it and train.
dqn = build_agent(model, env.action_space.n)
# Use `learning_rate` (as in build_model); `lr` is a deprecated alias that
# newer Keras releases no longer accept.
dqn.compile(keras.optimizers.Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)