In [2]:
# Data manipulation
import pandas as pd
import numpy as np

# Visualization
import matplotlib.pyplot as plt
import plotly.graph_objs as go

# Machine Learning
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Deep Learning
import tensorflow as tf
from tf_agents.environments import py_environment, tf_py_environment, suite_gym, suite_atari
from tf_agents.networks import q_network
from tf_agents.agents.dqn import dqn_agent
from tf_agents.replay_buffers import tf_uniform_replay_buffer
from tf_agents.drivers import dynamic_step_driver
from tf_agents.specs import array_spec
from tf_agents.utils import common
from tf_agents.trajectories import trajectory

import warnings
warnings.filterwarnings('ignore')

In [3]:
# Load the previously saved Keras model that later cells use both as the
# environment's move predictor and as the network being fine-tuned.
# The path is relative, so it is resolved against the notebook's current
# working directory.
# NOTE(review): the model's expected input shape and output size are not
# visible here; the training cell feeds it (1, sequence_length, 1) arrays and
# indexes its output by action -- confirm the saved model matches.
loaded_model = tf.keras.models.load_model("neural_network_model.h5")





In [6]:
import numpy as np

class RPSLSEnvironment:
    """Minimal gym-style environment for Rock-Paper-Scissors-Lizard-Spock.

    The state is the list of every action passed to ``step`` since the last
    ``reset``.  The "opponent" move used for scoring is the previous entry of
    that same history (i.e. the action given to the preceding ``step`` call).

    Attributes:
        model: Object handed to ``get_next_move`` when acting greedily.
        action_space: Number of distinct actions (5).
        max_moves: Number of steps after which an episode is finished.
        current_state: History of actions taken in the current episode.
        num_moves: Number of steps taken in the current episode.
    """

    def __init__(self, model, max_moves=100):
        """Store the model and episode length and start a fresh episode.

        Args:
            model: Predictor passed through to ``get_next_move``.
            max_moves: Episode length in steps (default 100).
        """
        self.model = model
        # Five actions: rock, paper, scissors, lizard, spock.
        # NOTE(review): the integer encoding is not defined in this class; see
        # calculate_reward for the encoding its win rule requires.
        self.action_space = 5
        self.max_moves = max_moves
        # Initialise the episode state here so that step()/get_action() work
        # on a new instance instead of raising AttributeError until reset()
        # has been called.
        self.reset()

    def reset(self):
        """Start a new episode and return its (empty) move history.

        Returns:
            The internal history list itself, not a copy; subsequent
            ``step`` calls append to this same object.
        """
        self.current_state = []  # Sequence of previous moves
        self.num_moves = 0  # Number of moves played this episode
        return self.current_state

    def step(self, action):
        """Record ``action``, score it and report whether the episode ended.

        Args:
            action: Integer action index.

        Returns:
            Tuple ``(state, reward, done, info)`` where ``state`` is the
            internal history list (mutated in place, not a copy), ``reward``
            comes from ``calculate_reward``, ``done`` from ``is_done`` and
            ``info`` is always an empty dict.
        """
        self.current_state.append(action)
        self.num_moves += 1
        # The reward is computed after the append, so the history already
        # ends with ``action`` when calculate_reward inspects it.
        reward = self.calculate_reward(action)
        done = self.is_done()
        return self.current_state, reward, done, {}

    def calculate_reward(self, action):
        """Score ``action`` against the move that precedes it in the history.

        Args:
            action: The action just appended to ``current_state``.

        Returns:
            ``-0.1`` while the history holds fewer than two moves (there is
            no previous move to play against), otherwise ``0`` for a tie,
            ``1`` for a win and ``-1`` for a loss.
        """
        if len(self.current_state) < 2:
            return -0.1  # Small negative reward: nothing to compare against yet

        player_move = action
        # current_state[-1] is ``action`` itself; [-2] is the previous move.
        computer_move = self.current_state[-2]

        if player_move == computer_move:
            return 0  # Tie
        # NOTE(review): a difference of 1 or 3 (mod 5) is a win only under the
        # encoding rock=0, paper=1, scissors=2, spock=3, lizard=4.  Confirm
        # this matches the label encoding used when the model was trained.
        if (player_move - computer_move) % 5 in (1, 3):
            return 1  # Player wins
        return -1  # Player loses

    def is_done(self):
        """Return True once ``max_moves`` steps have been taken this episode."""
        return self.num_moves >= self.max_moves

    def get_action(self, epsilon=0.1):
        """Pick the next action with an epsilon-greedy rule.

        Args:
            epsilon: Probability of choosing a uniformly random action.

        Returns:
            A random integer in ``[0, action_space)`` with probability
            ``epsilon``; otherwise whatever ``get_next_move`` returns for the
            current model and history.
        """
        if np.random.rand() < epsilon:
            return np.random.choice(self.action_space)
        # NOTE(review): get_next_move is not defined in this class or in the
        # visible cells; it must already exist in the notebook namespace.
        return get_next_move(self.model, self.current_state)
        
# Wrap the Keras model loaded in the earlier cell in the game environment.
# max_moves is left at its default, so each episode lasts 100 steps.
env = RPSLSEnvironment(loaded_model)

In [None]:
# Q-learning style fine-tuning of `loaded_model` against `env`.
# Requires `env` and `loaded_model` from the earlier cells.

# Number of episodes to train for
num_episodes = 1000

# Discount factor applied to the bootstrapped next-state value
gamma = 0.9

# Exploration schedule: start fully random, decay multiplicatively per
# episode, never drop below min_epsilon
epsilon = 1.0
min_epsilon = 0.01
epsilon_decay = 0.995

# Total reward collected in each episode
rewards = []

for i_episode in range(num_episodes):
    # env.reset()/env.step() hand back the environment's own history list,
    # which step() appends to in place.  Snapshot it so that `state` keeps the
    # history *before* the action; otherwise `state` and `next_state` are the
    # same object and the update below trains Q(next_state, action).
    state = list(env.reset())

    episode_reward = 0

    for t in range(env.max_moves):
        # Epsilon-greedy action selection
        action = env.get_action(epsilon)

        next_state, reward, done, _ = env.step(action)
        next_state = list(next_state)  # snapshot, see note above

        # An empty history would give a zero-length (1, 0, 1) input, so the
        # first move of an episode only contributes its reward.
        if state:
            # assumes the model accepts (1, sequence_length, 1) input of
            # variable length and returns one value per action -- TODO confirm
            state_input = np.array(state).reshape(1, len(state), 1)

            if done:
                # Terminal step: nothing to bootstrap from
                target = reward
            else:
                next_input = np.array(next_state).reshape(1, len(next_state), 1)
                target = reward + gamma * np.max(
                    loaded_model.predict(next_input, verbose=0)
                )

            # Only the Q-value of the action actually taken is moved toward
            # the target; the other outputs keep their current predictions.
            target_f = loaded_model.predict(state_input, verbose=0)
            target_f[0][action] = target

            # One gradient step on this single transition
            loaded_model.fit(state_input, target_f, epochs=1, verbose=0)

        state = next_state
        episode_reward += reward

        if done:
            break

    # Decay epsilon
    epsilon = max(min_epsilon, epsilon_decay * epsilon)

    rewards.append(episode_reward)

    # Report the mean reward of the most recent (up to) 100 episodes
    if i_episode % 100 == 0:
        print(f"Episode {i_episode}: {np.mean(rewards[-100:])}")