In [1]:
# Install Dependencies
!pip install tensorflow



In [2]:
# Import Dependencies
import random
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

Setting up the Rock-Paper-Scissors-Lizard-Spock (RPSLS) Environment

In [4]:
# Set up the environment the agent learns in, including the available actions and the rewards.
# The agent and the opponent both play at random.
class RockPaperScissorsLizardSpockEnvironment:
    """Rock-Paper-Scissors-Lizard-Spock against an opponent that plays at random.

    Attributes:
        actions: The five playable hands, as lowercase strings.
        rewards: Maps ``(agent_hand, opponent_hand)`` to 1 (agent wins),
            0 (draw) or -1 (agent loses).
        max_steps: Number of rounds after which an episode is reported done.
        current_step: Rounds played since the last ``reset``.
    """

    # Each hand mapped to the two hands it defeats; the reward table is
    # derived from this so the 25 pairings cannot drift out of sync.
    _BEATS = {
        "rock": ("scissors", "lizard"),
        "paper": ("rock", "spock"),
        "scissors": ("paper", "lizard"),
        "lizard": ("paper", "spock"),
        "spock": ("rock", "scissors"),
    }

    def __init__(self, max_steps=50, seed=None):
        """Create the environment.

        Args:
            max_steps: Maximum number of games per episode (default 50).
            seed: Optional seed for a private random generator, making the
                opponent's hands reproducible. When None, the module-level
                ``random`` functions are used.
        """
        self.actions = ["rock", "paper", "scissors", "lizard", "spock"]
        self.rewards = {
            (mine, theirs): 0 if mine == theirs else (1 if theirs in self._BEATS[mine] else -1)
            for mine in self.actions
            for theirs in self.actions
        }

        # Maximum number of games.
        self.max_steps = max_steps
        self.current_step = 0

        # None means "fall back to the shared module-level generator".
        self._rng = None if seed is None else random.Random(seed)

    def _choice(self):
        """Draw one hand uniformly at random from ``self.actions``."""
        source = random if self._rng is None else self._rng
        return source.choice(self.actions)

    def step(self, action):
        """Play one round with ``action`` against a randomly drawn opponent hand.

        Args:
            action: The agent's hand; must be one of ``self.actions``.

        Returns:
            A tuple ``(opponent_action, reward, done)`` where ``done`` is True
            once ``current_step`` has reached ``max_steps``.

        Raises:
            KeyError: If ``action`` is not a known hand (the step counter is
                not advanced in that case).
        """
        opponent_action = self._choice()
        reward = self.rewards[(action, opponent_action)]
        self.current_step += 1
        done = self.current_step >= self.max_steps
        return opponent_action, reward, done

    def reset(self):
        """Start a new episode: zero the step counter and return a random hand.

        The returned hand is used by callers as the initial state.
        """
        self.current_step = 0
        return self._choice()

In [5]:
# Defining the Agent's actions and states.
# The agent chooses a random hand each time.
class RLAgent:
    """Baseline agent that ignores the state and plays a uniformly random hand.

    Attributes:
        actions: The five playable hands, as lowercase strings.
        rewards: Maps ``(agent_hand, opponent_hand)`` to 1 (win), 0 (draw) or
            -1 (loss). Stored for reference; no method of this class reads it.
        max_steps: Maximum number of games (50). Stored for reference; no
            method of this class reads it.
    """

    def __init__(self, seed=None):
        """Create the agent.

        Args:
            seed: Optional seed for a private random generator, making the
                agent's hands reproducible. When None, the module-level
                ``random`` functions are used.
        """
        self.actions = ["rock", "paper", "scissors", "lizard", "spock"]
        self.rewards = {
            ("rock", "rock"): 0, ("rock", "paper"): -1, ("rock", "scissors"): 1,
            ("rock", "lizard"): 1, ("rock", "spock"): -1,
            ("paper", "rock"): 1, ("paper", "paper"): 0, ("paper", "scissors"): -1,
            ("paper", "lizard"): -1, ("paper", "spock"): 1,
            ("scissors", "rock"): -1, ("scissors", "paper"): 1, ("scissors", "scissors"): 0,
            ("scissors", "lizard"): 1, ("scissors", "spock"): -1,
            ("lizard", "rock"): -1, ("lizard", "paper"): 1, ("lizard", "scissors"): -1,
            ("lizard", "lizard"): 0, ("lizard", "spock"): 1,
            ("spock", "rock"): 1, ("spock", "paper"): -1, ("spock", "scissors"): 1,
            ("spock", "lizard"): -1, ("spock", "spock"): 0
        }

        # Maximum number of games.
        self.max_steps = 50

        # None means "fall back to the shared module-level generator".
        self._rng = None if seed is None else random.Random(seed)

    def choose_action(self, state):
        """Return a hand drawn uniformly at random from ``self.actions``.

        Args:
            state: Accepted for interface compatibility; it does not
                influence the choice.
        """
        # The notebook's imports cell already provides `random`, so no
        # function-local import is needed here.
        source = random if self._rng is None else self._rng
        return source.choice(self.actions)

In [6]:
# Roll out random-play episodes and log one (state, reward) pair per round.
# Note: the state logged for a round is the hand returned by the *previous*
# step (or by reset() for the first round), while the reward comes from the
# hands played in the current round.
env = RockPaperScissorsLizardSpockEnvironment()
agent = RLAgent()
episodes = 50
states, rewards = [], []
score = 0

for episode in range(1, episodes + 1):
    # Fresh episode: new starting state, cleared flag, zeroed running score.
    state, done, score = env.reset(), False, 0

    while not done:
        action = agent.choose_action(state)
        next_state, reward, done = env.step(action)
        states.append(state)
        rewards.append(reward)
        score += reward
        state = next_state

    print('Episode:{} Score{}'.format(episode, score))

Episode:1 Score-6
Episode:2 Score6
Episode:3 Score7
Episode:4 Score12
Episode:5 Score5
Episode:6 Score-7
Episode:7 Score4
Episode:8 Score2
Episode:9 Score-11
Episode:10 Score1
Episode:11 Score-9
Episode:12 Score11
Episode:13 Score6
Episode:14 Score11
Episode:15 Score-4
Episode:16 Score-9
Episode:17 Score-6
Episode:18 Score10
Episode:19 Score-6
Episode:20 Score-6
Episode:21 Score9
Episode:22 Score-1
Episode:23 Score3
Episode:24 Score-3
Episode:25 Score-6
Episode:26 Score-1
Episode:27 Score-1
Episode:28 Score-9
Episode:29 Score-1
Episode:30 Score7
Episode:31 Score7
Episode:32 Score3
Episode:33 Score-8
Episode:34 Score8
Episode:35 Score-5
Episode:36 Score-3
Episode:37 Score-1
Episode:38 Score-7
Episode:39 Score-4
Episode:40 Score2
Episode:41 Score1
Episode:42 Score-4
Episode:43 Score-8
Episode:44 Score8
Episode:45 Score-3
Episode:46 Score2
Episode:47 Score-8
Episode:48 Score-2
Episode:49 Score-4
Episode:50 Score0


In [7]:
# Verifying states are strings.
# The assertion performs the check itself; the print remains for visual inspection.
assert all(isinstance(state, str) for state in states), "every logged state should be a hand name"
print(states)

['lizard', 'paper', 'scissors', 'rock', 'lizard', 'scissors', 'paper', 'scissors', 'rock', 'paper', 'scissors', 'rock', 'rock', 'paper', 'paper', 'paper', 'paper', 'lizard', 'paper', 'rock', 'rock', 'rock', 'scissors', 'spock', 'lizard', 'rock', 'scissors', 'scissors', 'lizard', 'spock', 'spock', 'rock', 'spock', 'scissors', 'paper', 'lizard', 'paper', 'rock', 'lizard', 'spock', 'rock', 'spock', 'scissors', 'paper', 'scissors', 'lizard', 'scissors', 'scissors', 'spock', 'scissors', 'rock', 'paper', 'rock', 'scissors', 'rock', 'paper', 'rock', 'lizard', 'lizard', 'spock', 'rock', 'rock', 'rock', 'rock', 'paper', 'spock', 'lizard', 'paper', 'paper', 'paper', 'lizard', 'paper', 'paper', 'rock', 'lizard', 'lizard', 'rock', 'rock', 'lizard', 'rock', 'paper', 'lizard', 'scissors', 'lizard', 'paper', 'paper', 'spock', 'spock', 'rock', 'scissors', 'paper', 'rock', 'spock', 'paper', 'paper', 'scissors', 'paper', 'scissors', 'scissors', 'lizard', 'scissors', 'scissors', 'lizard', 'paper', 'sciss

In [8]:
# Models need numeric inputs, so give every hand name an integer id:
# its position in this tuple (rock=0, paper=1, scissors=2, lizard=3, spock=4).
state_to_number = {
    hand: index
    for index, hand in enumerate(("rock", "paper", "scissors", "lizard", "spock"))
}

# Translate the recorded states into their integer ids, preserving order.
states_encoded = list(map(state_to_number.__getitem__, states))

In [9]:
# Verifying the conversion was successful.
# Check the invariants in code, then print the ids for visual inspection.
assert len(states_encoded) == len(states), "encoding must keep one id per state"
assert set(states_encoded) <= set(state_to_number.values()), "unexpected state id"
print(states_encoded)

[3, 1, 2, 0, 3, 2, 1, 2, 0, 1, 2, 0, 0, 1, 1, 1, 1, 3, 1, 0, 0, 0, 2, 4, 3, 0, 2, 2, 3, 4, 4, 0, 4, 2, 1, 3, 1, 0, 3, 4, 0, 4, 2, 1, 2, 3, 2, 2, 4, 2, 0, 1, 0, 2, 0, 1, 0, 3, 3, 4, 0, 0, 0, 0, 1, 4, 3, 1, 1, 1, 3, 1, 1, 0, 3, 3, 0, 0, 3, 0, 1, 3, 2, 3, 1, 1, 4, 4, 0, 2, 1, 0, 4, 1, 1, 2, 1, 2, 2, 3, 2, 2, 3, 1, 2, 3, 1, 3, 2, 3, 2, 3, 3, 0, 4, 4, 4, 2, 0, 4, 0, 3, 1, 3, 4, 1, 3, 3, 2, 0, 0, 0, 4, 4, 1, 1, 1, 2, 4, 4, 0, 1, 4, 3, 2, 0, 4, 1, 1, 0, 1, 1, 3, 2, 2, 2, 2, 4, 4, 3, 0, 1, 3, 1, 4, 3, 1, 4, 1, 3, 3, 0, 0, 2, 0, 3, 3, 3, 4, 0, 4, 2, 1, 0, 0, 0, 4, 4, 2, 3, 1, 2, 1, 3, 4, 0, 0, 1, 1, 0, 1, 1, 0, 3, 1, 2, 3, 0, 2, 4, 2, 0, 1, 3, 0, 3, 1, 1, 2, 4, 2, 4, 1, 2, 2, 4, 0, 1, 0, 2, 0, 3, 3, 0, 1, 1, 3, 2, 2, 2, 2, 1, 2, 2, 1, 3, 4, 3, 0, 2, 1, 3, 2, 1, 3, 3, 3, 2, 3, 2, 2, 3, 0, 3, 4, 2, 3, 4, 3, 1, 4, 4, 2, 2, 4, 3, 1, 4, 1, 1, 1, 0, 2, 1, 0, 2, 3, 1, 4, 1, 4, 1, 3, 0, 1, 4, 3, 4, 0, 4, 0, 0, 1, 4, 0, 0, 2, 4, 0, 0, 3, 1, 1, 1, 4, 0, 0, 1, 2, 4, 0, 3, 2, 2, 1, 0, 0, 1, 3, 4, 4, 2, 4, 

In [10]:
# Verifying rewards.
# Check the invariants in code, then print the rewards for visual inspection.
assert len(rewards) == len(states), "one reward is logged per state"
assert set(rewards) <= {-1, 0, 1}, "rewards are loss/draw/win only"
print(rewards)

[0, 0, 1, -1, 1, 1, 1, 1, 1, -1, 0, -1, -1, 0, 1, -1, -1, 1, -1, -1, 1, -1, 1, 1, -1, -1, 1, -1, -1, 1, -1, 1, -1, -1, -1, 0, 1, -1, -1, -1, 0, 1, 1, 0, 1, -1, -1, -1, -1, 0, 1, 1, 0, -1, -1, -1, 1, 1, -1, -1, 1, 1, -1, 0, 0, -1, 1, -1, -1, -1, 1, 1, -1, -1, 0, 1, -1, 1, 1, 1, 1, 1, -1, 0, 1, -1, 0, 0, 1, 0, 1, 0, 1, 0, 0, -1, 1, 1, 0, 1, -1, 1, 0, 1, -1, 1, -1, 1, 1, -1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, -1, 1, -1, 1, 1, 0, 0, 1, 0, -1, 1, -1, 1, 0, -1, -1, 1, 0, 1, 1, 0, -1, 1, -1, 0, 0, -1, -1, 1, -1, 0, 1, -1, 1, -1, -1, -1, 1, 1, 1, -1, 1, 1, 0, 1, 1, 1, 1, -1, 1, -1, -1, -1, 0, 1, -1, 1, 1, -1, 1, -1, -1, 1, 1, 0, 1, 1, -1, -1, 1, 1, 1, -1, 0, 1, 0, 1, 1, 1, 1, -1, -1, -1, -1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, -1, 1, -1, -1, -1, -1, 1, -1, 1, 0, -1, 1, -1, -1, -1, 1, 1, 0, 1, -1, 1, 0, -1, 1, -1, 1, 1, 0, 1, -1, 1, -1, 1, 1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 0, -1, -1, 0, -1, -1, 1, 1, 1, 0, -1, 0, -1, 1, 0, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, -1, -1, 0, -1, 1, 1, -1, -1, -1, 1, -1, -1, 0, -1

In [11]:
# Split the data.
# X contains converted states and y contains rewards.
# random_state pins the shuffle so the same train/test partition is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    states_encoded, rewards, test_size=0.2, random_state=42
)

Create Deep Learning Model with TensorFlow and Keras

In [12]:
# Create a function for the deep learning model adding layers and nodes.
# This model is built within a function so we can call it back whenever needed.
actions = 5


def build_model(states, actions, input_dim=5, hidden_units=24):
    """Build an uncompiled three-layer fully connected Keras model.

    Args:
        states: Not used by this function; kept so existing calls of the form
            ``build_model(states, actions)`` continue to work.
        actions: Number of units in the linear output layer.
        input_dim: Length of each input vector (default 5).
        hidden_units: Units in each of the two ReLU hidden layers (default 24).

    Returns:
        The ``Sequential`` model with two hidden layers and one output layer.
    """
    model = Sequential()
    model.add(Dense(hidden_units, activation='relu', input_shape=(input_dim,)))
    model.add(Dense(hidden_units, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model

In [13]:
# Instantiate the network through the build_model factory.
model = build_model(states=states, actions=actions)

In [14]:
# Summary of model: prints each layer with its output shape and parameter count,
# followed by the total / trainable / non-trainable parameter totals.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 24)                144       
                                                                 
 dense_1 (Dense)             (None, 24)                600       
                                                                 
 dense_2 (Dense)             (None, 5)                 125       
                                                                 
Total params: 869 (3.39 KB)
Trainable params: 869 (3.39 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [15]:
# Convert states to one-hot encoded vectors.
def state_to_one_hot(state, num_states=5):
    """Return a list of ``num_states`` zeros with a single 1 at index ``state``.

    Args:
        state: Integer state id in the range ``0 .. num_states - 1``.
        num_states: Length of the vector (default 5: rock, paper, scissors,
            lizard, spock).

    Raises:
        IndexError: If ``state`` falls outside ``0 .. num_states - 1``.
            Negative ids are rejected explicitly; plain list indexing would
            otherwise wrap around and silently mark the wrong slot.
    """
    if not 0 <= state < num_states:
        raise IndexError(
            "state {!r} is outside the valid range 0..{}".format(state, num_states - 1)
        )
    one_hot = [0] * num_states
    one_hot[state] = 1
    return one_hot

# One-hot encode every state id in both partitions.
X_train_one_hot = list(map(state_to_one_hot, X_train))
X_test_one_hot = list(map(state_to_one_hot, X_test))

# Stack the per-sample lists into NumPy arrays.
X_train_array, X_test_array = np.array(X_train_one_hot), np.array(X_test_one_hot)

# Report the resulting array shapes.
for _label, _array in (("X_train shape:", X_train_array), ("X_test shape:", X_test_array)):
    print(_label, _array.shape)


X_train shape: (2000, 5)
X_test shape: (500, 5)


In [16]:
# One-hot encode the target data.
# Rewards are -1/0/1, but to_categorical expects class indices in 0..num_classes-1.
# Shift them to 0 (loss), 1 (draw), 2 (win) rather than passing -1 through,
# where it would act as a negative index.
# num_classes stays 5 so the targets keep the same width as the model's
# 5-unit output layer; columns 3 and 4 are never set.
y_train_encoded = to_categorical(np.asarray(y_train) + 1, num_classes=5)
y_test_encoded = to_categorical(np.asarray(y_test) + 1, num_classes=5)

In [22]:
# Configure training (Adam at learning rate 0.001, MSE loss, accuracy metric),
# then fit on the one-hot training data for 200 epochs.
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='mse', metrics=['accuracy'])

fit_model = model.fit(x=X_train_array, y=y_train_encoded, epochs=200)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [23]:
# Score the trained model on the held-out test split and report its loss and accuracy.
evaluation = model.evaluate(X_test_array, y_test_encoded, verbose=2)
model_loss, model_accuracy = evaluation
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

16/16 - 0s - loss: 0.1269 - accuracy: 0.4000 - 129ms/epoch - 8ms/step
Loss: 0.12689444422721863, Accuracy: 0.4000000059604645
