In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Categorical
import random
import numpy as np

# Seed every random number generator the script draws from so that runs repeat
seed = 1234
for seed_rng in (random.seed, torch.manual_seed, np.random.seed):
    seed_rng(seed)

# Define the policy network using an MLP
class MLPPolicyNetwork(nn.Module):
    """Two-layer perceptron that turns a state vector into action probabilities."""

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return softmax probabilities over the last dimension of the output."""
        hidden = self.fc1(x).relu()
        scores = self.fc2(hidden)
        return scores.softmax(dim=-1)

# Function to select an action based on the policy network's output probabilities
def select_action(policy_net, state):
    """Sample one action index from the distribution ``policy_net`` gives for ``state``.

    ``policy_net(state)`` is used as the probabilities of a categorical
    distribution; the sampled index is returned as a plain Python number.
    """
    return Categorical(policy_net(state)).sample().item()

# Function to update the policy network based on the REINFORCE algorithm
def reinforce_update(policy_net, saved_log_probs, rewards, optimizer):
    """Apply one REINFORCE gradient step.

    The loss is the sum over steps of ``-log_prob * reward``; ``optimizer``
    is zeroed, the loss is back-propagated and one optimizer step is taken.
    ``policy_net`` is accepted for symmetry with the callers but not read.
    """
    per_step = [-lp * r for lp, r in zip(saved_log_probs, rewards)]
    total = torch.stack(per_step).sum()

    optimizer.zero_grad()
    total.backward()
    optimizer.step()

# Function to play a round of Rock Paper Scissors against the biased opponent
def play_round(action):
    """Play one round against an opponent that picks move 0 with probability 0.9.

    The remaining 0.1 of probability mass is split at random between
    moves 1 and 2 on every call.

    Returns:
        ``(opponent_action, reward, True)``; the trailing ``True`` is the
        "done" flag, so each round is a complete episode.
    """
    rock_prob = 0.9
    # Draw the share of the leftover mass that goes to the second move
    paper_prob = random.random() * (1 - rock_prob)
    move_weights = [rock_prob, paper_prob, 1 - rock_prob - paper_prob]
    (opponent_action,) = random.choices([0, 1, 2], weights=move_weights)

    return opponent_action, get_reward(action, opponent_action), True

# Function to determine the reward based on the chosen action and opponent's action
def get_reward(action, opponent_action):
    """Score a round: 1 for a win, 0 for a tie, -1 for a loss.

    Moves are coded 0, 1, 2; 0 beats 2, 1 beats 0 and 2 beats 1.
    """
    winning_pairs = ((0, 2), (1, 0), (2, 1))
    for mine, theirs in winning_pairs:
        if action == mine and opponent_action == theirs:
            return 1  # Win
    if action == opponent_action:
        return 0  # Tie
    return -1  # Lose

# Function to train the policy network using the REINFORCE algorithm
def train_policy_network(num_episodes=100000, hidden_size=128, lr=0.001):
    """Train an MLP policy with REINFORCE against the biased opponent.

    Every episode is the sequence of rounds returned by ``play_round`` until
    it reports ``done``; the policy is updated once per episode.

    Args:
        num_episodes: number of episodes to train for.
        hidden_size: width of the policy's hidden layer.
        lr: learning rate of the Adam optimizer.

    Returns:
        The trained ``MLPPolicyNetwork``.
    """
    input_size = 3  # Length of the (constant) state vector
    output_size = 3  # Number of possible actions (Rock, Paper, Scissors)

    policy_net = MLPPolicyNetwork(input_size, hidden_size, output_size)
    optimizer = optim.Adam(policy_net.parameters(), lr=lr)

    # The observation never changes, so build it once instead of per step
    state = torch.ones(1, input_size)

    for _ in range(num_episodes):
        saved_log_probs = []
        rewards = []

        done = False
        while not done:
            # One forward pass provides both the sampled action and the
            # log-probability that carries the gradient.
            action_probs = policy_net(state)
            action = Categorical(action_probs).sample().item()
            _, reward, done = play_round(action)
            saved_log_probs.append(torch.log(action_probs)[0][action])
            rewards.append(reward)

        reinforce_update(policy_net, saved_log_probs, rewards, optimizer)

    return policy_net

# Train the policy network
policy_network = train_policy_network()

# Evaluate the trained policy over a fixed number of independent rounds
num_trials = 2000
wins = losses = ties = 0
input_size = 3

for _ in range(num_trials):
    state = torch.ones(1, input_size)
    action = select_action(policy_network, state)
    opponent_action, reward, _ = play_round(action)

    # Exactly one of the three tallies grows per round
    wins += int(reward == 1)
    losses += int(reward == -1)
    ties += int(reward not in (1, -1))

print(f"Wins: {wins}, Losses: {losses}, Ties: {ties}")



Wins: 1831, Losses: 105, Ties: 64


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Categorical
import random
import torch.nn.functional as F

# Seed Python's and PyTorch's random number generators for repeatable runs
seed = 1234
for seed_rng in (random.seed, torch.manual_seed):
    seed_rng(seed)

# Define the policy network using an MLP
class MLPPolicyNetwork(nn.Module):
    """MLP policy whose forward pass samples an action and reports its log-probability."""

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return ``(action, log_prob)`` for an action sampled from the policy."""
        action_probs = self.fc2(self.fc1(x).relu()).softmax(dim=-1)
        policy = Categorical(probs=action_probs)
        chosen = policy.sample()
        return chosen, policy.log_prob(chosen)

# Function to select an action based on the policy network's output probabilities
def select_action(policy_net, state):
    """Return one sampled action index for ``state``.

    Supports both kinds of policy output: a network whose forward pass
    already samples and returns ``(action, log_prob)`` (as the
    ``MLPPolicyNetwork`` defined alongside this function does), and a
    network that returns a tensor of action probabilities.

    Args:
        policy_net: callable applied to ``state``.
        state: input passed through to ``policy_net`` unchanged.

    Returns:
        The chosen action as a plain Python number.
    """
    output = policy_net(state)
    if isinstance(output, tuple):
        # The network sampled for us; wrapping a tuple in Categorical would fail
        action, _ = output
        return action.item()
    return Categorical(output).sample().item()

# Function to update the policy network based on the REINFORCE algorithm
def reinforce_update(policy_net, saved_log_probs, rewards, optimizer):
    """Take one REINFORCE optimizer step and return the loss that was minimised.

    The loss is the sum over steps of ``-log_prob * reward``.
    ``policy_net`` is part of the signature but is not read here.
    """
    per_step = [-lp * r for lp, r in zip(saved_log_probs, rewards)]
    total = torch.stack(per_step).sum()

    optimizer.zero_grad()
    total.backward()
    optimizer.step()
    return total

# Function to play a round of Rock Paper Scissors against the biased opponent
def play_round(action, p):
    """Play one round against an opponent that picks move 0 with probability ``p``.

    The remaining ``1 - p`` is split at random between moves 1 and 2 on
    every call.

    Returns:
        ``(opponent_action, reward, True)``; the last item is a constant
        "done" flag.
    """
    # Draw the share of the leftover mass that goes to the second move
    second_prob = random.random() * (1 - p)
    move_weights = [p, second_prob, 1 - p - second_prob]
    (opponent_action,) = random.choices([0, 1, 2], weights=move_weights)

    return opponent_action, get_reward(action, opponent_action), True

# Function to determine the reward based on the chosen action and opponent's action
def get_reward(action, opponent_action):
    """Score a round: 1 for a win, 0 for a tie, -1 for a loss.

    Moves are coded 0, 1, 2; 0 beats 2, 1 beats 0 and 2 beats 1.
    """
    if action == opponent_action:
        return 0  # Tie
    player_wins = (
        (action == 0 and opponent_action == 2)
        or (action == 1 and opponent_action == 0)
        or (action == 2 and opponent_action == 1)
    )
    return 1 if player_wins else -1

# Function to train the policy network using the MAML algorithm
def train_policy_network(p_values, num_episodes=10000, num_inner_updates=1,
                         inner_lr=0.01, meta_lr=0.001):
    """Meta-train an MLP policy over opponents with different biases.

    This is first-order MAML. For every task ``p`` a copy of the policy
    takes ``num_inner_updates`` plain gradient steps on freshly sampled
    rounds; the gradient of one further sampled loss, evaluated at the
    adapted weights, is then used as that task's meta-gradient. The
    meta-gradients are averaged over tasks and applied once per episode.

    The copy is made with ``load_state_dict``, which only copies values, so
    back-propagating through the copy can never populate gradients on
    ``policy_net``. The meta-gradient is therefore written onto
    ``policy_net``'s parameters explicitly; without that the optimizer has
    nothing to apply and the network is never updated.

    Args:
        p_values: iterable of probabilities with which the opponent plays
            move 0; each value is one task.
        num_episodes: number of meta-updates.
        num_inner_updates: adaptation steps per task before evaluation.
        inner_lr: step size of the adaptation steps.
        meta_lr: learning rate of the Adam meta-optimizer.

    Returns:
        The meta-trained ``MLPPolicyNetwork``.
    """
    input_size = 3  # One-hot encoding of the opponent's move
    hidden_size = 128
    output_size = 3
    policy_net = MLPPolicyNetwork(input_size, hidden_size, output_size)
    meta_optimizer = optim.Adam(policy_net.parameters(), lr=meta_lr)

    p_values = list(p_values)
    if not p_values:
        # No tasks to learn from; return the untrained initialisation
        return policy_net

    def sampled_loss(net, p):
        """REINFORCE loss of ``net`` on one freshly sampled round of task ``p``."""
        # The action passed to play_round is a placeholder: only the
        # opponent's move is used, as the observation shown to the policy.
        opponent_move, _, _ = play_round(1, p)
        inp = F.one_hot(torch.tensor(opponent_move), 3).float()
        action, log_prob = net(inp)
        return -log_prob * get_reward(action.item(), opponent_move)

    meta_params = list(policy_net.parameters())
    for _ in range(num_episodes):
        meta_optimizer.zero_grad()
        for p in p_values:
            # Start each task from the current meta-parameters
            adapted = MLPPolicyNetwork(input_size, hidden_size, output_size)
            adapted.load_state_dict(policy_net.state_dict())
            adapted_params = list(adapted.parameters())

            for _ in range(num_inner_updates):
                grads = torch.autograd.grad(sampled_loss(adapted, p), adapted_params)
                with torch.no_grad():
                    for param, grad in zip(adapted_params, grads):
                        param -= inner_lr * grad

            # First-order meta-gradient: gradient at the adapted weights,
            # accumulated (averaged over tasks) on the meta-parameters.
            post_grads = torch.autograd.grad(sampled_loss(adapted, p), adapted_params)
            for meta_param, grad in zip(meta_params, post_grads):
                grad = grad / len(p_values)
                if meta_param.grad is None:
                    meta_param.grad = grad
                else:
                    meta_param.grad += grad

        meta_optimizer.step()

    return policy_net

# Train the policy network for different initial bias values
p_values = [0.3, 0.5, 0.7,0.8,0.9,0.32,0.45]
#policy_network = train_policy_network(p_values)
# NOTE(review): the training call above is commented out, so `policy_network`
# must already be defined before the loop below runs.

# Test the policy network
num_trials = 70
p_values = [0.7,0.8,0.9,0.3,0.5,0.2,0.4]
# Per-task tallies, keyed by the opponent bias p
wins = {p: 0 for p in p_values}
losses = {p: 0 for p in p_values}
ties = {p: 0 for p in p_values}
for p in p_values:
    # Every task starts from a fresh copy of the pretrained weights
    p_net = MLPPolicyNetwork(3, 128, 3)
    opt = torch.optim.Adam(p_net.parameters(), 1e-3)
    p_net.load_state_dict(policy_network.state_dict())

    # Adapt to this opponent with one REINFORCE step per sampled round
    for _ in range(num_trials):
        state, _, _ = play_round(1, p)  # Only the opponent's move is used
        inp = F.one_hot(torch.tensor(state), 3).float()
        out, log_prob = p_net(inp)
        reward = get_reward(out.item(), state)
        loss = -log_prob * reward
        opt.zero_grad()
        loss.backward()
        opt.step()

    # Evaluate the adapted policy; no gradients are needed here
    with torch.no_grad():
        for _ in range(num_trials):
            state, _, _ = play_round(1, p)
            inp = F.one_hot(torch.tensor(state), 3).float()
            out, _ = p_net(inp)
            reward = get_reward(out.item(), state)
            if reward == 1:
                wins[p] += 1
            elif reward == 0:
                ties[p] += 1
            else:
                losses[p] += 1
    print(p, wins[p], ties[p], losses[p])













0.7 41 17 12
0.8 47 17 6
0.9 53 11 6
0.3 34 21 15
0.5 42 12 16
0.2 37 21 12
0.4 33 23 14


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Categorical
import torch.nn.functional as F

# Integer codes for the three moves
rock, paper, scissors = range(3)

# Payoff table: read with the row as the player's move and the column as the
# opponent's move, entries are +1 (win), 0 (tie) and -1 (loss).
outcome_matrix = torch.tensor([
    [0, -1, 1],
    [1, 0, -1],
    [-1, 1, 0],
])

class RPS_MLP_LSTM(nn.Module):
    """Single-layer LSTM followed by a linear head, applied to a 2-D sequence."""

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        # Recurrent encoder over the move sequence
        self.lstm = nn.LSTM(input_dim, hidden_dim)
        # Maps each hidden state to per-action scores
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return one row of action scores for every row of ``x``.

        ``x`` must be two-dimensional; the LSTM starts from all-zero hidden
        and cell states on every call.
        """
        _steps, _features = x.size()  # Rejects inputs that are not 2-D
        zero_hidden = torch.zeros(1, self.hidden_dim).to(x.device)
        zero_cell = torch.zeros(1, self.hidden_dim).to(x.device)
        hidden_seq, _ = self.lstm(x, (zero_hidden, zero_cell))
        return self.fc(hidden_seq)

# Build the model: 3 input features, 32 LSTM units, 3 output scores
input_dim, hidden_dim, output_dim = 3, 32, 3
model = RPS_MLP_LSTM(input_dim, hidden_dim, output_dim)

# Define the REINFORCE training function
def train_reinforce(model, num_episodes, sequence_length, lr, probabilities=None):
    """Train ``model`` with REINFORCE against an opponent with skewed move odds.

    Each episode samples ``sequence_length`` opponent moves, feeds their
    one-hot encoding through ``model`` in a single pass, samples one action
    per step from the softmax of the outputs, and weights each step's
    log-probability by the reward summed from that step to the end of the
    sequence. The loss is printed after every episode.

    Args:
        model: module called with a ``(sequence_length, 3)`` float tensor;
            assumed to return one row of 3 action scores per step.
        num_episodes: number of sampled sequences / optimizer steps.
        sequence_length: number of rounds per episode.
        lr: learning rate of the Adam optimizer.
        probabilities: optional 1-D tensor of sampling weights for the
            opponent's (rock, paper, scissors); defaults to
            ``[0.9, 0.1, 0.0]``.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr)
    if probabilities is None:
        probabilities = torch.tensor([0.9, 0.1, 0.0])

    # payoff[action, opponent_move]: +1 win, 0 tie, -1 loss
    payoff = torch.tensor([[0.0, -1.0, 1.0],
                           [1.0, 0.0, -1.0],
                           [-1.0, 1.0, 0.0]])

    for episode in range(num_episodes):
        # Generate a single sequence of opponent moves
        input_sequence = torch.multinomial(probabilities, sequence_length, replacement=True)
        input_one_hot = F.one_hot(input_sequence, 3)

        # Pass the whole sequence through the model and sample every action
        outputs = model(input_one_hot.float())
        m = Categorical(torch.softmax(outputs, dim=-1))
        action_ = m.sample()
        log_prob = m.log_prob(action_)

        # Per-step rewards via table lookup instead of a Python loop
        rewards = payoff[action_, input_sequence]

        # Reward-to-go: suffix sums, computed as a cumsum over the reversed sequence
        cum_rewards = torch.flip(torch.cumsum(torch.flip(rewards, dims=[0]), dim=0), dims=[0])

        # Compute the loss and update the model
        loss = -torch.sum(log_prob * cum_rewards)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print the loss for monitoring
        print("Episode: {}, Loss: {}".format(episode+1, loss.item()))

# Example usage
num_episodes, sequence_length, learning_rate = 10000, 100, 0.001

train_reinforce(
    model,
    num_episodes=num_episodes,
    sequence_length=sequence_length,
    lr=learning_rate,
)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Categorical
import torch.nn.functional as F

# Integer codes for the three moves
rock, paper, scissors = 0, 1, 2

# Payoff table: read with the row as the player's move and the column as the
# opponent's move, entries are +1 (win), 0 (tie) and -1 (loss).
_payoff_rows = [
    [0, -1, 1],
    [1, 0, -1],
    [-1, 1, 0],
]
outcome_matrix = torch.tensor(_payoff_rows)

class RPS_MLP_LSTM(nn.Module):
    """Single-layer LSTM with a linear read-out over a 2-D input sequence."""

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        # Recurrent encoder
        self.lstm = nn.LSTM(input_dim, hidden_dim)
        # Linear read-out producing per-action scores
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Run ``x`` through the LSTM from a zero state and score every step.

        ``x`` must be two-dimensional.
        """
        _steps, _features = x.size()  # Rejects inputs that are not 2-D
        initial_state = tuple(
            torch.zeros(1, self.hidden_dim).to(x.device) for _ in range(2)
        )
        hidden_seq, _ = self.lstm(x, initial_state)
        return self.fc(hidden_seq)

# Build the model: 3 input features, 32 LSTM units, 3 output scores
input_dim, hidden_dim, output_dim = 3, 32, 3
model = RPS_MLP_LSTM(input_dim, hidden_dim, output_dim)

# Define the REINFORCE training function
def _rollout_loss(net, probabilities, sequence_length):
    """Play one sampled opponent sequence with ``net`` and return its REINFORCE loss."""
    # payoff[action, opponent_move]: +1 win, 0 tie, -1 loss
    payoff = torch.tensor([[0.0, -1.0, 1.0],
                           [1.0, 0.0, -1.0],
                           [-1.0, 1.0, 0.0]])
    opponent_moves = torch.multinomial(probabilities, sequence_length, replacement=True)
    outputs = net(F.one_hot(opponent_moves, 3).float())
    dist = Categorical(torch.softmax(outputs, dim=-1))
    actions = dist.sample()
    rewards = payoff[actions, opponent_moves]
    # Reward-to-go: suffix sums, computed as a cumsum over the reversed sequence
    returns = torch.flip(torch.cumsum(torch.flip(rewards, dims=[0]), dim=0), dims=[0])
    return -torch.sum(dist.log_prob(actions) * returns)


def train_reinforce(model, num_episodes, sequence_length, lr,
                    task_probabilities=None, inner_lr=0.01):
    """Meta-train ``model`` over several opponent move distributions.

    This is first-order MAML on top of REINFORCE. For each task a deep copy
    of ``model`` takes one plain gradient step on a sampled sequence; the
    gradient of a second sampled loss, evaluated at the adapted weights, is
    that task's meta-gradient. Meta-gradients are averaged over tasks and
    applied to ``model`` once per episode, and the mean post-adaptation
    loss is printed.

    The meta-gradient is written onto ``model``'s parameters explicitly:
    a copy shares no autograd graph with ``model``, so back-propagating a
    loss computed on the copy would leave ``model`` without gradients and
    the optimizer step would change nothing.

    Args:
        model: module called with a ``(sequence_length, 3)`` float tensor;
            assumed to return one row of 3 action scores per step.
        num_episodes: number of meta-updates.
        sequence_length: number of rounds per sampled sequence.
        lr: learning rate of the Adam meta-optimizer.
        task_probabilities: optional list of 1-D tensors with sampling
            weights for the opponent's (rock, paper, scissors); defaults to
            the six built-in tasks.
        inner_lr: step size of the per-task adaptation step.

    Raises:
        ValueError: if ``task_probabilities`` is given but empty.
    """
    import copy

    optimizer = optim.Adam(model.parameters(), lr=lr)
    if task_probabilities is None:
        task_probabilities = [
            torch.tensor([0.9, 0.1, 0.0]),
            torch.tensor([0.8, 0.1, 0.1]),
            # NOTE(review): these weights do not sum to 1; torch.multinomial
            # treats them as relative weights. Possibly a typo - confirm.
            torch.tensor([0.1, 0.9, 0.8]),
            torch.tensor([0.5, 0.5, 0.0]),
            torch.tensor([0.1, 0.1, 0.8]),
            torch.tensor([0.2, 0.1, 0.7]),
        ]
    if len(task_probabilities) == 0:
        raise ValueError("task_probabilities must contain at least one task")

    meta_params = list(model.parameters())
    num_tasks = len(task_probabilities)

    for episode in range(num_episodes):
        optimizer.zero_grad()
        post_losses = []
        for probabilities in task_probabilities:
            # Start each task from the current meta-parameters
            adapted = copy.deepcopy(model)
            adapted_params = list(adapted.parameters())

            # Inner adaptation step (plain SGD) on one sampled sequence
            inner_loss = _rollout_loss(adapted, probabilities, sequence_length)
            inner_grads = torch.autograd.grad(inner_loss, adapted_params)
            with torch.no_grad():
                for param, grad in zip(adapted_params, inner_grads):
                    param -= inner_lr * grad

            # Evaluate the adapted weights on a fresh sequence of the same task
            post_loss = _rollout_loss(adapted, probabilities, sequence_length)
            post_grads = torch.autograd.grad(post_loss, adapted_params)
            for meta_param, grad in zip(meta_params, post_grads):
                grad = grad / num_tasks
                if meta_param.grad is None:
                    meta_param.grad = grad
                else:
                    meta_param.grad += grad
            post_losses.append(post_loss.detach())

        optimizer.step()
        loss_tot = torch.sum(torch.stack(post_losses))/len(post_losses)
        print("Episode: {}, Loss: {}".format(episode+1, loss_tot.item()))

# Example usage
num_episodes, sequence_length, learning_rate = 1000, 100, 0.001

train_reinforce(
    model,
    num_episodes=num_episodes,
    sequence_length=sequence_length,
    lr=learning_rate,
)


Episode: 1, Loss: 87.02294158935547
Episode: 2, Loss: 138.2305145263672
Episode: 3, Loss: 94.677978515625
Episode: 4, Loss: 12.0185546875
Episode: 5, Loss: 159.50355529785156
Episode: 6, Loss: -36.04782485961914
Episode: 7, Loss: 90.3535385131836
Episode: 8, Loss: -68.0043716430664
Episode: 9, Loss: 119.5509033203125
Episode: 10, Loss: 341.01055908203125
Episode: 11, Loss: -121.37286376953125
Episode: 12, Loss: 319.4160461425781
Episode: 13, Loss: 38.41069412231445
Episode: 14, Loss: -407.9892578125
Episode: 15, Loss: 17.099451065063477
Episode: 16, Loss: 258.1130065917969
Episode: 17, Loss: -379.1492004394531
Episode: 18, Loss: -54.5765495300293
Episode: 19, Loss: 162.23150634765625
Episode: 20, Loss: 5.848731994628906
Episode: 21, Loss: -54.19862747192383
Episode: 22, Loss: -49.76011276245117
Episode: 23, Loss: 136.52735900878906
Episode: 24, Loss: -370.3114013671875
Episode: 25, Loss: 470.4056396484375
Episode: 26, Loss: 132.26548767089844
Episode: 27, Loss: 454.7788391113281
Episod

In [None]:
print(model)
test_model = RPS_MLP_LSTM(input_dim, hidden_dim, output_dim)
test_model.load_state_dict(model.state_dict())
probabilities = torch.tensor([1.0, 0.0, 0.0])  # Opponent always plays move 0 (rock)

# Fine-tune a copy so that `model` keeps its trained weights
model_cloned = RPS_MLP_LSTM(input_dim, hidden_dim, output_dim)
model_cloned.load_state_dict(model.state_dict())
# Build the optimizer once: re-creating Adam every episode would throw away
# its running moment estimates on each step.
opt_cloned = torch.optim.Adam(model_cloned.parameters(), 1e-3)

for episode in range(200):
    # Generate a single sequence of opponent moves
    input_sequence = torch.multinomial(probabilities, sequence_length, replacement=True)
    input_one_hot = F.one_hot(input_sequence, 3)

    # Pass the input sequence through the model and sample every action
    outputs = model_cloned(input_one_hot.float())
    m = Categorical(torch.softmax(outputs, dim=-1))
    action_ = m.sample()
    log_prob = m.log_prob(action_)
    print(action_)

    # Per-step rewards: outcome_matrix[action, opponent_move]
    rewards = outcome_matrix[action_, input_sequence].float()

    # Reward-to-go: suffix sums, computed as a cumsum over the reversed sequence
    cum_rewards = torch.flip(torch.cumsum(torch.flip(rewards, dims=[0]), dim=0), dims=[0])

    # Compute the loss and update the fine-tuned copy
    loss = -torch.sum(log_prob * cum_rewards)
    opt_cloned.zero_grad()
    loss.backward()
    opt_cloned.step()
    print(torch.sum(cum_rewards))


RPS_MLP_LSTM(
  (lstm): LSTM(3, 32)
  (fc): Linear(in_features=32, out_features=3, bias=True)
)
tensor([2, 2, 0, 1, 0, 0, 0, 0, 1, 2, 2, 0, 0, 2, 0, 1, 1, 0, 0, 0, 2, 0, 1, 2,
        1, 2, 1, 1, 2, 0, 1, 1, 2, 0, 1, 2, 1, 2, 2, 2, 1, 0, 2, 0, 2, 1, 2, 0,
        2, 1, 1, 1, 1, 2, 2, 0, 0, 0, 0, 1, 0, 0, 0, 1, 2, 1, 0, 1, 2, 2, 2, 1,
        1, 1, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 2, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0,
        1, 2, 0, 1])
tensor(386.)
tensor([1, 0, 1, 1, 0, 2, 1, 0, 1, 0, 1, 1, 2, 0, 1, 2, 1, 0, 1, 2, 0, 2, 0, 2,
        0, 2, 2, 2, 0, 1, 2, 0, 1, 0, 1, 2, 0, 2, 0, 2, 1, 1, 1, 0, 0, 1, 1, 2,
        0, 2, 0, 1, 0, 2, 1, 0, 0, 2, 2, 0, 1, 2, 2, 2, 0, 1, 2, 2, 2, 2, 1, 1,
        1, 2, 2, 1, 0, 1, 0, 2, 1, 1, 0, 2, 1, 1, 0, 2, 2, 1, 1, 0, 1, 2, 2, 0,
        0, 2, 0, 2])
tensor(-309.)
tensor([1, 1, 2, 1, 0, 2, 2, 0, 0, 1, 2, 1, 2, 0, 0, 1, 0, 1, 1, 0, 2, 2, 0, 2,
        1, 0, 1, 2, 2, 0, 1, 0, 0, 2, 0, 1, 0, 2, 2, 0, 1, 1, 1, 2, 2, 2, 2, 0,
        0, 2, 1, 1, 1, 1, 0, 2, 0, 