In [None]:
import numpy as np
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
from collections import namedtuple, deque
import torch.optim as optim
import time
import tensorflow as tf
import math
import tensorflow_probability as tfp

In [None]:
class ActorCriticModel(tf.keras.Model):
    """
    Policy and state-value networks sharing two hidden layers.

    The two heads are:
      * ``pi_out`` - softmax over ``action_size`` actions (the policy)
      * ``v_out``  - a single linear unit (the state value)
    """
    def __init__(self, action_size, n_hidden1=1024, n_hidden2=512):
        """
        action_size: number of discrete actions the policy head outputs
        n_hidden1, n_hidden2: widths of the two shared hidden layers
        """
        super(ActorCriticModel, self).__init__()

        #Hidden Layer 1
        self.fc1 = tf.keras.layers.Dense(n_hidden1, activation='relu')
        #Hidden Layer 2
        self.fc2 = tf.keras.layers.Dense(n_hidden2, activation='relu')

        #Output Layer for policy
        self.pi_out = tf.keras.layers.Dense(action_size, activation='softmax')
        #Output Layer for state-value
        self.v_out = tf.keras.layers.Dense(1)

    def call(self, state):
        """
        Computes policy distribution and state-value for a single state.

        Returns ``(pi, v)`` with shapes ``(1, action_size)`` and ``(1, 1)``.
        """
        # Dense layers read their input as (batch, features). The state is
        # flattened into ONE row so every component of the state reaches the
        # network; reshaping to (2, 1) would instead produce two unrelated
        # one-feature samples. The cast keeps integer states (e.g. ball and
        # wicket counts) usable as Dense input.
        state = tf.reshape(tf.cast(state, tf.float32), (1, -1))
        hidden = self.fc2(self.fc1(state))

        pi = self.pi_out(hidden)
        v = self.v_out(hidden)

        return pi, v

In [None]:
class Agent:
    """
    One-step actor-critic agent built around an ActorCriticModel.
    """
    def __init__(self, action_size, lr=0.001, gamma=0.99, seed = 85):
        """
        action_size: number of discrete actions
        lr: learning rate handed to the Adam optimizer
        gamma: discount factor used in the TD error
        seed: seed applied to the NumPy and TensorFlow global generators
        """
        self.gamma = gamma
        # Seed before the model is created. Actions are sampled through
        # TensorFlow Probability, so the TensorFlow generator has to be seeded
        # as well for runs to be repeatable.
        np.random.seed(seed)
        tf.random.set_seed(seed)
        self.ac_model = ActorCriticModel(action_size=action_size)
        self.ac_model.compile(tf.keras.optimizers.Adam(learning_rate=lr))

    def sample_action(self, state):
        """
        Given a state, compute the policy distribution over all actions and sample one action
        """
        pi,_ = self.ac_model(state)

        action_probabilities = tfp.distributions.Categorical(probs=pi)
        sample = action_probabilities.sample()

        # The sample has one entry per row of ``pi``; the first row is used.
        return int(sample.numpy()[0])

    def actor_loss(self, action, pi, delta):
        """
        Compute Actor Loss: negative log-probability of the taken action,
        weighted by the TD error ``delta``.
        """
        # NOTE(review): ``delta`` is not wrapped in tf.stop_gradient, so this
        # term also sends gradient into the value head - confirm that is intended.
        return -tf.math.log(pi[0,action]) * delta

    def critic_loss(self,delta):
        """
        Critic loss aims to minimize TD error
        """
        return delta**2

    @tf.function
    def learn(self, state, action, reward, next_state, done):
        """
        For a given transition (s,a,s',r) update the parameters by computing the
        gradient of the total (actor + critic) loss.

        ``done`` may be a Python bool or a boolean tensor.
        """
        # The gradient is taken exactly once, so a non-persistent tape is enough.
        with tf.GradientTape() as tape:
            pi, V_s = self.ac_model(state)
            _, V_s_next = self.ac_model(next_state)

            V_s = tf.squeeze(V_s)
            V_s_next = tf.squeeze(V_s_next)

            # A terminal transition has no bootstrap value. Selecting with
            # tf.where keeps the dtype fixed, so this works inside tf.function
            # for both Python and tensor ``done``.
            V_s_next = tf.where(tf.cast(done, tf.bool), tf.zeros_like(V_s_next), V_s_next)

            # TD error: r + gamma * V(s') - V(s)
            delta = reward + self.gamma * V_s_next - V_s
            loss_a = self.actor_loss(action, pi, delta)
            loss_c = self.critic_loss(delta)
            loss_total = loss_a + loss_c

        gradient = tape.gradient(loss_total, self.ac_model.trainable_variables)
        self.ac_model.optimizer.apply_gradients(zip(gradient, self.ac_model.trainable_variables))

In [None]:
class Team():
  """
  One side of the match: holds the batting agent and the two feature tables,
  and answers the queries the Match object makes (next batter, next bowler,
  batting/bowling action, exploration draws).
  """

  def __init__(self,self_features,opponent_features):
    """
    self_features / opponent_features: per-player feature tables; columns 0:2
    are read as batting features and 2:4 as bowling features by this class.
    """
    # There are six batting actions (get_explore_actions draws them from 0..5).
    # Keyword arguments are used on purpose: the positional call Agent(2,6,0)
    # would bind to action_size=2, lr=6, gamma=0.
    self.agent = Agent(action_size=6, seed=0)
    self.self_features = self_features
    self.opponent_features = opponent_features
    self.explore_wicket = 0
    self.explore_runs = 0

  def get_next_batter(self,order,wicket):
    """
    Return the key of ``order`` whose value is largest at index ``wicket``.

    order: dict mapping batter -> score, where a score is either a scalar or a
           sequence indexed by wicket count. It is not modified.
    wicket: index into each batter's score sequence; it is clamped to the
            valid range, so scalar scores and short sequences are accepted.
    """
    def value_at_wicket(batter):
      values = np.atleast_1d(np.asarray(order[batter], dtype=float))
      index = min(max(int(wicket), 0), values.size - 1)
      return values[index]

    # Ascending sort and take the last element, so that among equal scores the
    # batter that appears last in ``order`` wins.
    return sorted(order, key=value_at_wicket)[-1]

  def get_next_bowler(self):
    """Pick a bowler index uniformly at random from 0..4."""
    return np.random.randint(0,5)

  def get_batting_action(self, ball, total_runs, wickets_left, score_to_chase, feature_batter, feature_bowler): # code for UCTS
    """Sample a batting action from the agent's policy for the state (ball, wickets_left)."""
    state = np.array([ball, wickets_left])
    return self.agent.sample_action(state)

  def get_bowling_action(self,ball,total_runs,wickets_left,score_to_chase,feature_batter,feature_bowler): # code for UCTS
    """Pick a bowling action uniformly at random from 0..2; the arguments are not used."""
    bowling_action = np.random.randint(0,3)
    return bowling_action

  def get_explore_actions(self):
    """
    Draw a random batter, bowler, batting action and bowling action.

    Both the batter's and the bowler's features are read from this team's own
    feature table. Returns
    (feature_batter, feature_bowler, batting_action, bowling_action, batter, bowler).
    """
    batter = np.random.randint(0,5)
    bowler = np.random.randint(0,5)
    batting_action = np.random.randint(0,6)
    bowling_action = np.random.randint(0,3)
    feature_batter = self.self_features[batter, 0:2]
    feature_bowler = self.self_features[bowler, 2:4]
    return feature_batter, feature_bowler, batting_action, bowling_action, batter, bowler

  def set_explore_outcomes(self,wicket,runs):
    """Store the outcome of the latest exploration ball."""
    self.explore_wicket = wicket
    self.explore_runs = runs

  def explore_compute(self):
    """Hook called after every exploration ball; currently does nothing."""
    pass

  def explore_dp(self):
    """Hook called once after the exploration loop; currently does nothing."""
    pass

  # def call_dqn(self):
  #   agent = Agent(state_size= 3, action_size = 6, seed = 0)
  #   scores = self.dqn()

class Australia(Team):
  """Team subclass without any overrides; it only gives the side its own type."""

class India(Team):
  """Team subclass without any overrides; it only gives the side its own type."""

In [None]:
# Parameters of the ball-outcome model used by Match.get_outcome. Each
# (min, max) pair is blended linearly by a player feature in [1, 5].

# Wicket probability per batting action: the *_min entry applies at batter
# feature 1, the *_max entry at batter feature 5.
pout_actions_min = np.array([0.005, 0.01, 0.02, 0.1, 0.2, 0.0])
pout_actions_max = np.array([0.05, 0.1, 0.15, 0.25, 0.35, 0.0])

# Probability that the attempted runs are scored: *_max at batter feature 1,
# *_min at batter feature 5.
prun_features_min, prun_features_max = 0.25, 0.65

# Multiplier on the wicket probability for bowling action 2 ("aggression")
# and bowling action 0 ("economical"): *_max at bowler feature 1, *_min at 5.
risk_min_aggression, risk_max_aggression = 1.05, 1.2
risk_min_economical, risk_max_economical = 0.5, 0.8

# Offset added to the run probability for the same two bowling actions:
# *_max at bowler feature 1, *_min at bowler feature 5.
easy_min_aggression, easy_max_aggression = 0.3, 0.2
easy_min_economical, easy_max_economical = -0.05, -0.2

# Runs attempted by each of the six batting actions.
batting_action_runs_map = np.array([0, 1, 2, 3, 4, 6])
# Not referenced by the code visible in this notebook.
bowling_action_risk_map = np.array([-0.5, 0, 1])


class Match:

  def __init__(self,num_balls, explore_num_balls,action_timeout, explore_timeout, TeamOne, TeamTwo):
    # self.environment = Environment()
    self.num_balls = num_balls
    self.explore_num_balls = explore_num_balls
    self.action_timeout = action_timeout
    self.explore_timeout = explore_timeout
    self.team_one_features = np.random.uniform(1,5,size=(5,4))
    self.team_two_features = np.random.uniform(1,5,size=(5,4))
    self.team_one = TeamOne(self.team_one_features, self.team_two_features)
    self.team_two = TeamTwo(self.team_two_features, self.team_one_features)
    self.current_batters_list =np.array([1,1,1,1,1])
    self.current_bowlers_list =np.array([2,2,2,2,2])
    self.num_miss_team_batting = 0
    self.num_miss_team_bowling = 0
    self.order = dict.fromkeys([0,1,2,3,4],0)


  def explore_phase_team(self,team_id):
    if (team_id == 1):
      team = self.team_one
    else:
      team = self.team_two
    start_time = time.time()

    for balls in range(self.explore_num_balls):
      feature_batter,feature_bowler, batting_action, bowling_action,batter,bowler = team.get_explore_actions()
      wicket, runs = self.get_outcome(feature_batter, feature_bowler, batting_action, bowling_action)

      max_balls = 60
      max_wickets = 5
      max_v = 0
      V=np.zeros((max_balls,max_wickets),dtype=float)
      Q=np.zeros((max_balls,max_wickets,5),dtype=float)
      V[max_balls-1,:] = self.Calculate_Value(np.zeros((max_wickets),dtype=float), feature_batter, feature_bowler)
      for i in range(max_balls-2,-1,-1):
        V[i,:]=self.Calculate_Value(V[i+1,:], feature_batter, feature_bowler)
      self.order[batter] += sum(V)/300


      team.set_explore_outcomes(wicket,runs)
      team.explore_compute()

    team.explore_dp()
    end_time = time.time()
    if(end_time - start_time > self.explore_timeout):
      print("Timing Violation During Exploration Phase")
    return self.order


  def explore_phase(self):
    order1 = self.explore_phase_team(1)
    order2 = self.explore_phase_team(2)
    return order1,order2


  def get_valid_bowler(self,next_bowler):
    if (self.current_bowlers_list[next_bowler]==0):
      print("Bowler Invalid, Choosing Random Bowler")
      bowlers_with_overs_left = np.where(self.current_bowlers_list>0)[0]
      next_bowler = np.random.choice(bowlers_with_overs_left)
    return next_bowler


  def get_valid_batter(self,next_batter):
    if (self.current_batters_list[next_batter]==0):
      print("Batter Invalid, Choosing Random Batter")
      batters_not_out = np.where(self.current_batters_list>0)[0]
      next_batter = np.random.choice(batters_not_out)
    return next_batter

  def next_batter(self,team_batting, order,wicket):
    next_batter = team_batting.get_next_batter(order,wicket)
    next_batter = self.get_valid_batter(next_batter)
    feature_batter = team_batting.self_features[next_batter,0:2]
    return next_batter,feature_batter


  def next_bowler(self,team_bowling):
    next_bowler = team_bowling.get_next_bowler()
    next_bowler = self.get_valid_bowler(next_bowler)
    feature_bowler = team_bowling.self_features[next_bowler,2:4]
    return next_bowler, feature_bowler

  def get_team_batting_action(self,team_batting,ball,total_runs,wickets_left,runs_to_chase,feature_batter,feature_bowler):
    start_time      = time. time()
    batting_action  = team_batting.get_batting_action(ball,total_runs,wickets_left,runs_to_chase,feature_batter,feature_bowler)
    end_time        = time. time()
    if(end_time - start_time > self.action_timeout):
      batting_action = 0 #this is the default option, we have to fix the penalisation strategy
      self.num_miss_team_batting = self.num_miss_team_batting + 1
    return batting_action


  def get_team_bowling_action(self,team_bowling,ball,total_runs,wickets_left,runs_to_chase,feature_batter,feature_bowler):
    start_time      = time. time()
    bowling_action  = team_bowling.get_bowling_action(ball,total_runs,wickets_left,runs_to_chase,feature_batter,feature_bowler)
    end_time        = time. time()
    if(end_time - start_time > self.action_timeout):
      bowling_action = 0 #this is the default option, we have to fix the penalisation strategy
      self.num_miss_team_bowling = self.num_miss_team_bowling + 1
    return bowling_action


  def get_outcome(self,feature_batter, feature_bowler, batting_action, bowling_action):
    """
    Simulate one ball and return ``(wicket, runs)`` as plain Python ints.

    wicket is 1 when the batter is out and 0 otherwise; runs is 0 on a wicket,
    otherwise either 0 or the run value attached to ``batting_action``.
    Previously ``runs`` was a length-1 array on one path and an int on the
    other; both paths now return an int.

    feature_batter: [0] drives the wicket probability, [1] the run probability
    feature_bowler: [0] drives the risk multiplier, [1] the run-probability offset
    bowling_action: 2 and 0 select the "aggression" / "economical" parameters,
                    any other value leaves risk = 1 and offset = 0
    """
    def blend(value_at_one, value_at_five, feature):
      # Linear interpolation over the feature range [1, 5].
      weight = (feature - 1) / 4
      return value_at_one * (1 - weight) + weight * value_at_five

    pout = blend(pout_actions_min[batting_action], pout_actions_max[batting_action], feature_batter[0])
    risk = 1
    if (bowling_action == 2):
      risk = blend(risk_max_aggression, risk_min_aggression, feature_bowler[0])
    if (bowling_action == 0):
      risk = blend(risk_max_economical, risk_min_economical, feature_bowler[0])
    # Clip so that features outside [1, 5] cannot yield an invalid probability.
    pout = float(np.clip(pout * risk, 0.0, 1.0))

    wicket = int(np.random.choice(2, p=[1 - pout, pout]))
    if wicket:
      return wicket, 0

    prun = blend(prun_features_max, prun_features_min, feature_batter[1])
    easy = 0
    if (bowling_action == 2):
      easy = blend(easy_max_aggression, easy_min_aggression, feature_bowler[1])
    if (bowling_action == 0):
      easy = blend(easy_max_economical, easy_min_economical, feature_bowler[1])
    prun = float(np.clip(prun + easy, 0.0, 1.0))

    scored = int(np.random.choice(2, p=[1 - prun, prun]))
    runs = int(batting_action_runs_map[batting_action]) * scored
    return wicket, runs


  def Calculate_Value(self, V_in, feature_batter, feature_bowler):
    #mcts
    runs=[0,1,2,3,4,6]
    max_wickets = 5
    Q_out=np.zeros((max_wickets,6),dtype=float)
    V_out=np.zeros(np.size(V_in),dtype=float) # max wickets size
    shot=np.zeros(np.size(V_in),dtype=int)

    for i in range(1,np.size(V_in)):
      for a in range(np.size(runs)):
        x = np.random.randint(0,3)
        p_w = ((x+0.05)*(a+0.05)*feature_batter[0])/(feature_bowler[0]+62) #self.p_out(feature_batter[0], feature_bowler[0])[x, a] # batting features, bowling features, batting_action, bowling_action ###
        p_r = ((feature_bowler[1]+0.05)/((a+5)*feature_batter[1])) - 0.01  #self.p_run(feature_batter[1], feature_bowler[1])[x]
        # a being runs
        Q_out[i][a]=(1-p_w)*p_r*runs[a]+p_w*V_in[i-1]+(1-p_w)*V_in[i]
      V_out[i]=np.max(Q_out[i,:])
      shot[i]=runs[np.argmax(Q_out[i,:])]
    return V_out


  def innings(self,innigins_id, runs_to_chase, order):

    total_runs = 0
    wickets_left = 5
    self.current_batters_list = np.array([1,1,1,1,1])
    self.current_bowlers_list = np.array([2,2,2,2,2])
    self.num_miss_team_batting = 0
    self.num_miss_team_bowling = 0
    if (innigins_id == 1 ):
      team_batting = self.team_one
      team_bowling = self.team_two
    else:
      team_batting = self.team_two
      team_bowling = self.team_one
    # Initialising the first batter and first bowler
    batter, feature_batter = self.next_batter(team_batting,order,5-wickets_left)
    bowler, feature_bowler = self.next_bowler(team_bowling)

    for ball in range(self.num_balls):
      if np.sum(self.current_batters_list) > 0 :
        batting_action = self.get_team_batting_action(team_batting, self.num_balls - ball, total_runs, wickets_left, runs_to_chase, feature_batter, feature_bowler)
        bowling_action = self.get_team_bowling_action(team_bowling,ball,total_runs,wickets_left,runs_to_chase,feature_batter,feature_bowler)
        wicket, runs   = self.get_outcome(feature_batter, feature_bowler, batting_action, bowling_action)
        print("Batting Action:", batting_action, "\t Bowling Action: ", bowling_action, "\t Runs:", runs, "\t Wickets:", wicket)
        total_runs     = total_runs + runs
        if (wicket > 0):
          wickets_left = wickets_left - 1;
          self.current_batters_list[batter] = 0
          if(np.sum(self.current_batters_list) > 0 ):
            batter,feature_batter = self.next_batter(team_batting,order,wicket)
        if ((ball+1)%6 ==0 ):
          self.current_bowlers_list[bowler] = self.current_bowlers_list[bowler]-1
          if(np.sum(self.current_bowlers_list) > 0 ) :
            bowler, feature_bowler = self.next_bowler(team_bowling)
    # if(dqn_learning):
        #   state = np.array([self.num_balls-ball+1, wickets_left+wicket])
        #   next_state = np.array([[self.num_balls-ball, wickets_left]])
        #   done = False
        #   if(ball == self.num_balls - 1):
        #     done = True
        #   team_batting.agent.step(state, batting_action, runs, next_state, done)

    return total_runs, wickets_left, self.current_batters_list, self.current_bowlers_list, self.num_miss_team_batting, self.num_miss_team_bowling

  def dqn(self, n_episodes=1000, max_t=1000, eps_start=1.0, eps_end=0.01, eps_decay=0.995):
    scores = []
    ''' list containing scores from each episode '''

    scores_window_printing = deque(maxlen=10)
    ''' For printing in the graph '''

    scores_window= deque(maxlen=100)
    ''' last 100 scores for checking if the avg is more than 195 '''

    eps = eps_start
    ''' initialize epsilon '''


    for i_episode in range(1, n_episodes+1):
        # state = env.reset()
        score = 0
        # for t in range(max_t):
            # action = agent.act(state, eps)
            # next_state, reward, done, _ = env.step(action)
            # agent.step(state, action, reward, next_state, done)
            # state = next_state
            # if done:
            #     break
        total_runs, wickets_left, _, _, _, _ = self.innings(1, float('inf'), order1)
        score += total_runs
        print("--------------------------------------------------------------------")

        scores_window.append(score)
        scores_window_printing.append(score)
        ''' save most recent score '''

        eps = max(eps_end, eps_decay*eps)
        ''' decrease epsilon '''

        # print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_window)), end="")
        # if i_episode % 10 == 0:
        #     scores.append(np.mean(scores_window_printing))
        # if i_episode % 100 == 0:
        #   print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_window)))
        # if np.mean(scores_window)>= -110.0:
        #   print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(i_episode-100, np.mean(scores_window)))
        #   # break

    return [np.array(scores),i_episode-100]


In [None]:
# Time limits (seconds) for a single team action and for a whole exploration phase
action_timeout = 100
explore_timeout = 100

# Balls per innings and per exploration phase
num_balls = 110
explore_num_balls = 10

match = Match(num_balls, explore_num_balls, action_timeout, explore_timeout, Australia, India)
order1, order2 = match.explore_phase()

# mcts(runs = 0, wickets = 0, order)
# First innings: team one bats with nothing to chase.
(first_innings_score,
 wickets_left,
 batters_list,
 bowlers_list,
 num_miss_team_batting,
 num_miss_team_bowling) = match.innings(1, float('inf'), order1)
print(f"Total runs = {first_innings_score}, Wickets left = {wickets_left}")
#second_innings_score, batters_list, bowlers_list, num_miss_team_batting, num_miss_team_bowling = match.innings(2,first_innings_score)

Batting Action: 1 	 Bowling Action:  2 	 Runs: 0 	 Wickets: 1
Batter Invalid, Choosing Random Batter
Batting Action: 0 	 Bowling Action:  1 	 Runs: [0] 	 Wickets: 0
Batting Action: 1 	 Bowling Action:  1 	 Runs: [0] 	 Wickets: 0
Batting Action: 1 	 Bowling Action:  0 	 Runs: [1] 	 Wickets: 0
Batting Action: 0 	 Bowling Action:  1 	 Runs: [0] 	 Wickets: 0
Batting Action: 1 	 Bowling Action:  1 	 Runs: [1] 	 Wickets: 0
Batting Action: 0 	 Bowling Action:  0 	 Runs: [0] 	 Wickets: 0
Batting Action: 1 	 Bowling Action:  2 	 Runs: [0] 	 Wickets: 0
Batting Action: 0 	 Bowling Action:  0 	 Runs: [0] 	 Wickets: 0
Batting Action: 1 	 Bowling Action:  2 	 Runs: [1] 	 Wickets: 0
Batting Action: 0 	 Bowling Action:  0 	 Runs: [0] 	 Wickets: 0
Batting Action: 1 	 Bowling Action:  1 	 Runs: [0] 	 Wickets: 0
Batting Action: 1 	 Bowling Action:  1 	 Runs: [0] 	 Wickets: 0
Batting Action: 0 	 Bowling Action:  0 	 Runs: [0] 	 Wickets: 0
Batting Action: 0 	 Bowling Action:  2 	 Runs: [0] 	 Wickets: 0
Bat

In [None]:
# setting time outs
explore_timeout = 100
action_timeout = 100

explore_num_balls = 10
num_balls = 110
match = Match(num_balls,explore_num_balls,action_timeout, explore_timeout,Australia,India)
order1,order2 = match.explore_phase()

# mcts(runs = 0, wickets = 0, order)
# Report what this cell computed. The previous print used first_innings_score
# and wickets_left, which are set by a different cell: it showed stale numbers
# and raised NameError on a fresh kernel.
dqn_result = match.dqn()
episode_scores = dqn_result[0]
print(f"Episodes with a recorded score = {len(episode_scores)}")
#second_innings_score, batters_list, bowlers_list, num_miss_team_batting, num_miss_team_bowling = match.innings(2,first_innings_score)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Batting Action: 1 	 Bowling Action:  0 	 Runs: [0] 	 Wickets: 0
Batting Action: 0 	 Bowling Action:  0 	 Runs: [0] 	 Wickets: 0
Batting Action: 0 	 Bowling Action:  1 	 Runs: [0] 	 Wickets: 0
Batting Action: 0 	 Bowling Action:  2 	 Runs: [0] 	 Wickets: 0
Batting Action: 1 	 Bowling Action:  0 	 Runs: [1] 	 Wickets: 0
Batting Action: 1 	 Bowling Action:  0 	 Runs: [0] 	 Wickets: 0
Batting Action: 0 	 Bowling Action:  2 	 Runs: [0] 	 Wickets: 0
Batting Action: 0 	 Bowling Action:  1 	 Runs: [0] 	 Wickets: 0
Batting Action: 1 	 Bowling Action:  0 	 Runs: [0] 	 Wickets: 0
Batting Action: 0 	 Bowling Action:  0 	 Runs: [0] 	 Wickets: 0
Batting Action: 0 	 Bowling Action:  0 	 Runs: 0 	 Wickets: 1
Batter Invalid, Choosing Random Batter
Batting Action: 1 	 Bowling Action:  2 	 Runs: [1] 	 Wickets: 0
Batting Action: 0 	 Bowling Action:  0 	 Runs: [0] 	 Wickets: 0
Batting Action: 0 	 Bowling Action:  2 	 Runs: [0] 	 Wickets: 0
Ba