<a href="https://colab.research.google.com/github/inforeqd512/QLearning/blob/main/Tribe_Q_learning_v5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [380]:
#import libraries
import numpy as np
import math
import copy #for deepcopy()
from enum import Enum
!python -V

Python 3.7.11


In [381]:
class Business_Rules:

  def __init__(self):
    self.target_dr_per_lead = 5
    self.actions = ['reassign', 'promote_dr', 'hire']
    self.ceil_total_leaders_leading = 10
    self.ceil_total_people_in_grade = 50
    self.grade_hierarchy = ["3", "4.1", "4.2", "4.3", "4.4", "5.1", "5.2", "MS"]
    self.leaders_can_only_be_perms = True
    self.perm_grades = ["3", "4.1", "4.2", "4.3", "4.4", "5.1", "5.2"]
    self.chapter_leads_4_point_2_and_above = True
    self.drs_must_be_grades_lower_than_lead = True

  def print_info(self):
    print("Business Rules")
    print(self.target_dr_per_lead)
    print(self.actions)
    print(self.grade_hierarchy)
    print(self.perm_grades)

In [382]:
class Chapter_Stats:
  def __init__(self, 
               chapter, 
               allocated_leaders = {"3":3},
               people_gradewise = {"3":0, "4.1":0, "4.2":0, "4.3":0, "4.4":0, "5.1":0, "5.2":0, "MS":0},
               business_rules = Business_Rules()):
    self.chapter = chapter
    self.allocated_leaders = allocated_leaders
    self.people_gradewise = people_gradewise
    self.business_rules = business_rules

  def grade_hierarchy(self):
    return self.business_rules.grade_hierarchy

  def total_people_in_grade(self, grade):
    return self.people_gradewise[grade]

  def is_all_leader_grades_utilised(self):
    #Currrently consider only Perms grades can lead but in reality MS can lead MS
    #if we find first grade where leaders are yet to lead then break
    is_all_leader_grades_utilised = True
    for grade in self.business_rules.perm_grades:
      num_leaders_in_waiting_in_grade = self.leaders_in_waiting_in_grade(grade)
      if num_leaders_in_waiting_in_grade != None: #if no leaders to allocate at all then leaders_in_waiting cannot be 0
        if num_leaders_in_waiting_in_grade > 0:
          is_all_leader_grades_utilised = False
          break

  def leaders_in_waiting_in_grade(self, grade):
    #how many leaders still can be given a team
    total_possible_leaders = self.total_people_in_grade(grade) 
    num_allocated_leaders = self.num_leads_in_grade(grade)
    num_leaders_in_waiting_in_grade = total_possible_leaders - num_allocated_leaders
    if total_possible_leaders > 0: 
      return num_leaders_in_waiting_in_grade
    else:
      return None

  def leads_uptill_grade(self, grade):
    #count all leaders upto and including grade for calculating drs per lead (average)
    num_leads_uptill_grade = 0 
    for local_grade in self.grade_hierarchy():
      num_leads_uptill_grade += self.num_leads_in_grade(local_grade)
      if local_grade == grade:
        break
    return num_leads_uptill_grade

  def num_leads_in_grade(self, grade):
    if grade in self.allocated_leaders:
      return self.allocated_leaders[grade]
    else:
      return 0

  def all_dr_grades_for_leader_grade(self, grade):
    #all grades that can be drs for the input leader grade
    all_lower_grades = []

    start = False
    for local_grade in self.grade_hierarchy():
      if start == True: #put this in start of loop so that the grade being searched is skipped
        all_lower_grades.append(local_grade)

      if local_grade == grade:
        start = True

    return all_lower_grades

  def num_drs_for_leader_grade(self, grade):
    num_people = 0
    for dr_grade in self.all_dr_grades_for_leader_grade(grade):
      num_people += self.total_people_in_grade(dr_grade)
    return num_people

  def drs_per_lead_for_grade(self, grade):
    total_dr_per_lead = 0
    num_drs = self.num_drs_for_leader_grade(grade)
    num_leads_uptill_grade = self.leads_uptill_grade(grade)
    num_people_in_grade = self.total_people_in_grade(grade)

    if self.grade_hierarchy()[0]:
      highest_grade_with_no_boss = self.grade_hierarchy()[0]

    if num_leads_uptill_grade == 0:
      return total_dr_per_lead

    #dr per lead formula = 1 + 2 + 3
    #1. average distribution : number of drs for that grade distributed over all allocated leaders upto and including that grade
    #2. for that same grade : people in that grade distributed over allocated leaders in all higher grades. 
    #         But due to average to keep n same therefore all allocated leaders upto and including that grade
    #3. for all grades higher than this grade : people in that grade distributed over allocated leaders in all higher grades.
    #         But due to average to keep n same therefore all allocated leaders upto and including that grade
    #         Ignore this computation for the highest grade when computing for grades lower than the highest grade
    for local_grade in self.grade_hierarchy():
      if local_grade == grade: #1.
        drs_per_lead_for_grade = num_drs / num_leads_uptill_grade
        total_dr_per_lead += drs_per_lead_for_grade
        print("grade same, drs over all allocated leaders : ", local_grade, " : ", drs_per_lead_for_grade, "total : ",total_dr_per_lead)
        drs_per_lead_for_grade_people_led_by_higher_grades = num_people_in_grade / num_leads_uptill_grade #2.
        total_dr_per_lead += drs_per_lead_for_grade_people_led_by_higher_grades
        print("grade same, grade people over all allocated leaders : ", local_grade, " : ", drs_per_lead_for_grade_people_led_by_higher_grades, "total : ",total_dr_per_lead)
        break #stop when the perm grade is same as the grade
      
      if local_grade == highest_grade_with_no_boss:
        #ignore
        pass
      else:
        num_total_people_in_grade = self.total_people_in_grade(local_grade)
        drs_per_lead_for_grade = num_total_people_in_grade / num_leads_uptill_grade #3.
        total_dr_per_lead += drs_per_lead_for_grade
        print(local_grade, ":", drs_per_lead_for_grade, "total : ",total_dr_per_lead)

    return total_dr_per_lead

  def drs_per_lead_for_max_leader_grade_allocated(self):
    #for the max grade for which a leader has been allocated, compute the drs_per_lead

    #also get the max grade for which a leader has been allocated by increasing order of grade_hierarchy
    max_grade_of_allocated_leader = None
    for grade in self.grade_hierarchy():
      if grade in self.allocated_leaders:
        if self.allocated_leaders[grade] > 0:
          max_grade_of_allocated_leader = grade

    print("max_grade_of_allocated_leader **** :", max_grade_of_allocated_leader)

    num_dr_per_lead = None
    if max_grade_of_allocated_leader != None:
      num_dr_per_lead = self.drs_per_lead_for_grade(max_grade_of_allocated_leader)
      print("num_dr_per_lead **** :", num_dr_per_lead)

    return num_dr_per_lead


  def print_info(self):
    print("\n\nChapter_Stats_v2")
    print("chapter :", self.chapter)
    print("allocated_leaders :", self.allocated_leaders)
    print("people_gradewise :", self.people_gradewise)
    print("drs_per_lead : ", self.drs_per_lead_for_max_leader_grade_allocated())

  def chapter_stats_values_for_hashing(self):
    hash_value = (self.chapter, tuple(self.allocated_leaders.items()), tuple(self.people_gradewise.items()))
    return hash_value
      
  def assign_leader_in_waiting_to_lead_for_grade(self, grade):
    did_assign = False
    num_leaders_in_waiting_in_grade = self.leaders_in_waiting_in_grade(grade)
    if num_leaders_in_waiting_in_grade != None:
      if num_leaders_in_waiting_in_grade > 0:
        if grade in self.allocated_leaders:
          num_allocated_leaders = self.allocated_leaders[grade]
          num_allocated_leaders += 1
          self.allocated_leaders[grade] = num_allocated_leaders
          print("assign_leader_in_waiting_to_lead_for_grade :", grade,  num_allocated_leaders)
          did_assign = True
        else:
          self.allocated_leaders[grade] = 1
          print("assign_leader_in_waiting_to_lead_for_grade :", grade,  1)
          did_assign = True
    return did_assign

  def assign_leader_in_waiting_to_lead(self):
    #based on grade hierarchy, when you find the highest grade which has leaders in waiting, then allocate one of that leader to lead a team
    for grade in self.grade_hierarchy(): 
      did_assign = self.assign_leader_in_waiting_to_lead_for_grade(grade)
      if did_assign: 
        break
          
  def promote_leaders_in_waiting_randomly(self):
    #if current drs per lead is > target drs per lead, then we need to promote people to be in the leader position to meet the target
    #we can find such people if all grades higher than them are fully leading and they have people who can lead
    #q-learning will be able to maximise promoting the ones which will increase chances of meeting the target drs_per_lead
    
    #get list of all promotable grades that still have leaders in waiting
    grades_with_leaders_in_waiting = {}
    indexes = len(self.grade_hierarchy()) - 1
    for idx in range(indexes):
      if idx == 0:
        pass
      else:
        grade = self.grade_hierarchy()[idx]
        num_leaders_in_waiting_in_grade = self.leaders_in_waiting_in_grade(grade)
        if num_leaders_in_waiting_in_grade != None:
          if num_leaders_in_waiting_in_grade > 0:
            grades_with_leaders_in_waiting[grade] =  self.grade_hierarchy()[idx -1] # can be promoted to this higher grade

    grade_to_promote = np.random.choice(list(grades_with_leaders_in_waiting))
    print("before promotion :", self.people_gradewise )
    promoted_to_grade = grades_with_leaders_in_waiting[grade_to_promote]

    self.people_gradewise[grade_to_promote] -= 1
    self.people_gradewise[promoted_to_grade] += 1

    print("after promotion :", grade_to_promote, "to :", promoted_to_grade, self.people_gradewise)
    print("then assign to get the reward")
    self.assign_leader_in_waiting_to_lead_for_grade(promoted_to_grade)

            

In [383]:
class State:

    def __init__(self, chapter_stats, business_rules):
      self.chapter_stats = chapter_stats
      self.business_rules = business_rules

    def print_info(self):
      self.chapter_stats.print_info()
      self.business_rules.print_info()

    def state_hash(self):
      hash_value = hash(self.chapter_stats.chapter_stats_values_for_hashing())
      return hash_value

    def state_copy(self):
      new_state = copy.deepcopy(self)
      return new_state
    
    def drs_per_lead_for_max_leader_grade_allocated(self):
      return self.chapter_stats.drs_per_lead_for_max_leader_grade_allocated()

    def target_dr_per_lead(self):
      return self.business_rules.target_dr_per_lead

    def is_all_leader_grades_utilised(self):
      return self.chapter_stats.is_all_leader_grades_utilised()
    
    def assign_leader_in_waiting_to_lead(self):
      self.chapter_stats.assign_leader_in_waiting_to_lead()
    
    def promote_leaders_in_waiting_randomly(self):
      self.chapter_stats.promote_leaders_in_waiting_randomly()

In [384]:
class ActionList:

    def __init__(self, action_strings):
      self.actions=[]
      self.list_action_hash=[]
      for action in action_strings:
        ac = Action(action)
        self.actions.append(ac)
        self.list_action_hash.append(ac.action_hash)

In [385]:
class Action:

  def __init__(self, action_string):
    self.action = action_string
    self.action_hash = hash(action_string)
    return

  def perform_action(self, state):
    if self.action == "reassign":
      return Reassign.run(state)
    elif self.action == "promote_dr":
      return Promote_DR.run(state)
    elif self.action == "hire":
      return Hire.run(state)

  def print_info(self):
    print(self.action, self.action_hash)

In [386]:
class Reassign:

  def __init__(self):
    #do nothing now
    return

  @classmethod
  def run(self, state):
    print("\n\n******reassign*******")
    new_state = state.state_copy()
    new_state.assign_leader_in_waiting_to_lead()
    return new_state

In [387]:
class Promote_DR:
  def __init__(self):
    return

  @classmethod
  def run(self, state):
    print("\n\n******promote_dr*******")
    new_state = state.state_copy()
    new_state.promote_leaders_in_waiting_randomly()
    return new_state

In [388]:
class Hire:
  def __init__(self):
    return

  @classmethod
  def run(self, state):
    new_state = state #do nothing for now so return the same state
    return new_state

In [389]:
class Q_table:
  """
  dict[key(state hash, action hash)] = q-values
  """
  
  def __init__(self):
    self.Q = {}
    return

  def getStateActionHashPairKey(self, state_hash, action_hash):
    """ Returns state-pair hash key, requires separate state and action hash keys first """
    # return state_hash*action_hash
    return str(state_hash)+" "+str(action_hash)

  def getActionHashListForNewState(self, actions):
    list_action_hash = []
    for action in actions:
      list_action_hash.append(action.action_hash)
    return list_action_hash

  def getValueQ(self, state_hash, action_hash):
    """ Get expected reward given an action in a given state,
        returns 0 if the state-action pair has not been seen before.
        Input is state and action hash key                          """

    state_action_key = self.getStateActionHashPairKey(state_hash, action_hash)
    if state_action_key in self.Q:
        return self.Q.get(state_action_key)
    else:
        self.Q[state_action_key] = 0
        return 0

  def setValueQ(self, state_hash, action_hash, value):
    """ Set value in Q """
    state_action_key = self.getStateActionHashPairKey(state_hash, action_hash)
    self.Q[state_action_key] = value

  def getBestAction(self, state_hash, list_action_hash, list_actions):
    """ Get best action given a set of possible actions in a given state """

    best_action = list_actions[0]

    # Find action that given largest Q in given state
    maxQ = 0
    for a_hash, action in zip(list_action_hash, list_actions):
        tmpQ = self.getValueQ(state_hash, a_hash)
        if maxQ < tmpQ:
          maxQ = tmpQ
          best_action = action

    return best_action

  def getMaxQ(self, state_hash, list_action_hash):
      """ Returns the maximum Q value given a state and list of actions (input is hash keys) """
      maxQ = 0
      for a_hash in list_action_hash:
          tmpQ = self.getValueQ(state_hash, a_hash) 
          if maxQ < tmpQ:
              maxQ = tmpQ
      return maxQ


  def updateQ(self, old_state, action_to_perform, reward, new_state, next_possible_actions, discount_factor, learning_rate):
    """ Implements Q-learning iterative algorithm """

    state_hash = old_state.state_hash()
    action_hash = action_to_perform.action_hash

    # Get current Q Value
    old_q_value = self.getValueQ(state_hash, action_hash)

    # Find max Q value given the possible set of actions in the next state
    list_action_hash = self.getActionHashListForNewState(next_possible_actions)
    max_nextQ = self.getMaxQ(new_state.state_hash(), list_action_hash) 
    
    temporal_difference = reward + (discount_factor * max_nextQ) - old_q_value

    #update the Q-value for the previous state and action pair
    new_q_value = old_q_value + (learning_rate * temporal_difference)
    self.setValueQ(state_hash, action_hash, new_q_value)
    # print("updateQ", state_hash, action_hash, new_q_value)

  def print_info(self):
    print("\n\nQ_table")
    print(self.Q)

In [390]:
class Reward:
  def __init__(self):
    return
  
  def reward_function(self, old_state, action_to_perform):
    """ 
    Returns positive value if actions gets closer to target state and values are the size of step closer to the target, 
    Returns -ve value if action take away from target
    """

    #this assumes that the action_to_perform has same results every time and is not random
    new_state = action_to_perform.perform_action(old_state)

    if new_state.is_all_leader_grades_utilised():
      reward = -100 #possible promotion scenario
      print("*****1st cond*****", new_state.is_all_leader_grades_utilised())
      return reward
    
    if new_state.drs_per_lead_for_max_leader_grade_allocated() == 0:
      reward = -100
    elif new_state.drs_per_lead_for_max_leader_grade_allocated() > 0 and new_state.drs_per_lead_for_max_leader_grade_allocated() <= new_state.target_dr_per_lead():
      print("*****2nd cond*****", new_state.drs_per_lead_for_max_leader_grade_allocated(), "****", new_state.target_dr_per_lead())
      reward = 100
    else:
      reward = old_state.drs_per_lead_for_max_leader_grade_allocated() - new_state.drs_per_lead_for_max_leader_grade_allocated()

    return (reward, new_state)

In [391]:
class Environment:

  def __init__(self):
    self.business_rules = Business_Rules()
    self.action_list = ActionList(self.business_rules.actions)
    self.reward = Reward()

  def possible_actions_in_state(self, state):
    return self.action_list.actions

  def hasReachedTerminalState(self, reward):
    if reward == 100 or reward == -100:
      return True
    else:
      return False

In [392]:
class Episode:

  preconditioned_episodes = [
            {"chapter": "Analyst", 
             "allocated_leaders": {"3":0, "4.1":1}, \
             "people_gradewise" : {"3":0, "4.1":1, "4.2":6, "4.3":6, "4.4":1, "5.1":0, "5.2":2, "MS":7} },
            {"chapter": "ios", 
             "allocated_leaders": {"3":0, "4.1":0, "4.2":1}, \
             "people_gradewise" : {"3":0, "4.1":0, "4.2":5, "4.3":6, "4.4":2, "5.1":0, "5.2":0, "MS":14} },
            {"chapter": "android", \
             "allocated_leaders": {"3":0, "4.1":1}, \
             "people_gradewise" : {"3":0, "4.1":0, "4.2":4, "4.3":4, "4.4":4, "5.1":0, "5.2":0, "MS":9} },
            {"chapter": "be engg", \
             "allocated_leaders": {"3":2.5}, \
             "people_gradewise" : {"3":2.5, "4.1":3, "4.2":4, "4.3":6, "4.4":5, "5.1":1, "5.2":2, "MS":27} },
            {"chapter": "delivery leads", \
             "allocated_leaders": {"3":0, "4.1":1}, \
             "people_gradewise" : {"3":0, "4.1":1, "4.2":3.5, "4.3":0.5, "4.4":0.8, "5.1":0, "5.2":0, "MS":3} },
            {"chapter": "environment", \
             "allocated_leaders": {"3":0, "4.1":0, "4.2":1}, \
             "people_gradewise" : {"3":0, "4.1":0, "4.2":2, "4.3":1, "4.4":1, "5.1":0, "5.2":0, "MS":2} },
            {"chapter": "sre", \
             "allocated_leaders": {"3":1, "4.1":1}, \
             "people_gradewise" : {"3":1, "4.1":1, "4.2":1, "4.3":7, "4.4":4, "5.1":0, "5.2":0, "MS":3} },
            {"chapter": "testing", \
             "allocated_leaders": {"3":0, "4.1":1}, \
             "people_gradewise" : {"3":0, "4.1":0, "4.2":1.5, "4.3":3, "4.4":2, "5.1":1, "5.2":0, "MS":18} },
            {"chapter": "reassign wont work", \
             "allocated_leaders": {"3":2.5, "4.1":1}, \
             "people_gradewise" : {"3":2.5, "4.1":3, "4.2":0, "4.3":0, "4.4":5, "5.1":1, "5.2":2, "MS":27} },
  ]

  def __init__(self, episode_number):
    self.episode_number = episode_number
    self.state = None
    if self.episode_number < len(self.preconditioned_episodes):
      business_rules = Business_Rules()
      dict = self.preconditioned_episodes[self.episode_number]
      print("preconditioned_episode :\n", dict)
      chapter_stats = Chapter_Stats(chapter=dict["chapter"],allocated_leaders=dict["allocated_leaders"],people_gradewise=dict["people_gradewise"],business_rules=business_rules)
      self.state = State(chapter_stats, business_rules)
    else:
      business_rules = Business_Rules()

      people_gradewise={}
      for grade in business_rules.grade_hierarchy:
        people_gradewise[grade] = np.random.randint(business_rules.ceil_total_people_in_grade)

      total_leaders_leading = np.random.randint(0,len(business_rules.perm_grades)-1)
      allocated_leaders={}
      i=0
      for grade in people_gradewise:
        total_people = people_gradewise[grade]
        if total_people > 0:
          allocated_leaders[grade] = np.random.randint(total_people)
        i += 1
        if i > total_leaders_leading:
          break


      chapter_stats = Chapter_Stats(chapter="chapter",allocated_leaders=allocated_leaders,people_gradewise=people_gradewise,business_rules=business_rules)
      print("simulated episode data :")
      chapter_stats.print_info()
      self.state = State(chapter_stats, business_rules)

    return

In [393]:
class Trainer:

  def __init__(self):
    self.q_table = Q_table()
    self.environment = Environment()

    #define training parameters
    self.epsilon = 0.9 #the percentage of time when we should take the best action (instead of a random action)
    self.discount_factor = 0.9 #discount factor for future rewards
    self.learning_rate = 0.9 #the rate at which the AI agent should learn
    self.num_episodes = 15 #1000

    for action in self.environment.action_list.actions:
      action.print_info()

    return

  
  #define an epsilon greedy algorithm that will choose which action to take next 
  def get_next_action(self, state_hash, list_action_hash, list_actions, epsilon):
    #if a randomly chosen value between 0 and 1 is less than epsilon, 
    #then choose the most promising value from the Q-table for this state.
    if np.random.random() < epsilon:
      best_action = self.q_table.getBestAction(state_hash, list_action_hash, list_actions)
      return best_action
    else: #choose a random action
      random_idx = np.random.choice(len(list_actions))
      random_action = list_actions[random_idx]
      return random_action

  def train(self, should_print):
    #run through 1000 training episodes
    for episode_number in range(self.num_episodes):
      #get the starting state for this episode
      episode = Episode(episode_number)
      print("episode_number : ", episode.episode_number)

      #continue taking actions  until we reach a terminal state
      #(i.e., until we reach the final team structure or hit a bad structure)
      while True:

        #choose which action to take 
        action_to_perform = self.get_next_action(episode.state.state_hash(), \
                                                 self.environment.action_list.list_action_hash, \
                                                 self.environment.action_list.actions, \
                                                 self.epsilon)

        #perform the chosen action, and transition to the next state 
        old_state = episode.state

        #receive the reward for moving to the new state
        reward, new_state = self.environment.reward.reward_function(old_state, action_to_perform)

        if should_print:
          print("*********episode state after reward********  : ")
          episode.state.print_info()        
          print("\n action_to_perform :\n")
          action_to_perform.print_info()
          print("\n new_state :\n")
          new_state.print_info()        
          print("\nreward: \n", reward)

        if self.environment.hasReachedTerminalState(reward):
          episode.state = new_state
          print("breaking... terminal state \n")
          break

        next_possible_actions = self.environment.possible_actions_in_state(new_state)
        self.q_table.updateQ(old_state, action_to_perform, reward, new_state, next_possible_actions, self.discount_factor, self.learning_rate)

        episode.state = new_state

 #Define a function that will get the shortest path between the starting out team structure and the most favorable team structure 
  def get_shortest_path(self, old_state):
    i=0
    shortest_path = []
    shortest_path.append(old_state)
    while True:
      action_to_perform = self.get_next_action(old_state.state_hash(), \
                                               self.environment.action_list.list_action_hash, \
                                               self.environment.action_list.actions, 
                                               1.) #always the best action

      reward, new_state = self.environment.reward.reward_function(old_state, action_to_perform)
      print("reward :", reward)
            
      shortest_path.append(new_state)

      if self.environment.hasReachedTerminalState(reward):
        episode.state = new_state
        break

      old_state = new_state
    
    for state in shortest_path:
      state.print_info()

# TESTING

In [394]:
import numpy
cs_v2 = Chapter_Stats("BE Chapter", 
                         allocated_leaders = {"3":3, "4.1":3, "4.2":4, "4.3":2},
                         people_gradewise = {"3":3, "4.1":3, "4.2":4, "4.3":6, "4.4":5, "5.1":1, "5.2":2, "MS":27}
                         )

cs_v2.promote_leaders_in_waiting_randomly()
# print(cs_v2.allocated_leaders.items())
# print(list(cs_v2.allocated_leaders.items()))
# print(numpy.random.choice(list(cs_v2.allocated_leaders)))

before promotion : {'3': 3, '4.1': 3, '4.2': 4, '4.3': 6, '4.4': 5, '5.1': 1, '5.2': 2, 'MS': 27}
after promotion : 5.2 to : 5.1 {'3': 3, '4.1': 3, '4.2': 4, '4.3': 6, '4.4': 5, '5.1': 2, '5.2': 1, 'MS': 27}
then assign to get the reward
assign_leader_in_waiting_to_lead_for_grade : 5.1 1


# Chapter_Stats Testing

In [395]:
cs_v2 = Chapter_Stats("BE Chapter", 
                         allocated_leaders = {"3":3, "4.1":3, "4.2":4, "4.3":2},
                         people_gradewise = {"3":3, "4.1":3, "4.2":4, "4.3":6, "4.4":5, "5.1":1, "5.2":2, "MS":27}
                         )

In [396]:
print(cs_v2.drs_per_lead_for_max_leader_grade_allocated())

max_grade_of_allocated_leader **** : 4.3
4.1 : 0.25 total :  0.25
4.2 : 0.3333333333333333 total :  0.5833333333333333
grade same, drs over all allocated leaders :  4.3  :  2.9166666666666665 total :  3.5
grade same, grade people over all allocated leaders :  4.3  :  0.5 total :  4.0
num_dr_per_lead **** : 4.0
4.0


In [397]:
print(cs_v2.total_people_in_grade("3"))
print(cs_v2.total_people_in_grade("4.1"))
print(cs_v2.total_people_in_grade("MS"))

3
3
27


In [398]:
print(cs_v2.leaders_in_waiting_in_grade("3"))
print(cs_v2.leaders_in_waiting_in_grade("4.1"))
print(cs_v2.leaders_in_waiting_in_grade("MS"))

0
0
27


In [399]:
print(cs_v2.all_dr_grades_for_leader_grade("3"))
print(cs_v2.all_dr_grades_for_leader_grade("4.1"))
print(cs_v2.all_dr_grades_for_leader_grade("MS"))

print(cs_v2.num_drs_for_leader_grade("3"))
print(cs_v2.num_drs_for_leader_grade("4.1"))
print(cs_v2.num_drs_for_leader_grade("MS"))

['4.1', '4.2', '4.3', '4.4', '5.1', '5.2', 'MS']
['4.2', '4.3', '4.4', '5.1', '5.2', 'MS']
[]
48
45
0


In [400]:
print(cs_v2.leads_uptill_grade("3"))

3


In [401]:
print(cs_v2.drs_per_lead_for_grade("3"))


grade same, drs over all allocated leaders :  3  :  16.0 total :  16.0
grade same, grade people over all allocated leaders :  3  :  1.0 total :  17.0
17.0


In [402]:
print(cs_v2.drs_per_lead_for_grade("4.1"))


grade same, drs over all allocated leaders :  4.1  :  7.5 total :  7.5
grade same, grade people over all allocated leaders :  4.1  :  0.5 total :  8.0
8.0


In [403]:
print(cs_v2.drs_per_lead_for_grade("4.2"))

4.1 : 0.3 total :  0.3
grade same, drs over all allocated leaders :  4.2  :  4.1 total :  4.3999999999999995
grade same, grade people over all allocated leaders :  4.2  :  0.4 total :  4.8
4.8


In [404]:
print(cs_v2.drs_per_lead_for_grade("4.3"))

4.1 : 0.25 total :  0.25
4.2 : 0.3333333333333333 total :  0.5833333333333333
grade same, drs over all allocated leaders :  4.3  :  2.9166666666666665 total :  3.5
grade same, grade people over all allocated leaders :  4.3  :  0.5 total :  4.0
4.0


# State testing

In [405]:
#two states with same values should have same hash
cs_v2 = Chapter_Stats("BE Chapter", 
                         allocated_leaders = {"3":3, "4.1":3, "4.2":4, "4.3":2},
                         people_gradewise = {"3":3, "4.1":3, "4.2":4, "4.3":6, "4.4":5, "5.1":1, "5.2":2, "MS":27}
                         )

st1=State(cs_v2,Business_Rules())

st2=State(cs_v2,Business_Rules())

print(st1.state_hash())
print(st2.state_hash())


-4482967398936304121
-4482967398936304121


# Action Testing

In [406]:
al = ActionList(Business_Rules().actions)
print(al.actions)
for action in al.actions:
  print(action.print_info())
print(al.list_action_hash)

[<__main__.Action object at 0x7f4168a44650>, <__main__.Action object at 0x7f4168a44810>, <__main__.Action object at 0x7f4168a44090>]
reassign 2583310774460506101
None
promote_dr 2146857254869273101
None
hire -6523724660081691562
None
[2583310774460506101, 2146857254869273101, -6523724660081691562]


# Reward Testing

In [407]:
cs_v2 = Chapter_Stats("BE Chapter", 
                         allocated_leaders = {"3":3, "4.1":3, "4.2":4, "4.3":2},
                         people_gradewise = {"3":3, "4.1":3, "4.2":4, "4.3":6, "4.4":5, "5.1":1, "5.2":2, "MS":27}
                         )

st=State(cs_v2,Business_Rules())

rw = Reward()
act = Action("reassign")

st.print_info()

reward, new_state = rw.reward_function(st, act)
print("next", 2)
print(reward)
new_state.print_info()

reward, new_state = rw.reward_function(new_state, act)
print("next", 3)
print(reward)
new_state.print_info()

reward, new_state = rw.reward_function(new_state, act)
print("next", 4)
print(reward)
new_state.print_info()

reward, new_state = rw.reward_function(new_state, act)
print("next", 5)
print(reward)



Chapter_Stats_v2
chapter : BE Chapter
allocated_leaders : {'3': 3, '4.1': 3, '4.2': 4, '4.3': 2}
people_gradewise : {'3': 3, '4.1': 3, '4.2': 4, '4.3': 6, '4.4': 5, '5.1': 1, '5.2': 2, 'MS': 27}
max_grade_of_allocated_leader **** : 4.3
4.1 : 0.25 total :  0.25
4.2 : 0.3333333333333333 total :  0.5833333333333333
grade same, drs over all allocated leaders :  4.3  :  2.9166666666666665 total :  3.5
grade same, grade people over all allocated leaders :  4.3  :  0.5 total :  4.0
num_dr_per_lead **** : 4.0
drs_per_lead :  4.0
Business Rules
5
['reassign', 'promote_dr', 'hire']
['3', '4.1', '4.2', '4.3', '4.4', '5.1', '5.2', 'MS']
['3', '4.1', '4.2', '4.3', '4.4', '5.1', '5.2']


******reassign*******
assign_leader_in_waiting_to_lead_for_grade : 4.3 3
max_grade_of_allocated_leader **** : 4.3
4.1 : 0.23076923076923078 total :  0.23076923076923078
4.2 : 0.3076923076923077 total :  0.5384615384615385
grade same, drs over all allocated leaders :  4.3  :  2.6923076923076925 total :  3.230769230

# Trainer Testing

In [408]:
trainer = Trainer()

trainer.train(False)

reassign 2583310774460506101
promote_dr 2146857254869273101
hire -6523724660081691562
preconditioned_episode :
 {'chapter': 'Analyst', 'allocated_leaders': {'3': 0, '4.1': 1}, 'people_gradewise': {'3': 0, '4.1': 1, '4.2': 6, '4.3': 6, '4.4': 1, '5.1': 0, '5.2': 2, 'MS': 7}}
episode_number :  0


******reassign*******
assign_leader_in_waiting_to_lead_for_grade : 4.2 1
max_grade_of_allocated_leader **** : 4.2
4.1 : 0.5 total :  0.5
grade same, drs over all allocated leaders :  4.2  :  8.0 total :  8.5
grade same, grade people over all allocated leaders :  4.2  :  3.0 total :  11.5
num_dr_per_lead **** : 11.5
max_grade_of_allocated_leader **** : 4.2
4.1 : 0.5 total :  0.5
grade same, drs over all allocated leaders :  4.2  :  8.0 total :  8.5
grade same, grade people over all allocated leaders :  4.2  :  3.0 total :  11.5
num_dr_per_lead **** : 11.5
max_grade_of_allocated_leader **** : 4.2
4.1 : 0.5 total :  0.5
grade same, drs over all allocated leaders :  4.2  :  8.0 total :  8.5
grade s

In [409]:
trainer.q_table.print_info()



Q_table
{'-6111748635087182833 2583310774460506101': 10.35, '-6111748635087182833 2146857254869273101': 0, '-6111748635087182833 -6523724660081691562': 0, '4376600235592428563 2583310774460506101': 3.4500000000000006, '4376600235592428563 2146857254869273101': 0, '4376600235592428563 -6523724660081691562': 0, '713318019173734460 2583310774460506101': 1.7249999999999994, '713318019173734460 2146857254869273101': 0, '713318019173734460 -6523724660081691562': 0, '75345670857687377 2583310774460506101': 0, '75345670857687377 2146857254869273101': 0, '75345670857687377 -6523724660081691562': 0, '3171570465457469436 2583310774460506101': 12.15, '3171570465457469436 2146857254869273101': 0, '3171570465457469436 -6523724660081691562': 0, '4801090556202369139 2583310774460506101': 4.05, '4801090556202369139 2146857254869273101': 0, '4801090556202369139 -6523724660081691562': 0, '2815515546372624202 2583310774460506101': 2.025, '2815515546372624202 2146857254869273101': 0, '2815515546372624202

# Episode Testing - Inference

In [410]:
episode = Episode(8)
trainer.get_shortest_path(episode.state)

preconditioned_episode :
 {'chapter': 'reassign wont work', 'allocated_leaders': {'3': 2.5, '4.1': 1}, 'people_gradewise': {'3': 2.5, '4.1': 3, '4.2': 0, '4.3': 0, '4.4': 5, '5.1': 1, '5.2': 2, 'MS': 27}}


******reassign*******
assign_leader_in_waiting_to_lead_for_grade : 4.1 2
max_grade_of_allocated_leader **** : 4.1
grade same, drs over all allocated leaders :  4.1  :  7.777777777777778 total :  7.777777777777778
grade same, grade people over all allocated leaders :  4.1  :  0.6666666666666666 total :  8.444444444444445
num_dr_per_lead **** : 8.444444444444445
max_grade_of_allocated_leader **** : 4.1
grade same, drs over all allocated leaders :  4.1  :  7.777777777777778 total :  7.777777777777778
grade same, grade people over all allocated leaders :  4.1  :  0.6666666666666666 total :  8.444444444444445
num_dr_per_lead **** : 8.444444444444445
max_grade_of_allocated_leader **** : 4.1
grade same, drs over all allocated leaders :  4.1  :  7.777777777777778 total :  7.777777777777778


In [411]:
dict = {"a":1,"b":1}
len(dict)

2