In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import deque
import random
import plotly.express as px

In [None]:
# General Variables

R, S, T, P = 2, 0, 3, 1
Payoff_Matrix = np.array([[R,S],[T,P]])

C = np.array([1,0])  # Cooporation array
D = np.array([0,1])  # Defection array
capacity = 500       # Capacity of history and rewards

print("Payoff Matrix : \n",Payoff_Matrix)
print("Cooporative Vector : \n",C)
print("Defective Vector : \n",D)

Payoff Matrix : 
 [[2 0]
 [3 1]]
Cooporative Vector : 
 [1 0]
Defective Vector : 
 [0 1]


In [None]:
# Define a Class of History

class History(object):

    def __init__(self, capacity):
        self.history = deque(maxlen= capacity)  # Define a queue with maxlen "capacity"
        self.push(C,C)   # push a tuple of two Cooporative actions

    def push(self, action, opponent_action):
        # Add the tuple of actions
        self.history.append( (action, opponent_action) )

    def last(self):
        # Gives the last tuple of action in history
      return self.history[-1]

    def __len__(self):
        # gives the length of history
        return len(self.history)

    def clear(self):
      # clear the history which basiclly replace it with another queue and push a tuple of cooporative actions
      self.history = deque(maxlen= capacity)
      self.push(C,C)

In [None]:
# define a class of rewards for players

class Reward(object):

    def __init__(self, capacity):
      self.reward = deque(maxlen= capacity) # creat a queue with maximum length of capacity

    def call(self, action, opponent_action):
      # compute the reward of respective player and append it in reward
      self.r = action.T @ ( Payoff_Matrix @ opponent_action )
      self.reward.append( self.r )

    def clear(self):
      # clear rewards of player which basically replace it with another queue
      self.reward = deque(maxlen= capacity)

    def gain(self, len= 100):
      # compute the total sum of last "len" number of rewards 
      return np.sum(list(self.reward)[-len:])

In [None]:
# here I define different class of histories and rewards for each player to keep track of every one of them
# Define history of players

nice_history        = History( capacity= capacity )
bad_history         = History( capacity= capacity )
mainly_nice_history = History( capacity= capacity )
mainly_bad_history  = History( capacity= capacity )
evolvable_history   = History( capacity= capacity )
tit_for_tat_history = History( capacity= capacity )

# Define reward of players

nice_reward        = Reward( capacity= capacity )
bad_reward         = Reward( capacity= capacity )
mainly_nice_reward = Reward( capacity= capacity )
mainly_bad_reward  = Reward( capacity= capacity )
evolvable_reward   = Reward( capacity= capacity )
tit_for_tat_reward = Reward( capacity= capacity )

In [None]:
# define a class for nice strategy

class Nice(object):

    def __init__(self, player= C):
      # initializing the class
      self.name = "Nice"            # name of the stategy
      self.action = C               # action of the strategy
      self.new_opp_action = player  # acttion of opponent

      nice_history.push(self.action, self.new_opp_action)  # pushing the tuple of actions to history
      nice_reward.call(self.action, self.new_opp_action)   # computing the reward and save it in reward
      self.reward = nice_reward.reward.copy()              # make a copy of the reward for further purposes 

    def clear(self):  
      # its a function to clean the history and reward
      nice_reward.clear()
      nice_history.clear()

In [None]:
# define a class for bad strategy

class Bad(object):

    def __init__(self, player= C):
      # initializing the class
      self.name = "Bad"               # name of the strategy
      self.action = D                 # action of the strategy
      self.new_opp_action = player    # action of opponent

      bad_history.push(self.action, self.new_opp_action)    # pushing the tuple of actions to history
      bad_reward.call(self.action, self.new_opp_action)     # computing the reward and save it in reward
      self.reward = bad_reward.reward.copy()                # make a copy of the reward for further purposes

    def clear(self):
      # its a function to clean the history and reward
      bad_reward.clear()
      bad_history.clear()
      

In [None]:
# define a class for mainly nice strategy with a k= 20

class Mainly_Nice(object):

    def __init__(self, player= C, k= 20):
      # initializing the class
      self.name = "Mainly Nice"       # name of the strategy
      # defining the action of strategy

      if random.random() < k/100:
        self.action = D
      else:
        self.action = C
      self.new_opp_action = player    # action of opponent 

      mainly_nice_history.push(self.action, self.new_opp_action)    # pushing the tuple of actions to history
      mainly_nice_reward.call(self.action, self.new_opp_action)     # computing the reward and save it in reward
      self.reward = mainly_nice_reward.reward.copy()                # make a copy of the reward for further purposes

    def clear(self):
      # its a function to clean the history and reward
      mainly_nice_reward.clear()
      mainly_nice_history.clear()
      

In [None]:
# define a class for mainly bad strategy with k=80

class Mainly_Bad(object):

    def __init__(self, player= C, k= 80):
      # initialize the class

      self.name = "Mainly Bad"            # name of the strategy

      # define action of the strategy
      if random.random() > k/100 :
        self.action = D
      else:
        self.action = C

      self.new_opp_action = player        # action of opponent

      mainly_bad_history.push(self.action, self.new_opp_action)     # pushing the tuple of actions to history
      mainly_bad_reward.call(self.action, self.new_opp_action)      # computing the reward and save it in reward
      self.reward = mainly_bad_reward.reward.copy()                 # make a copy of the reward for further purposes

    def clear(self):
      # its a function to clean the history and reward
      mainly_bad_reward.clear()
      mainly_bad_history.clear()
      

In [None]:
# define a class for tit for tat strategy

class Tit_For_Tat(object):

    def __init__(self, player= C):
      # initialize the class
      self.name = "Tit For Tat"                                         # name of class
      self.last_opp_action = tit_for_tat_history.history[-1][-1]        # action of last opponent
      self.action = self.last_opp_action                                # action of strategy which is the last opponent's action
      self.new_opp_action = player                                      # action of new opponent

      tit_for_tat_history.push(self.action, self.new_opp_action)        # pushing the tuple of actions to history
      tit_for_tat_reward.call(self.action, self.new_opp_action)         # computing the reward and save it in reward
      self.reward = tit_for_tat_reward.reward.copy()                    # make a copy of the reward for further purposes

    def clear(self):
      # its a function to clean the history and reward
      tit_for_tat_reward.clear()
      tit_for_tat_history.clear()
      

In [None]:
"""
 -define a class for evolvable strategy which will be used in mutation part( for natural selection with atomic mutation mode)
 -this strategy randomly choose an action from a gamma distribution function with a given rate and shape
 -the rate will be mutation rate which is fixed by hand
 -the shape will be fitness_rate which is actually a rate based on the gains of substitute palyer
 -the fitness_rate will be change through the process 
"""

class EvolvablePlayer ( object ):

  def __init__(self, player = C , rate = 0.1, shape = 1):
    # initialize the class
    self.name = 'Evolvable Player'                                      # name of the strategy
    
    # define action of the strategy
    self.r = np.random.gamma( shape = shape, scale= float(1/rate) )     # random sample from gamma distribution
    if self.r < 0.5 :
      self.action = D
    else :
      self.action = C
    
    self.new_opp_action = player                                        # action of opponent

    evolvable_history.push( self.action, self.new_opp_action )          # pushing the tuple of actions to history
    evolvable_reward.call( self.action, self.new_opp_action )           # computing the reward and save it in reward
    self.reward = evolvable_reward.reward.copy()                        # make a copy of the reward for further purposes

  def clear ( self ) :
    # its a function to clean the history and reward
    evolvable_reward.clear()
    evolvable_history.clear()


In [None]:
### Gamma Distribution

import plotly.figure_factory as ff

# Add histogram data
x1 = np.random.gamma( shape= 1, scale= 1.5, size= 200)
x2 = np.random.gamma( shape= 1, scale= 0.5, size= 200)

# Group data together
hist_data = [x1, x2]

group_labels = ['Scale > 1', 'Scale < 1']

# Create distplot with custom bin_size
fig = ff.create_distplot(hist_data, group_labels, bin_size=[.1, .25, .5, 1])
fig.update_layout(title = 'Shape of Gamma Distribution')
fig.show()

Match 

In [None]:
# define a  function for simple match between strategies

def Match(player1=Nice, player2=Nice, len = 100):

    p1 = player1()        # first player
    p2 = player2()        # second player

    # in case of having more than on match between the players we can define a loop
    # ( multiple matches between same players can use for having probabilistic results )
    for i in range(len):
      p1 = player1(p2.action)   # calling player 1 by passing player 2's action to player 1 which automatically compute the result and save it in relative rewards
      p2 = player2(p1.action)   # calling player 2 by passing player 1's action to player 2 which automatically compute the result and save it in relative rewards

    result = [ np.sum(list(p1.reward)[-len:]) , np.sum(list(p2.reward)[-len:]) ]      # computing the gains which is the sum of last "len" number of matches
    res_dict = {'Name':[p1.name,p2.name],'Gain':[result[0],result[1]]}        # make a dictionary of players and their results
    res_df = pd.DataFrame(res_dict)           # change the format to DataFrame for plotting

    # plotting the results by pie chart
    fig = px.pie(res_df, values='Gain', names='Name', title=str(f"Match between {p1.name} and {p2.name} for {len} times"),
                 hover_data=['Name'], labels={'Name':'Strategy'} )
    fig.update_traces(textposition='inside', textinfo='percent+label')
    fig.show()

    # clear histories and rewards
    p1.clear()
    p2.clear()

In [None]:
Strategies = [Nice, Bad, Mainly_Nice, Mainly_Bad, Tit_For_Tat]  # set a list of players
L = 100               # set number of rounds

# set a match between all the players in the list for 100 rounds
for player1 in Strategies:
  for player2 in Strategies:
    # only display different strategies because same strategies will have obvious results(equal gain based on their actions)
    if player1 != player2 :
      Match(player1,player2,len=L)

MPIPD( multiple players iterative prisoner's dilemma )

In [None]:
# define the round robin scheme generator which takes a list of players and return the matches schedule (touple of players) 

def round_robin_gen(players, away_home= False):
    
    p = players.copy()   # creat a copy of players in order not to change it

    # In case of having even number of players
    if len(p) % 2 != 0:
        p.append(None)

    matches = []         # creat the list of matches(schedule)

    for it in range(len(p) - 1):
        matches.append( [ [ p[i], p[i + len(p)//2 ] ] for i in range( len(p)//2 ) ] )  # append the tuple of players( which the order is one from the first and the other from the middle until the end of the list)
        p.insert(1, p.pop())     # take the last player and insert it in second place in the list for the next round of matches

    # in case of having away_home option which will add the reverse of the schedule
    if away_home:
        return matches + [[a[::-1] for a in m] for m in matches]

    return matches

In [None]:
# showing the result of round robin generation function

for _ , l in enumerate(round_robin_gen(Strategies)):
  for a,b in l:
    if a != None and b != None:
      print('\n',(a().name,b().name))

for it in [Nice, Bad, Mainly_Nice, Mainly_Bad, Tit_For_Tat]:
    it().clear()


 ('Nice', 'Mainly Bad')

 ('Bad', 'Tit For Tat')

 ('Nice', 'Mainly Nice')

 ('Bad', 'Tit For Tat')

 ('Nice', 'Bad')

 ('Tit For Tat', 'Mainly Nice')

 ('Mainly Bad', 'Bad')

 ('Tit For Tat', 'Mainly Nice')

 ('Nice', 'Tit For Tat')

 ('Mainly Bad', 'Bad')


In [None]:
# define a Tournament function

def tournament( players , repetition = 100, shuffle = False):
  
  class_p = [None]*len(players)        # set a list for classes of players
  res_p = []                           # set a list for results of players
  matches = round_robin_gen(players)   # set the matches based of round robin

  # do the loop for number of repetition that was specified
  for rep in range(repetition):
    # in case of having true shuflle
    if shuffle :
      np.random.shuffle(players)            # randonmly shuffle the players and
      matches = round_robin_gen(players)    # set a new matches with new list of players

    # do a loop on every single match in the schedule
    for _ , l in enumerate(matches):
      for player1, player2 in l:
        if player1 != None and player2 != None:
          p1 = player1()            # call the class
          p2 = player2()            # call the class
          p1 = player1(p2.action)   # 
          p2 = player2(p1.action)   # 

    # defining the classes of players
    for i , strategy in enumerate(players):
      class_p[i] = strategy()

    res_p.append( [ (np.sum(list(p.reward)[-len(players):] ) , p.name, rep+1) for p in class_p ] )    # calculating the results of matches and append them to results for every round of repetition

    # clear histories and rewards for next repetition
    for it in class_p:
      it.clear()

  return res_p


In [None]:
Strategies = [Nice, Bad, Mainly_Nice, Mainly_Bad, Tit_For_Tat]      # set a list of players
data = tournament(players = Strategies, repetition= 100 )           # play a Tournament
data = sum(data,[])     # get rid of unwanted lists

In [None]:
df = pd.DataFrame(data, columns=['Gain','Strategy','Repetition'])      # transfer the results into DataFrame for plots
df

Unnamed: 0,Gain,Strategy,Repetition
0,8,Nice,1
1,13,Bad,1
2,11,Mainly Nice,1
3,9,Mainly Bad,1
4,10,Tit For Tat,1
...,...,...,...
495,8,Nice,100
496,13,Bad,100
497,11,Mainly Nice,100
498,8,Mainly Bad,100


In [None]:
df.groupby(by=["Strategy"]).sum()     # this will show Accumulated Gains of players in the Tournament

Unnamed: 0_level_0,Gain,Repetition
Strategy,Unnamed: 1_level_1,Unnamed: 2_level_1
Bad,1416,5050
Mainly Bad,705,5050
Mainly Nice,1095,5050
Nice,768,5050
Tit For Tat,1001,5050


In [None]:
fig = px.bar(df, x='Strategy', y='Gain', text_auto=True, title = 'Tournament (with repetition)')      # make a stacked bar plot that accumulate the results of repetitions 
fig.show()

In [None]:
fig = px.area( df, x='Repetition', y='Gain', color='Strategy', title = 'Tournament (with repetition)')
fig.show()

Iterative Tournament
(Moran process)

In [None]:
random.seed(123)    # set seed

# clear all histories and rewards
for it in [Nice, Bad, Mainly_Nice, Mainly_Bad, Tit_For_Tat]:
    it().clear()

In [None]:
'''
define a function for Birth and Death
the thing is that when this function calls with players and their respective probabilities to reproduce(which is based on their resul),
if birth happens, death is also happens but if replication does not happen, death also will not happen.
'''

def Birth_Death( players, prob ):

  b = prob.index( max( prob ) )         # set the index of most probable player to replicate

  # with the specific probability the most successful player may replicate
  if random.random() <= prob[b] :
    
    born = players[ b ]                                 # set the replicated player
    non_born = [ p for p in players if p != born ]      # list of non replicated players
    dead = random.choice( non_born )                    # randomly choose a player to die from non replicated players( since if we the replicated player we may not terminate the Moran Process)
    d = players.index( dead )                           # set index of dead player

    players.remove( dead )        # remove the dead player
    players.append( born )        # append the new player

  return players

In [None]:
"""
define a function for the Moran Process
in Moran Process players play against each other based on round robin scheme and
will set a probability to each of them based on their result in the Tournament 
and will replicate or dead according to their assigned probabilities.
"""

def Moran( players ):

  res_p = []      # reasult of players in each round of tournament
  count = 1       # round of tournament
  flag = 0        # flag to keep track of players

  while flag != (len( players )-1) :

    print('***************')
    print(f'{count} Round :\n')
    length = len(players)                 # set length of players
    class_p = [None]*length               # set an empty list for classes of players
    matches = round_robin_gen( players )  # set matches based on round robin
    print('Matches : \n')

    # play the tournament
    for _  , l in enumerate(matches):
      for player1 , player2 in l :
        if player1 != None and player2 != None :
          p1 = player1()
          p2 = player2()
          p1 = player1(p2.action)
          p2 = player2(p1.action)
          print(f'Player 1 : {p1.name}, Player 2 : {p2.name}\n')

    # assign different classes of players
    for i , strategy in enumerate( players ):
      class_p[i] = strategy()

    print(f'Class of Players in respective round : {class_p} \n')

    results = [ (np.sum(list(p.reward)[-length:] ) , p.name, count) for p in class_p ]      # keep track of resutls in each round of tournament
    res_p.append( results )
    print(f'The results in respective match : {results} \n')

    # Here I need to Implement a strategy to define some probabilities for each of players based on their scores in the tournament
    # the one who got the most point will gets the highest probability

    prob = [ np.sum( list( p.reward ) )/(count*length*100) for p in class_p ]               # set the probabilities of replication
    print(f'Probabilities of players for replication: {prob} \n')

    players = Birth_Death( players , prob )                     # call the birth and death function
    print(f'new population after replication : {players} \n')

    count += 1          # next round
    
    # I need to keep track of players to leave the "while" loop if all the players are the same
    flag = 0
    for i in range(len(players)-1):
      if players[i] == players[i+1]:
        flag += 1

  # clear all the histories and rewards(this happens after the moran process)
  for it in [Nice, Bad, Mainly_Nice, Mainly_Bad, Tit_For_Tat]:
    it().clear()

  return str(players[0]), res_p, count

In [None]:
Strategies = [Nice, Bad, Mainly_Nice, Mainly_Bad, Tit_For_Tat]        # set players
strategy, result , n_repetition= Moran(Strategies)                    # calling the moran process which will return the winner strategy, the whole results and the number of rounds
print(strategy,f' wins with {n_repetition} number of repetition')     

***************
1 Round :

Matches : 

Player 1 : Nice, Player 2 : Mainly Bad

Player 1 : Bad, Player 2 : Tit For Tat

Player 1 : Nice, Player 2 : Mainly Nice

Player 1 : Bad, Player 2 : Tit For Tat

Player 1 : Nice, Player 2 : Bad

Player 1 : Tit For Tat, Player 2 : Mainly Nice

Player 1 : Mainly Bad, Player 2 : Bad

Player 1 : Tit For Tat, Player 2 : Mainly Nice

Player 1 : Nice, Player 2 : Tit For Tat

Player 1 : Mainly Bad, Player 2 : Bad

Class of Players in respective round : [<__main__.Nice object at 0x7fef40125810>, <__main__.Bad object at 0x7fef40150150>, <__main__.Mainly_Nice object at 0x7fef40150650>, <__main__.Mainly_Bad object at 0x7fef401505d0>, <__main__.Tit_For_Tat object at 0x7fef40150510>] 

The results in respective match : [(8, 'Nice', 1), (15, 'Bad', 1), (10, 'Mainly Nice', 1), (7, 'Mainly Bad', 1), (10, 'Tit For Tat', 1)] 

Probabilities of players for replication: [0.032, 0.062, 0.03, 0.022, 0.04] 

new population after replication : [<class '__main__.Nice'>, <cl

In [None]:
result = sum(result,[])         # get rid of unwanted lists
result_df = pd.DataFrame(result, columns=['Gain','Strategy','Round'])       # change the format of results to DataFrame for ploting purposes
result_df

Unnamed: 0,Gain,Strategy,Round
0,8,Nice,1
1,15,Bad,1
2,10,Mainly Nice,1
3,7,Mainly Bad,1
4,10,Tit For Tat,1
...,...,...,...
110,9,Bad,23
111,9,Mainly Nice,23
112,11,Bad,23
113,13,Bad,23


In [None]:
result_df.groupby(by=["Strategy"]).sum()      # this will show the Gains of each class of players in moran process

Unnamed: 0_level_0,Gain,Round
Strategy,Unnamed: 1_level_1,Unnamed: 2_level_1
Bad,588,689
Mainly Bad,133,190
Mainly Nice,217,276
Nice,116,120
Tit For Tat,144,105


In [None]:
# stacked bar plot of the Moran Proces results

fig = px.bar(result_df, x='Strategy', y='Gain', color = 'Round',text_auto=True, title = 'Moran Process')  
fig.show()

In [None]:
fig = px.area(result_df, x='Round', y='Gain', color='Strategy', title = " Moran Process ")
fig.show()

*Mutation*(Transition Mode)

In [None]:
random.seed(1234)          # set seed

# clear all histories and rewards before the next round
for it in [Nice, Bad, Mainly_Nice, Mainly_Bad, Tit_For_Tat, EvolvablePlayer]:
    it().clear()

In [None]:
'''
Define a Birth and Death function
which can have mutation on new born players
that the method of mutation can be specified as 'transition' or 'atomic'
in transition mutation mode( if happens with mutation rate probability ), the new born player will be chosen from all available(non evolvable) players
but in atomic mode, the new born strategy is an evolvable one which its respective action will choosen from a gamma distribution. 
'''

def Birth_Death( players, prob, mutation_mode, mutation_rate ):

  mutation_target = [Nice, Bad, Mainly_Nice, Mainly_Bad, Tit_For_Tat]       # non evolvable target players which is used in transition mode
  b = prob.index( max( prob ) )                                             # set index of probable replica

  if random.random() <= prob[b] :
    
    born = players[ b ]                                     # set born player 
    non_born = [ p for p in players if p != born ]          # set non born players

    if random.random() <= mutation_rate :
      if mutation_mode == 'transition' :

        born = random.choice( mutation_target )           # randomly choose from mutation target strategies
        print("*******Mutation happend********")
        print(f'Mutation mode is : {mutation_mode}\n And the replica is : {born}')

      elif mutation_mode == 'atomic' :

        born = EvolvablePlayer                        # set born player to evolvable player
        print("*******Mutation happend********")
        print(f'Mutation mode is : {mutation_mode}\n And the replica is : {born}')
    
    
    dead = random.choice( non_born )                  # set dead player from non born players
    d = players.index( dead )                         # set index of dead player

    players.remove( dead )                            # remove the dead player
    players.append( born )                            # add new born player

  return players

In [None]:
"""
Define the Moran  Process with mutation
which will takes mutation_mode and mutation rate as well.
Fitness rate is the performance of the players based on their results in the rounds.
It can defines the shape of gamma distribution is case of atomic mutation and will eventually have effect on the action of evolvable player
"""


def Moran ( players, mutation_mode = "transition", mutation_rate = 0.3 ):

  fitness_rate = 1              # set fitness rate
  res_p = []                    
  count = 1
  flag = 0

  # while the players are not the same do the loop
  while flag != (len( players )-1) :

    print('***************')
    print(f'{count} Round :\n')
    length = len(players)                   
    class_p = [None]*length                
    matches = round_robin_gen( players )
    print('Matches : \n')

    # after defining the matches pick every two players in the schedule and set match between them
    for _  , l in enumerate(matches):
      for player1 , player2 in l :
        if player1 != None and player2 != None :

          # if the player in the match is evolvable I needed to specify the shape on different rounds( actually update it ) 
          if player1 == EvolvablePlayer :
            p1 = player1( rate = mutation_rate, shape = fitness_rate )
            p2 = player2()
          elif player2 == EvolvablePlayer :
            p1 = player1()
            p2 = player2( rate = mutation_rate, shape = fitness_rate)
          else :
            p1 = player1()
            p2 = player2()
          
          p1 = player1(p2.action)
          p2 = player2(p1.action)
          print(f'Player 1 : {p1.name}, Player 2 : {p2.name}\n')

    # set the list of classes of players
    for i , strategy in enumerate( players ):
      class_p[i] = strategy()

    print(f'Class of Players in respective round : {class_p} \n')

    # save every match's result
    results = [ ( np.sum(list(p.reward)[-length:] ), p.name, count) for p in class_p ]
    res_p.append( results )
    print(f'The results in respective match : {results} \n')

    # Here I need to Implement a strategy to define some probabilities for each of players based on their scores in the tournament
    # the one who gets the most will gets the highest probability 
    # this probability is actually fitness_rate

    prob = [ np.sum( list( p.reward ) )/(count*length*100) for p in class_p ]
    print(f'Probabilities of players for replication: {prob} \n')


    # set fitness rate to maximum probability at first because the only player who may born and mutate is the most probable one
    # if mutation happens and the mode was Atomic, the fitness rate will be the performance of mutant afterwards
    if mutation_mode == 'atomic' and players.count(EvolvablePlayer) != 0 :
      fitness_rate = np.sum( list( EvolvablePlayer().reward ) )/(count*length*100)
      print(f'fitness_rate is {fitness_rate}')
    else :
      fitness_rate = max(prob)
      print(f'fitness_rate is {fitness_rate}')


    # call birth and death function
    players = Birth_Death( players , prob , mutation_mode, mutation_rate )        
    print(f'new population after replication : {players} \n')

    count += 1      # next round

    # check the similarity of players in the list
    flag = 0
    for i in range(len(players)-1):
      if players[i] == players[i+1]:
        flag += 1

  # clear all the histories and rewards after the moran process
  for it in [Nice, Bad, Mainly_Nice, Mainly_Bad, Tit_For_Tat, EvolvablePlayer]:
    it().clear()

  return str(players[0]), res_p, count

In [None]:
Strategies = [Nice, Bad, Mainly_Nice, Mainly_Bad, Tit_For_Tat]                  # set players
strategy, result , n_repetition= Moran(Strategies, mutation_mode= 'transition')     # call the Moran Process with mutation
print(strategy,f' wins with {n_repetition} number of repetition')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m

Matches : 

Player 1 : Nice, Player 2 : Mainly Bad

Player 1 : Bad, Player 2 : Tit For Tat

Player 1 : Nice, Player 2 : Mainly Nice

Player 1 : Bad, Player 2 : Tit For Tat

Player 1 : Nice, Player 2 : Bad

Player 1 : Tit For Tat, Player 2 : Mainly Nice

Player 1 : Mainly Bad, Player 2 : Bad

Player 1 : Tit For Tat, Player 2 : Mainly Nice

Player 1 : Nice, Player 2 : Tit For Tat

Player 1 : Mainly Bad, Player 2 : Bad

Class of Players in respective round : [<__main__.Nice object at 0x7fef3ffe36d0>, <__main__.Bad object at 0x7fef3ff99150>, <__main__.Mainly_Nice object at 0x7fef3ff99190>, <__main__.Mainly_Bad object at 0x7fef3ff991d0>, <__main__.Tit_For_Tat object at 0x7fef3ff99210>] 

The results in respective match : [(6, 'Nice', 4), (13, 'Bad', 4), (13, 'Mainly Nice', 4), (8, 'Mainly Bad', 4), (9, 'Tit For Tat', 4)] 

Probabilities of players for replication: [0.028, 0.061, 0.033, 0.0225, 0.0375] 

fitness_rate is 0.061


In [None]:
result = sum(result,[])         # get rid of unwanted lists
result_df = pd.DataFrame(result, columns=['Gain','Strategy','Round'])       # change the format of results to DataFrame
result_df

Unnamed: 0,Gain,Strategy,Round
0,6,Nice,1
1,15,Bad,1
2,12,Mainly Nice,1
3,6,Mainly Bad,1
4,9,Tit For Tat,1
...,...,...,...
745,9,Bad,150
746,9,Tit For Tat,150
747,11,Bad,150
748,13,Bad,150


In [None]:
result_df.groupby(by=["Strategy"]).sum()          # Gains of different strategies

Unnamed: 0_level_0,Gain,Round
Strategy,Unnamed: 1_level_1,Unnamed: 2_level_1
Bad,4945,39319
Mainly Bad,617,2926
Mainly Nice,428,780
Nice,374,1225
Tit For Tat,1569,12375


In [None]:
# plot the results with stacked bar plot
fig = px.bar(result_df, x='Strategy', y='Gain', color = 'Round',text_auto=True, title = 'Moran Process(with transition mode)')
fig.show()

In [None]:
fig = px.area( result_df, x='Round', y='Gain', color='Strategy', title = " Moran Process(with transition mode) ")
fig.show()

Mutation(Atomin mode)

In [None]:
random.seed(1234)          # set seed

# clear all histories and rewards before the next round
for it in [Nice, Bad, Mainly_Nice, Mainly_Bad, Tit_For_Tat, EvolvablePlayer]:
    it().clear()

In [None]:
'''
Define a Birth and Death function
which can have mutation on new born players
that the method of mutation can be specified as 'transition' or 'atomic'
in transition mutation mode( if happens with mutation rate probability ), the new born player will be chosen from all available(non evolvable) players
but in atomic mode, the new born strategy is an evolvable one which its respective action will choosen from a gamma distribution. 
'''

def Birth_Death( players, prob, mutation_mode, mutation_rate ):

  mutation_target = [Nice, Bad, Mainly_Nice, Mainly_Bad, Tit_For_Tat]       # non evolvable target players which is used in transition mode
  b = prob.index( max( prob ) )                                             # set index of probable replica

  if random.random() <= prob[b] :
    
    born = players[ b ]                                     # set born player 
    non_born = [ p for p in players if p != born ]          # set non born players

    if random.random() <= mutation_rate :
      if mutation_mode == 'transition' :

        born = random.choice( mutation_target )           # randomly choose from mutation target strategies
        print("*******Mutation happend********")
        print(f'Mutation mode is : {mutation_mode}\n And the replica is : {born}')

      elif mutation_mode == 'atomic' :

        born = EvolvablePlayer                        # set born player to evolvable player
        print("*******Mutation happend********")
        print(f'Mutation mode is : {mutation_mode}\n And the replica is : {born}')
    
    
    dead = random.choice( non_born )                  # set dead player from non born players
    d = players.index( dead )                         # set index of dead player

    players.remove( dead )                            # remove the dead player
    players.append( born )                            # add new born player

  return players

In [None]:
"""
Define the Moran  Process with mutation
which will takes mutation_mode and mutation rate as well.
Fitness rate is the performance of the players based on their results in the rounds.
It can defines the shape of gamma distribution is case of atomic mutation and will eventually have effect on the action of evolvable player
"""


def Moran ( players, mutation_mode = "transition", mutation_rate = 0.3 ):

  fitness_rate = 1              # set fitness rate
  res_p = []                    
  count = 1
  flag = 0

  # while the players are not the same do the loop
  while flag != (len( players )-1) :

    print('***************')
    print(f'{count} Round :\n')
    length = len(players)                   
    class_p = [None]*length                
    matches = round_robin_gen( players )
    print('Matches : \n')

    # after defining the matches pick every two players in the schedule and set match between them
    for _  , l in enumerate(matches):
      for player1 , player2 in l :
        if player1 != None and player2 != None :

          # if the player in the match is evolvable I needed to specify the shape on different rounds( actually update it ) 
          if player1 == EvolvablePlayer :
            p1 = player1( rate = mutation_rate, shape = fitness_rate )
            p2 = player2()
          elif player2 == EvolvablePlayer :
            p1 = player1()
            p2 = player2( rate = mutation_rate, shape = fitness_rate)
          else :
            p1 = player1()
            p2 = player2()
          
          p1 = player1(p2.action)
          p2 = player2(p1.action)
          print(f'Player 1 : {p1.name}, Player 2 : {p2.name}\n')

    # set the list of classes of players
    for i , strategy in enumerate( players ):
      class_p[i] = strategy()

    print(f'Class of Players in respective round : {class_p} \n')

    # save every match's result
    results = [ ( np.sum(list(p.reward)[-length:] ), p.name, count) for p in class_p ]
    res_p.append( results )
    print(f'The results in respective match : {results} \n')

    # Here I need to Implement a strategy to define some probabilities for each of players based on their scores in the tournament
    # the one who gets the most will gets the highest probability 
    # this probability is actually fitness_rate

    prob = [ np.sum( list( p.reward ) )/(count*length*100) for p in class_p ]
    print(f'Probabilities of players for replication: {prob} \n')


    # set fitness rate to maximum probability at first because the only player who may born and mutate is the most probable one
    # if mutation happens and the mode was Atomic, the fitness rate will be the performance of mutant afterwards
    if mutation_mode == 'atomic' and players.count(EvolvablePlayer) != 0 :
      fitness_rate = np.sum( list( EvolvablePlayer().reward ) )/(count*length*100)
      print(f'fitness_rate is {fitness_rate}')
    else :
      fitness_rate = max(prob)
      print(f'fitness_rate is {fitness_rate}')


    # call birth and death function
    players = Birth_Death( players , prob , mutation_mode, mutation_rate )        
    print(f'new population after replication : {players} \n')

    count += 1      # next round

    # check the similarity of players in the list
    flag = 0
    for i in range(len(players)-1):
      if players[i] == players[i+1]:
        flag += 1

  # clear all the histories and rewards after the moran process
  for it in [Nice, Bad, Mainly_Nice, Mainly_Bad, Tit_For_Tat, EvolvablePlayer]:
    it().clear()

  return str(players[0]), res_p, count

In [None]:
Strategies = [Nice, Bad, Mainly_Nice, Mainly_Bad, Tit_For_Tat]                  # set players
strategy, result , n_repetition= Moran(Strategies, mutation_mode= 'atomic')     # call the Moran Process with mutation
print(strategy,f' wins with {n_repetition} number of repetition')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m

Matches : 

Player 1 : Nice, Player 2 : Mainly Bad

Player 1 : Bad, Player 2 : Tit For Tat

Player 1 : Nice, Player 2 : Mainly Nice

Player 1 : Bad, Player 2 : Tit For Tat

Player 1 : Nice, Player 2 : Bad

Player 1 : Tit For Tat, Player 2 : Mainly Nice

Player 1 : Mainly Bad, Player 2 : Bad

Player 1 : Tit For Tat, Player 2 : Mainly Nice

Player 1 : Nice, Player 2 : Tit For Tat

Player 1 : Mainly Bad, Player 2 : Bad

Class of Players in respective round : [<__main__.Nice object at 0x7fef3ff1f150>, <__main__.Bad object at 0x7fef4606c110>, <__main__.Mainly_Nice object at 0x7fef4606cdd0>, <__main__.Mainly_Bad object at 0x7fef4606c590>, <__main__.Tit_For_Tat object at 0x7fef4606ced0>] 

The results in respective match : [(6, 'Nice', 4), (13, 'Bad', 4), (13, 'Mainly Nice', 4), (8, 'Mainly Bad', 4), (9, 'Tit For Tat', 4)] 

Probabilities of players for replication: [0.028, 0.061, 0.033, 0.0225, 0.0375] 

fitness_rate is 0.061


In [None]:
result = sum(result,[])         # get rid of unwanted lists
result_df = pd.DataFrame(result, columns=['Gain','Strategy','Round'])       # change the format of results to DataFrame
result_df

Unnamed: 0,Gain,Strategy,Round
0,6,Nice,1
1,15,Bad,1
2,12,Mainly Nice,1
3,6,Mainly Bad,1
4,9,Tit For Tat,1
...,...,...,...
745,9,Bad,150
746,9,Tit For Tat,150
747,11,Bad,150
748,13,Bad,150


In [None]:
result_df.groupby(by=["Strategy"]).sum()          # Gains of different strategies

Unnamed: 0_level_0,Gain,Round
Strategy,Unnamed: 1_level_1,Unnamed: 2_level_1
Bad,4945,39319
Evolvable Player,244,1050
Mainly Bad,617,2926
Mainly Nice,428,780
Nice,356,1225
Tit For Tat,1375,11325


In [None]:
# plot the results with stacked bar plot
fig = px.bar(result_df, x='Strategy', y='Gain', color = 'Round',text_auto=True, title = 'Moran Process(with atomic mode)')
fig.show()

In [None]:
fig = px.area( result_df, x='Round', y='Gain', color='Strategy', title = " Moran Process(with atomic mode) ")
fig.show()

In [None]:
# clear all histories and rewards before the next round
for it in [Nice, Bad, Mainly_Nice, Mainly_Bad, Tit_For_Tat, EvolvablePlayer]:
    it().clear()

Reinforcment Learning(with DQN)