# Genetic algorithm Farkle player

The goal of this player is to maximize their individual score in an infinite game. 

For any number of dice remaining there should be a cutoff score at which you are no longer willing to roll and risk your current score. Rather than directly computing the expected score from rolling any number of dice, I'll just write a genetic algorithm to back out the threshold score at which you should no longer roll.

In [1]:
import sys
sys.path.insert(0, '../../src')

import json
import random
from time import perf_counter
from collections import defaultdict

from farkle.game.players import Player
from farkle.logic import gameobjects as go

In [4]:
def unhash_frequencies(hash_to_dh, hash_to_freq):
    """Rebuild the per-dice-count frequency table from its JSON-friendly form.

    Parameters
    ----------
    hash_to_dh : dict
        Maps each hash key to a JSON-encoded ``DiceHand``, or to ``None``.
    hash_to_freq : dict
        Maps a dice count (anything ``int()`` accepts) to a dict of
        hash key -> frequency.

    Returns
    -------
    dict
        ``{dice_count: defaultdict(int)}``; each inner mapping goes from the
        decoded ``DiceHand`` (or ``None`` when the stored entry is ``None``)
        to the frequency converted with ``int()``.
    """
    table = {}
    for dice_key, freq_by_hash in hash_to_freq.items():
        dice_count = int(dice_key)
        per_hand = defaultdict(int)
        table[dice_count] = per_hand
        for hand_hash, count in freq_by_hash.items():
            encoded_hand = hash_to_dh[hand_hash]
            # A stored None stays None; everything else is decoded to a DiceHand.
            hand = None if encoded_hand is None else go.DiceHand.json_decode(encoded_hand)
            per_hand[hand] = int(count)
    return table

def load_freq(path: str):
    """Read the JSON frequency file at *path* and return the decoded table.

    The file must contain an object with ``hash_to_dh`` and ``hash_to_freq``
    entries; both are handed to ``unhash_frequencies`` and its result is
    returned unchanged.
    """
    with open(path, 'r') as handle:
        payload = json.load(handle)

    hand_lookup = payload['hash_to_dh']
    frequency_lookup = payload['hash_to_freq']
    return unhash_frequencies(hand_lookup, frequency_lookup)

In [5]:
# Relative location of the pre-computed score-frequency file.
freq_path = '../../../models/possible_score_frequencies.json'
# Decoded table keyed by dice count; p_farkle reads it as a module-level global.
possible_score_frequencies = load_freq(path=freq_path)

In [6]:
def p_farkle(num_dice: int = None):
    """Return the fraction of rolls recorded under the ``None`` key.

    The value is ``possible_score_frequencies[n][None] / 6**n`` -- presumably
    the probability that rolling ``n`` dice scores nothing (a farkle).

    Parameters
    ----------
    num_dice : int, optional
        Number of dice to roll. ``0`` is treated as ``6``. When omitted, the
        value is computed for every dice count in the table.

    Returns
    -------
    float or dict
        A single value when ``num_dice`` is given, otherwise a dict mapping
        each dice count in ``possible_score_frequencies`` to its value.
    """
    def _fraction(dice_count):
        return possible_score_frequencies[dice_count][None] / 6**dice_count

    if num_dice is None:
        return {dice_count: _fraction(dice_count)
                for dice_count in possible_score_frequencies}

    # Zero dice is looked up as a full six-dice roll.
    return _fraction(6 if num_dice == 0 else num_dice)

In [63]:
def weighted_score(dice_left: int, total_score: int):
    """Scale ``total_score + 50`` by the chance of not farkling on the next roll.

    Parameters
    ----------
    dice_left : int
        Number of dice that would be rolled; must be in ``0..6``.
    total_score : int
        Score at stake; must be non-negative.

    Returns
    -------
    float
        ``(1 - p_farkle(dice_left)) * (total_score + 50)``.

    Raises
    ------
    AssertionError
        If either argument is outside its allowed range.
    """
    dice_count_ok = dice_left in range(0, 7)
    assert dice_count_ok and total_score >= 0
    survival_probability = 1 - p_farkle(dice_left)
    return survival_probability * (total_score + 50)

In [38]:
class GeneticThresholdScorePlayer(Player):
    """Player whose decision to keep rolling is driven by per-dice-count score thresholds."""

    def __init__(self, name: str, thresholds: dict):
        """
        name : str
            Player name, forwarded to ``Player.__init__``.
        thresholds : dict
            Keys are the number of dice remaining; values are the scores at or
            above which the player stops rolling with that many dice left.
        """
        super().__init__(name=name)
        self.thresholds = thresholds

    def play_dicehand(self, dice_hand: go.DiceHand, game_state: go.GameState) -> go.RollDecision:
        """
        Pick one scoring option from ``dice_hand`` and decide whether to roll again.

        ``game_state`` is not consulted; only ``dice_hand`` and the player's
        own score are used.

        Decision logic:
            1. For every possible score compute the dice left after taking
            it and the total ``self.score + dice_hand.score + option.score``.
            2. Roll again when no dice are left, or when that total is below
            the threshold stored for the number of dice left.
            3. Value a roll-again option by its weighted score and a
            stop option by its plain total.
            4. Take the first option with the highest value, together with
            the roll-again decision from step 2.

        The original author notes that this is probably not the best behavior.

        Returns a ``RollDecision`` built from the untouched ``dice_hand``, a
        copy of it with the chosen option locked, and the roll-again flag.
        """
        options = dice_hand.possible_scores()
        option_values = []
        reroll_flags = []
        for option in options:
            dice_left = len(dice_hand.free_dice) - len(option.free_dice)
            total_score = self.score + dice_hand.score + option.score
            # Evaluated for every option, including those valued by raw total.
            risk_adjusted = weighted_score(dice_left, total_score)

            # The threshold is only looked up when some dice remain.
            keep_rolling = dice_left == 0 or total_score < self.thresholds[dice_left]
            reroll_flags.append(keep_rolling)
            option_values.append(risk_adjusted if keep_rolling else total_score)

        # max() keeps the earliest index on ties.
        best = max(range(len(option_values)), key=option_values.__getitem__)
        chosen_option = options[best]
        chosen_reroll = reroll_flags[best]

        hand_after = dice_hand.copy()
        hand_after.lock_from_dicehand(chosen_option)

        return go.RollDecision(dice_hand, hand_after, chosen_reroll)

In [48]:
# Example of behavior I wouldn't choose myself:
# I would take the 1 and leave the 2,2,2, since locking them seems
# like a waste of 3 dice that would likely get a better score.
thresholds = {i+1: 500 + 200 * i for i in range(5)}
name = 'gtsp1'
gtsp = GeneticThresholdScorePlayer(name=name, thresholds=thresholds)

dh = go.DiceHand(1,2,2,2,4,3)

# Print, for every scoring option, the same quantities play_dicehand works
# with: the total includes the player's own score, and the weighted score
# comes from weighted_score() so the demo cannot drift from the player logic.
for ps in dh.possible_scores():
    ps: go.DiceHand
    added_score = ps.score
    total_score = gtsp.score + dh.score + added_score
    dice_left = len(dh.free_dice) - len(ps.free_dice)
    print(ps)
    print(f'Total score: {total_score}')
    print(f'Dice remaining: {dice_left}')
    print(f'Prob of farkle: {p_farkle(dice_left)}')
    print(f'Weighted score: {weighted_score(dice_left, total_score)}')
    print('\n')

# The game state reports the player's actual score rather than a literal 0.
game_state = go.GameState(scores={name: gtsp.score},
                          current_player_name=name,
                          dice_hand=dh,
                          goal_score=None)

print(gtsp.play_dicehand(dh, game_state))

DiceHand(free=[1], locked=[], score=100)
Total score: 100
Dice remaining: 5
Prob of farkle: 0.07716049382716049
Weighted score: 138.42592592592592


DiceHand(free=[1, 2, 2, 2], locked=[], score=300)
Total score: 300
Dice remaining: 2
Prob of farkle: 0.4444444444444444
Weighted score: 194.44444444444446


DiceHand(free=[2, 2, 2], locked=[], score=200)
Total score: 200
Dice remaining: 3
Prob of farkle: 0.2777777777777778
Weighted score: 180.55555555555554


RollDecision(dicehand_pre=DiceHand(free=[1, 2, 2, 2, 3, 4], locked=[], score=0), dicehand_post=DiceHand(free=[3, 4], locked=[1, 2, 2, 2], score=300), will_roll_again=True)


In [49]:
thresholds = {i+1: 500 + 200 * i for i in range(5)}
name = 'gtsp1'
gtsp = GeneticThresholdScorePlayer(name=name, thresholds=thresholds)
gtsp.score = 300  # a non-zero player score changes which scoring option is picked
print(gtsp.thresholds, '\n')

dh = go.DiceHand(1,2,2,2,4,3)

# Print, for every scoring option, the quantities play_dicehand works with.
# The weighted score comes from weighted_score() so the demo cannot drift
# from the player logic.
for ps in dh.possible_scores():
    ps: go.DiceHand
    added_score = ps.score
    total_score = dh.score + added_score + gtsp.score
    dice_left = len(dh.free_dice) - len(ps.free_dice)
    print(ps)
    print(f'Total score: {total_score}')
    print(f'Dice remaining: {dice_left}')
    print(f'Prob of farkle: {p_farkle(dice_left)}')
    print(f'Weighted score: {weighted_score(dice_left, total_score)}')
    print('\n')

# Report the player's real score in the game state; it was a literal 0 even
# though the player holds 300 points.
game_state = go.GameState(scores={name: gtsp.score},
                          current_player_name=name,
                          dice_hand=dh,
                          goal_score=None)

print(gtsp.play_dicehand(dh, game_state))

{1: 500, 2: 700, 3: 900, 4: 1100, 5: 1300} 

DiceHand(free=[1], locked=[], score=100)
Total score: 400
Dice remaining: 5
Prob of farkle: 0.07716049382716049
Weighted score: 415.27777777777777


DiceHand(free=[1, 2, 2, 2], locked=[], score=300)
Total score: 600
Dice remaining: 2
Prob of farkle: 0.4444444444444444
Weighted score: 361.11111111111114


DiceHand(free=[2, 2, 2], locked=[], score=200)
Total score: 500
Dice remaining: 3
Prob of farkle: 0.2777777777777778
Weighted score: 397.22222222222223


RollDecision(dicehand_pre=DiceHand(free=[1, 2, 2, 2, 3, 4], locked=[], score=0), dicehand_post=DiceHand(free=[2, 2, 2, 3, 4], locked=[1], score=100), will_roll_again=True)


In [50]:
# Scratch check: random.randrange(100, 200) returns a random integer n
# with 100 <= n < 200 (the stop value is excluded).
random.randrange(100, 200)

155

In [64]:
num_epochs = 10
num_player_turns = 10
num_players = 10

# Baseline thresholds; each player's thresholds are sampled around these.
thresholds = {i+1: 500 + 200 * i for i in range(5)}
# Half-width of the sampling window around each baseline threshold.
# It does not change between epochs, so it is set once here.
player_threshold_points_range_start = 70
player_threshold_points_range = player_threshold_points_range_start

for epoch_idx in range(num_epochs):
    # init players for this epoch
    # NOTE(review): every epoch samples a fresh population around the same
    # baseline thresholds; nothing from the previous epoch's results is fed
    # back, so there is no selection/mutation step yet.
    player_thresholds = [{d: random.randrange(t-player_threshold_points_range,
                                              t+player_threshold_points_range)
                          for d, t in thresholds.items()}
                         for p in range(num_players)]
    players = [GeneticThresholdScorePlayer(name=f'gtsp{p}', thresholds=player_thresholds[p])
               for p in range(num_players)]

    for player in players:
        # init this player's dice hand and the starting game state
        dh = go.DiceHand()
        game_state_pre = go.GameState(scores={player.name: player.score},
                                      current_player_name=player.name,
                                      dice_hand=dh,
                                      goal_score=None)
        for turn_idx in range(num_player_turns):
            dh.reset_dice()
            farkled = False
            will_roll_again = True

            while will_roll_again and not farkled:
                # if player has hot dice and can re-roll all
                if dh.all_locked:
                    dh.roll_all_dice()
                else:
                    dh.roll()
                farkled = dh.farkled

                # Current state before the player makes the roll decision.
                # Scores are rebuilt from player.score on every roll so they
                # do not stay frozen at the value captured in game_state_pre.
                game_state = go.GameState({player.name: player.score},
                                          player.name,
                                          dh,
                                          game_state_pre.goal_score)

                if not farkled:
                    roll_decision = player.play_dicehand(dh, game_state)
                    # Continue the turn from the hand with the chosen dice locked.
                    dh = roll_decision.dicehand_post
                    will_roll_again = roll_decision.will_roll_again

            # A farkle forfeits everything accumulated during this turn.
            points_earned = dh.score if not farkled else 0
            player.score += points_earned

            

In [66]:
players

[gtsp0: 1750 points,
 gtsp1: 3600 points,
 gtsp2: 2950 points,
 gtsp3: 5200 points,
 gtsp4: 3500 points,
 gtsp5: 2550 points,
 gtsp6: 2950 points,
 gtsp7: 4350 points,
 gtsp8: 2850 points,
 gtsp9: 2650 points]

In [68]:
players[3].thresholds

{1: 444, 2: 702, 3: 908, 4: 1147, 5: 1240}