In [15]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy as sp
import quandl
import functools
import seaborn as sns
import plotnine as p9
import math
import datetime
import statsmodels.formula.api as sm
import statsmodels
import statsmodels.api as smapi
from statsmodels.regression.rolling import RollingOLS
import hmmlearn
from hmmlearn.hmm import GaussianHMM
import warnings
import os
import matplotlib.dates as mdates
import random

# A Breif Look into of Goofspiel Strategy

Goofspiel, aka the *game of pure strategy* is a zero sum card game between two players with symmetric information. The game was invented by mathematician Meril Flood in the 1930s. This game is an example of repeated simultaneous decision making.

The gameplay is rather simple. A standard deck of 52 cards is divided into it's four suits. One suit is given to each player, one is discarded, and the last is shuffled randomly and placed face down between the two players, this suit serves as the draw pile. At the start of each turn, a "value" card $(A = 1,2 = 2, \dots Q = 12, K = 13)$ is flipped face up. Then each player simultainously "bids" on the value card by placing a card from their hand face down, then both players reveal their cards. Whichever player plays the higher card claims the value card, and both bid cards are discarded. After the conclusion of the 13th round, the value cards claimed by each player are summed up and whichever player with the highest score wins. 

Many strategies leverage random, deterministic, or learning strategies.

### Random Strategy
Random play is exactly what it sounds like. A player running a random strategy will play their bid cards randomly, with no regard for the value card at play. 

### Deterministic Strategy     
A deterministic strategy uses a static set of rules for play. A player running a basic deterministic strategy will always bid the same card in response to a certain value card. One common deterministic strategy is the matching strategy, in which a player will simply match the value card each time they bid. For example if the current upturned value card is a 7, a player running a matching strategy will bid a 7. Other common deterministic strategies are varients on the matching strategy. One example is the value card + n strategy, in which a player will play a card n higher than value card. If the value card + n is greater than 13, the player running value card + n will then throw off their lowest card. 

 Deterministic strategies are very easy to play against. If you know your opponent is running the matching strategy, you can simply play the value card + 1 strategy to win every card except the king. Giving a final score of 78 to 13. Two players running deterministic strategies are never in Nash Equilibrium. If both players are aware the other is running a deterministic strategy, each player will continue up bidding their strategy until they eventually realize they can improve their results by ducking certain tricks. However if both players realize this, the game eventually devolves into random play. Each player trying to get the edge on the other by playing unpredictably.   

The matching strategy has been proven (Ross, Sheldon M. (September 1971). *Goofspiel -- The Game of Pure Strategy*) to be the optimal strategy into random play. Other upcard + n strategies perform better into random play the closer n is to 0. 
        
    
### Learning Strategies 
Learning strategies observe their opponent's play, and each play is determined by the opponents previous play. Naturally learning strategies perform quite admirably into deterministic strategies, they are consistantly able to upbid their opponent by exactly one card, and thus have very high winrates into deterministic strategies. They do not perform as well into random strategies — however — as there is nothing to learn from the opponent's play. In fact, learning strategies into random strategies are essentially just random strategies with slightly worse than random card selection (essentially random + 1). 


## Emperical Analysis 
we compare the performace of these strategies into one another by observing their winrates. We can compare two strategies relative performances by either looking at their average scores, or their winrates. Since I am more interested in winning and losing, I will look at winrates

In [211]:
def run_strategy(strat_a, strat_b):
    cards = []
    middle_cards = []
    a_cards = []
    b_cards = []
    for i in range(0,13):
        cards.append(i+1)
    a_score = 0
    b_score = 0
    middle_score = 0
    for i in range(0,13):
        middle_card = draw_card(cards)
        middle_score = middle_card
        a = strat_a(middle_card,middle_cards,b_cards,a_cards)
        b = strat_b(middle_card,middle_cards,a_cards,b_cards)
        if (a > b):
            a_score = a_score + middle_score
            middle_score = 0
        elif (b > a):
            b_score = b_score + middle_card
            middle_score = 0
        a_cards.append(a)
        b_cards.append(b)
        middle_cards.append(middle_card)
        cards = list_subtract(cards,[middle_card])
    return (a_score,b_score)
            
def get_winrate(strat_a,strat_b):
    a_wins = 0
    b_wins = 0
    ties = 0
    n = 1000
    a_avg = 0
    b_avg = 0
    for i in range(0,1000):
        (a,b) = run_strategy(strat_a,strat_b)
        a_avg = a_avg + a
        b_avg = b_avg + b
        if (b > a):
            b_wins = b_wins + 1
        elif (a > b):
            a_wins = a_wins + 1
        else:
            a_wins = a_wins + 0.5
    return a_wins/n
    
def base_winrate_table(names,list_of_strats):
    basic_strats = [det_0,det_1,learning,random_strat]
    basic_names = ['Matching','Upcard + 1', 'Basic Learning', 'Random']
    df = pd.DataFrame(index = range(0,len(names)))
    for i in basic_names:
        df[i] = np.zeros(len(names))
    for i in range(0,len(names)):
        for j in range(0,4):
            df[basic_names[j]][i] = get_winrate(list_of_strats[i],basic_strats[j])
            
    df.index = names

    return df
        
        
def winrate_table(strats,strat_functions):
    df = pd.DataFrame(index = range(0,len(strats)))
    for i in strats:
        df[i] = np.zeros(len(strats))

    for i in range(0,len(strats)):
        for j in range(0,len(strat_functions)):
            df[strats[i]][j] = get_winrate(strat_functions[j],strat_functions[i])
    df.index = strats
    return df
    
def draw_card(cards):
    return cards[random.randint(0,len(cards) -1)]
    
def list_subtract(x,y):
    return [item for item in x if item not in y]  

def minus(x,y):
    return list(pd.Series(x,dtype = 'int') - pd.Series(y,dtype = 'int'))

In [247]:
#Basic Strategies 
def deterministic(current_card,middle_cards,enemy_cards,player_cards, n):
    cards = []
    for i in range(0,13):
        cards.append(i+1)
    hand = list_subtract(cards,player_cards)
    deck = list_subtract(cards,middle_cards)
    if(current_card + n <= 13) and (current_card + n in hand):
        return current_card + n
    elif (current_card + n > 13):
        return hand[0]
    else:
        return hand[deck.index(current_card)]
            

def det_1(current_card,middle_cards,enemy_cards,player_cards):
    res = deterministic(current_card,middle_cards,enemy_cards,player_cards,1)
    #print(current_card,res)
    return res

def det_0(current_card,middle_cards,enemy_cards,player_cards):
    return deterministic(current_card,middle_cards,enemy_cards,player_cards,0)
    
def learning(current_card,middle_cards,enemy_cards,player_cards):
    if(len(middle_cards) > 0):
        n_est = enemy_cards[-1] - middle_cards[-1] 
        #print(n_est,current_card)
        if(n_est < 0):
            n_est = -1
        return deterministic(current_card,middle_cards,enemy_cards,player_cards,n_est + 1)
    else:
        cards = []
        for i in range(0,13):
            cards.append(i+1)
        hand = list_subtract(cards,player_cards)
        return hand[0]
    
def random_strat(current_card,middle_cards,enemy_cards,player_cards):
    cards = []
    for i in range(0,13):
        cards.append(i+1)
    avail_cards = list_subtract(cards,player_cards)
    return draw_card(avail_cards)

In [248]:
strats = ['Matching Strategy','Upcard + 1','Learning','Random']
strat_functions = [det_0,det_1,learning,random_strat]
df = winrate_table(strats,strat_functions)
df

Unnamed: 0,Matching Strategy,Upcard + 1,Learning,Random
Matching Strategy,0.5,0.0,0.0,0.965
Upcard + 1,1.0,0.5,0.0,0.8945
Learning,1.0,1.0,0.5,0.664
Random,0.034,0.129,0.337,0.5005


so we see random loses to matching which loses to upcard + 1 which both lose to learning. But we see the learning strategy perform poorly against random. This indicates that there is no Nash equilibream using basic strategies, since no matter what you are always inclined to switch to another strategy. So we now try to develop a "Good" strategy by taking the good aspects of basic strategies and combining them into a hybrid strategy. 

We design a more advanced "hybrid" strategy that is a combination of deterministic and random. This strategy plays like determinisitc, but  $n \in \mathbb{Z}\cap [-1,2]$ and n is picked randomly. If n = -1, the player will instead play the lowest card in their hand. If the card the player wants to play is not in their hand, they will also play the lowest card in their hand. 

In [259]:
#advanced strategies

def determ_random(current_card,middle_cards,enemy_cards,player_cards):
    n = random.randint(-1,3)
    if n == -1:
        cards = []
        for i in range(0,13):
            cards.append(i+1)
        avail_cards = list_subtract(cards,player_cards)
        return avail_cards[0]
    else:
        return deterministic(current_card,middle_cards,enemy_cards,player_cards,n)
 
#looks at enemy performance over multiple turns instead of most recent
def robust_learning(current_card,middle_cards,enemy_cards,player_cards):
    residuals = minus(enemy_cards,middle_cards)
    if len(residuals) > 0:
        total = 0
        n = 0
        for residual in residuals:
            if(abs(residual) < 5):
                total = total + residual
                n = n + 1
        if(n > 0):
            mean = total/n
            return deterministic(current_card,middle_cards,enemy_cards,player_cards, mean + 1)
        
    return random_strat(current_card,middle_cards,enemy_cards,player_cards)

def mean_learning(current_card,middle_cards,enemy_cards,player_cards):
    if len(middle_cards) > 0:
        residuals = minus(enemy_cards,middle_cards)
        mean = pd.Series(residuals).mean()
        return deterministic(current_card,middle_cards,enemy_cards,player_cards, mean + 1) 
    else:
        return det_0(current_card,middle_cards,enemy_cards,player_cards)

        

def mixed_strategy(current_card,middle_cards,enemy_cards,player_cards):
    strat = random.randint(0,1)
    if strat == 1:
        return mean_learning(current_card,middle_cards,enemy_cards,player_cards)
    else:
        cards = []
        for i in range(0,13):
            cards.append(i+1)
        hand = list_subtract(cards,player_cards)
        deck = list_subtract(cards,middle_cards)
        return hand[deck.index(current_card)]

#this assumes opponent is either deterministic or random. 
def pearson_strategy(current_card,middle_cards,enemy_cards,player_cards):
    if len(middle_cards) > 2:
        correlation = pd.Series(middle_cards).corr(pd.Series(enemy_cards))
        if (correlation < 0.2):
            return det_0(current_card,middle_cards,enemy_cards,player_cards) #assumes random and plays matching
        else:
            return mean_learning(current_card,middle_cards,enemy_cards,player_cards)
    else: 
        return learning(current_card,middle_cards,enemy_cards,player_cards)
        
    
def greedy(current_card,middle_cards,enemy_cards,player_cards):
    cards = []
    for i in range(0,13):
        cards.append(i+1)
    hand = list_subtract(cards,player_cards)
    deck = list_subtract(cards,middle_cards)
    if(current_card > 8):
        return hand[-1]
    else:
        return hand[0]

In [261]:
strats = [ "Deterministic Random", "Robust Learning", "Pearson","Greedy"]
strat_functions = [determ_random, robust_learning,pearson_strategy,greedy]

df = base_winrate_table(strats,strat_functions)
df

Unnamed: 0,Matching,Upcard + 1,Basic Learning,Random
Deterministic Random,0.737,0.547,0.4055,0.6945
Robust Learning,0.9975,0.9945,0.33,0.739
Pearson,1.0,0.882,0.2975,0.784
Greedy,0.117,0.0825,0.332,0.8985


As we can see Deterministic random has a positive winrate into all of the previously mentioned strategies. Suprisingly 

In [240]:
run_strategy(learning,det_1)

1 3
1 6
1 13
-12 7
1 9
1 2
1 10
1 5
1 4
1 1
1 8
1 12


(54, 11)

In [174]:
get_winrate(determ_random,det_1)

0.5385

In [139]:
cards = []
player_cards 
for i in range(0,13):
    cards.append(i+1)
avail_cards = list_subtract(cards,player_cards)


In [140]:
avail_cards

[1, 2, 4, 6, 8, 9, 10, 11, 12, 13]

In [181]:
x = [1,3,3]
y = [1,2,3]

[0, 1, 0]