In [1]:
import spacy
import random

In [2]:
import numpy as np
import pickle
import pandas as pd

In [3]:
import itertools

In [4]:
import time

In [5]:
nlp = spacy.load("fr_core_news_lg")

In [6]:
from nltk import PorterStemmer
st = PorterStemmer()

LOAD

In [7]:
with open("word_database.pkl", "rb") as f:
    BDD = pickle.load(f)

In [8]:
with open("wordlist.pkl", 'rb') as f:
    lst = pickle.load(f)

In [9]:
len(lst)

887

In [10]:
len(BDD.keys())

37124

In [11]:
def generate_board():
    
    rs = np.array(random.sample(lst, 25))
    print("BOARD\n")
    print(rs.reshape(5,5))
    
    np.random.shuffle(rs)
  
    B = rs[:8]
    R = rs[8:17]
    N = rs[17:24]
    A = rs[24]
    
    return B, R, N, A

In [12]:
B, R, N, A = generate_board()
print(B)

BOARD

[['feu' 'dent' 'électricité' 'guitare' 'ordure']
 ['politique' 'franc' 'race' 'titre' 'tante']
 ['phrase' 'silhouette' 'cuir' 'guide' 'ninja']
 ['fuir' 'sardine' 'moderne' 'crac' 'solution']
 ['uniforme' 'hôtel' 'école' 'pince' 'poupée']]
['guitare' 'hôtel' 'titre' 'ninja' 'race' 'moderne' 'électricité'
 'silhouette']


In [13]:
def is_stopwords(w, lst_words):
    if w in lst_words:
        return False
    stem_w = st.stem(w)
    for word in lst_words:
        if stem_w in word:
            return False
        if word in w:
            return False
    if len(w) < 3:
        return False
    return True

In [14]:
is_stopwords('entré', B)

True

In [15]:
#Get all our words arrays
all_words = np.array(list(BDD.values()))
print(all_words.shape)

(37124, 300)


In [26]:
def get_clue(pos_words, neg_words, neu_words, assassin_word, danger_coeff=1.8, agg=0.05, topn=50000, given_indices=[]):
    
    #vectorize words
    pos_vecs = [nlp.vocab.get_vector(str(w)) for w in pos_words]   # Shape (8, 300)
    neg_vecs = [nlp.vocab.get_vector(str(w)) for w in neg_words]
    neu_vecs = [nlp.vocab.get_vector(str(w)) for w in neu_words]
    ass_vec = nlp.vocab.get_vector(str(assassin_word))             # Shape (300)

    #get n_best candidates with the highest min scalar product
    #get pos_words scores
    pw = (all_words @ np.array(pos_vecs).T)                # Shape (37124, 8)
    #get neg_words scores (max)
    ngw = (all_words @ np.array(neg_vecs).T).max(axis=1)   # Shape (37124,)
    if len(neu_words) > 0:
        nw = (all_words @ np.array(neu_vecs).T).max(axis=1)    # Shape (37124,)
    aw = all_words @ np.array(ass_vec).T                   # Shape (37124,)


    #get top candidates:
    df = pd.DataFrame(pw, index=BDD.keys())              # 37124 rows 8 columns
    neg_df = pd.DataFrame(ngw, index=BDD.keys())         # 37124 rows 1 column
    if len(neu_words) > 0:
        neu_df = pd.DataFrame(nw, index=BDD.keys())
    ass_df = pd.DataFrame(aw, index=BDD.keys())

    for w in np.concatenate([pos_words, given_indices]): # Remove pos words and already given clues
        if w in BDD.keys():
            df.drop(w, inplace=True)
            neg_df.drop(w, inplace=True)
            if len(neu_words) > 0:
                neu_df.drop(w, inplace=True)
            ass_df.drop(w, inplace=True)

    #filter
    df["top"] = df.apply(lambda x: np.sort(x)[-1:], axis=1)
    threshold = np.sort(df.top)[::-1][:topn][-1][0]
    max_len = 1
    

    if len(pos_words) > 1:
        #get a top 2
        df["top"] = df.apply(lambda x: np.sort(x)[-2:].min(), axis=1)
        threshold = np.sort(df.top)[::-1][:topn][-1]
        max_len = 2

        
    
    df["is_top"] = df.top >= threshold
    df["neg_filter"] = df["top"] > danger_coeff*neg_df.max(axis=1)
    if len(neu_words) > 0:
        df["neu_filter"] = df["top"] > 1.2*neu_df.max(axis=1)
    df["ass_filter"] = df["top"] > danger_coeff*ass_df.max(axis=1)

    if len(neu_words) > 0:
        candidates = df.loc[df.is_top].loc[df.neu_filter].loc[df.neg_filter].loc[df.ass_filter]
    if len(neu_words) == 0:
        candidates = df.loc[df.is_top].loc[df.neg_filter].loc[df.ass_filter]

    best_clue, best_score, best_k, best_g = None, -1, 0, ()
    for clue_i, scores in enumerate(candidates.iloc[:,:len(pos_words)].values):


        #transform clue_i into the actual word
        clue_word = candidates.index[clue_i]

        if not is_stopwords(clue_word, pos_words):
            continue
        

        # Order scores by lowest to highest inner product with the clue.
        ss = sorted((s, i) for i, s in enumerate(scores))
        groups = []
        groups_score = []
        for i in range(len(scores)):
            group = ss[-(i+1):]
            group_score = []
            for tpl in group:
                group_score.append(tpl[0])

                # Calculate the "real score" by
                #    (lowest score in group) * [ (group size)^aggressiveness - 1].
                # The reason we subtract one is that we never want to have a group of
                # size 1.
            groups_score.append((np.min(group_score)) * (len(group)**agg - 0.99))
            groups.append([tpl[1] for tpl in group])


        real_score = max(groups_score)

        if real_score > best_score:
            #update
            ind = groups[np.argmax(groups_score)]
            best_g = np.array(pos_words)[ind]
            best_k = len(best_g)
            best_clue = clue_word
            best_score = real_score

    candidates = candidates.sort_values(by=['top'], ascending = False)
    
    return best_clue, best_score, best_g, candidates


In [104]:
print(B)

['entrée' 'transport' 'flamme' 'ballon' 'allumer' 'botte' 'ordure'
 'patron']


In [105]:
print(R)

['carotte' 'raie' 'banane' 'or' 'tout' 'rame' 'miel' 'guitare' 'hiver']


In [106]:
pos_words, neg_words, neu_words, assassin_word = B, R, N, A
best_clue, best_score, best_g, candidates = get_clue(pos_words, neg_words, neu_words, assassin_word, danger_coeff=1.8, agg=0.05, topn=50000, given_indices=[])
print(best_clue)
print(best_score)
print(best_g)

accès
26.162866281656513
['transport' 'entrée']


In [107]:
candidates

Unnamed: 0,0,1,2,3,4,5,6,7,top,is_top,neg_filter,neu_filter,ass_filter
tir,253.589661,135.115143,239.606827,791.559204,246.836975,162.413269,-31.142727,-16.374996,791.559204,True,True,True,True
gestion,351.623260,628.507080,-36.368187,-75.891609,-67.398323,-98.163727,78.883736,23.652739,628.507080,True,True,True,True
port,416.559814,598.648926,65.437737,148.996521,73.086639,45.767265,88.380600,148.055389,598.648926,True,True,True,True
accès,589.354858,577.994263,-62.287186,-42.331753,109.148621,-128.642593,39.340786,-43.964569,589.354858,True,True,True,True
coût,196.757614,586.029907,-18.637167,110.227989,-35.992424,-100.035156,41.058956,34.278381,586.029907,True,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
ukase,13.310261,-12.959485,-8.742863,-1.947150,-11.986353,10.383444,-4.625071,36.498077,36.498077,True,True,True,True
kosovar,-9.185962,-46.453529,-14.153329,-33.815472,-6.906523,1.695526,2.749983,35.353249,35.353249,True,True,True,True
inapte,-50.003677,-8.492976,4.107484,-8.236734,-51.012794,-26.039593,21.006168,29.719801,29.719801,True,True,True,True
kafkaïen,-24.333256,-41.532860,-39.257195,-22.359192,-56.171432,17.737904,28.504475,13.872112,28.504475,True,True,True,True


In [58]:
#Build plateau
class Plateau():

    #constructor
    def __init__(self, B, R, N, A, status):

        #list of words
        self.B = list(B)
        self.R = list(R)
        self.N = list(N)
        self.A = A

        #Status = 1. If 0, then game is over
        self.status = status

        #all words
        lst = np.concatenate([B, R, N], axis=0)
        words = np.concatenate([lst, [A]])
        random.shuffle(words)
        self.words = words
        


    #methods
    def update_status(self):
        """
        This method uses the inputs (words and their associated team to build new attributes: list of words for each team.
        This should be used after every update of the Plateau
        """

        #If one of these is empty: game is over. Status becomes 0, it ends the game
        if min([len(self.B), len(self.R), len(self.A)]) == 0:
            self.status = 0
            print("Game is over")

        #print(f"There is {len(self.B)} words for team BLUE\n")
        #print(f"There is {len(self.R)} words for team RED\n")
        #print(f"There is {len(self.N)} neutral words\n")
        #print(f"There is {len(self.A)} assasin\n")

        return None

    def remove_word(self, chosen_word):
        """
        This function takes a word that the user wants to remove. It prompts a message and then proceeds to remove it.
        """
        #check if in list
        if chosen_word == "next":
            print("\nYou decided to pass.\n")
            return None
        
        if chosen_word not in self.words:
            return self.remove_word(chosen_word=input("Choose another one, this one is not in the board\n>>"))
        
        else:
            self.words = np.where(self.words==chosen_word, "X", self.words) 
        #print(f" \nYou decided to remove {chosen_word}\n")

        #remove
        if chosen_word in self.B:
            print('It was a BLUE word!\n')
            self.B.remove(chosen_word)
            self.update_status()

        if chosen_word in self.R:
            print('It was a RED word!\n')
            self.R.remove(chosen_word)
            self.update_status()

        if chosen_word in self.N:
            print('It was a NEUTRAL word!\n')
            self.N.remove(chosen_word)
            self.update_status()

        if chosen_word in self.A:
            print('It was the ASSASSIN word!\n')
            self.A = []
            self.update_status()

        return None


In [193]:
#Lauch game!
B, R, N, A = generate_board()
plateau = Plateau(B, R, N, A, 1)
i = 0

given_indices = []

INDICES = {}

NB_TOURS = 0
while plateau.status == 1:
    i = (-1)*i + 1
    team_name = ["BLUE", "RED"][i]
    NB_TOURS += 1
    plateau.update_status()
    team = [plateau.B, plateau.R][i]
    opponent = [plateau.R, plateau.B][i]
    print("__________________")
    print(f"This turn, you are {team_name}")
    print(f"Words remaining for team: {len(team)}")
    print(f"Words remaining for opponent: {len(opponent)}\n")
    
    if i == 1:
        clue, score, group, _ = get_clue(team, opponent, plateau.N, plateau.A, given_indices=given_indices)
    if i == 0:
        clue, score, group, _ = get_clue(team, opponent, plateau.N, plateau.A, given_indices=given_indices)
    
    INDICES[clue] = list(group)
    given_indices.append(clue)
    print(f"The given clue: {clue}")
    print(f"Words to find: {len(group)}\n")
    for k in range(len(group)):
        print(plateau.words.reshape(5,5))
        w = input("Please chose a word to remove\n>>")
        if w in opponent or w in plateau.N:
            plateau.remove_word(w)
            print("End of turn")
            break
        
        plateau.remove_word(w)
        plateau.update_status()
        if plateau.status == 0:
            break
    
print("____________________\n")
print("____________________\n")
print("____________________\n")
print(f"NB TOURS: {NB_TOURS}")

for c in INDICES:
    print(f"Clue: {c}, words to get: {INDICES[c]}")

BOARD

[['col' 'millionnaire' 'pioche' 'scène' 'fleur']
 ['formule' 'aube' 'recette' 'figure' 'cycle']
 ['poire' 'humide' 'vingt' 'extension' 'loi']
 ['biberon' 'cacher' 'bleu' 'reptile' 'poids']
 ['top' 'heureux' 'vase' 'mode' 'mystère']]
__________________
This turn, you are RED
Words remaining for team: 9
Words remaining for opponent: 8

The given clue: plan
Words to find: 3

[['extension' 'fleur' 'cycle' 'heureux' 'pioche']
 ['poire' 'vase' 'aube' 'top' 'mystère']
 ['formule' 'mode' 'humide' 'recette' 'figure']
 ['millionnaire' 'vingt' 'loi' 'scène' 'col']
 ['biberon' 'poids' 'bleu' 'cacher' 'reptile']]
Please chose a word to remove
>>fleur
It was a BLUE word!

End of turn
__________________
This turn, you are BLUE
Words remaining for team: 7
Words remaining for opponent: 9

The given clue: star
Words to find: 2

[['extension' 'X' 'cycle' 'heureux' 'pioche']
 ['poire' 'vase' 'aube' 'top' 'mystère']
 ['formule' 'mode' 'humide' 'recette' 'figure']
 ['millionnaire' 'vingt' 'loi' 'scèn

Please chose a word to remove
>>col
It was a NEUTRAL word!

End of turn
__________________
This turn, you are RED
Words remaining for team: 1
Words remaining for opponent: 3

The given clue: noir
Words to find: 1

[['X' 'X' 'X' 'X' 'X']
 ['X' 'X' 'X' 'X' 'X']
 ['X' 'X' 'X' 'X' 'X']
 ['millionnaire' 'X' 'X' 'X' 'X']
 ['biberon' 'poids' 'bleu' 'cacher' 'reptile']]
Please chose a word to remove
>>biberon
It was a NEUTRAL word!

End of turn
__________________
This turn, you are BLUE
Words remaining for team: 3
Words remaining for opponent: 1



ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 0 is different from 300)

## Evaluation AI NLP

In [183]:
def generate_board_var(board_size):
    
    if board_size == 25:
        rs = np.array(random.sample(lst, 25))    
        np.random.shuffle(rs)  
        J1_words = rs[:8]
        J2_words = rs[8:17]
        N_words = rs[17:24]
        A_words = rs[24]
    
    elif board_size == 20:
        rs = np.array(random.sample(lst, 20))   
        np.random.shuffle(rs)  
        J1_words = rs[:6]
        J2_words = rs[6:12]
        N_words = rs[12:19]
        A_words = rs[19]
    
    elif board_size == 15:
        rs = np.array(random.sample(lst, 15))    
        np.random.shuffle(rs)  
        J1_words = rs[:4]
        J2_words = rs[4:8]
        N_words = rs[8:14]
        A_words = rs[14]
    
    elif board_size == 10:
        rs = np.array(random.sample(lst, 10)) 
        np.random.shuffle(rs)  
        J1_words = rs[:3]
        J2_words = rs[3:6]
        N_words = rs[6:9]
        A_words = rs[9]
    
    return J1_words, J2_words, N_words, A_words

In [186]:
def evaluation_nlp(n_iterations=1):
    
    n_J1, n_J2, n_N, n_A = 0, 0, 0, 0
    
    for i in range(n_iterations):
        
        list_sizes = [25, 20, 15, 10]
        for board_size in list_sizes:
            pos_words, neg_words, neu_words, assassin_word = generate_board_var(board_size)
            plateau = Plateau(pos_words, neg_words, neu_words, assassin_word, 1)
            
            clue, score, group, _ = get_clue(pos_words, neg_words, neu_words, assassin_word, danger_coeff=1.8, agg=0.05, topn=50000, given_indices=[])
            
            print(f"The given clue: {clue}")
            print(f"Words to find: {len(group)}\n")
            for k in range(len(group)):
                print(plateau.words.reshape(board_size//5,5))
                w = input("Please chose a word to remove\n>>")
                while w not in plateau.words:
                    w = input("Please chose a word from the board\n>>")
                if w == "next":
                    print("You decided to pass.")
                    break
                elif w in plateau.R:
                    print("Negative word")
                    n_J2 += 1
                    break
                elif w in plateau.N:
                    print("Neutral word")
                    n_N += 1
                    break
                elif w in plateau.A:
                    print("Assassin word")
                    n_A += 1
                    break
                else: 
                    print("Positive word")
                    n_J1 += 1        
                    plateau.remove_word(w)
                    plateau.update_status()
                    if plateau.status == 0:
                        break
    
    n_J1 /= 4*n_iterations
    n_J2 /= 4*n_iterations
    n_N /= 4*n_iterations
    n_A /= 4*n_iterations
            
    return n_J1, n_J2, n_N, n_A        

In [187]:
n_J1, n_J2, n_N, n_A = evaluation_nlp()

The given clue: règne
Words to find: 2

[['bretelle' 'cassette' 'vaisseau' 'pioche' 'baguette']
 ['chasse' 'sain' 'trône' 'amérique' 'col']
 ['station' 'plume' 'vie' 'debout' 'bière']
 ['titre' 'colline' 'soleil' 'siège' 'vague']
 ['poussière' 'australie' 'canada' 'indien' 'boule']]
Please chose a word to remove
>>siège
Negative word
The given clue: onde
Words to find: 3

[['flamme' 'danse' 'fouet' 'pigeon' 'appétit']
 ['nous' 'trou' 'sirène' 'chiffre' 'ski']
 ['dix' 'éventail' 'vision' 'internet' 'taxi']
 ['extension' 'grenouille' 'miaou' 'gomme' 'boulet']]
Please chose a word to remove
>>internet
Negative word
The given clue: aire
Words to find: 4

[['appétit' 'papa' 'moderne' 'gel' 'ensemble']
 ['commerce' 'altitude' 'italie' 'bateau' 'crac']
 ['cône' 'couronne' 'vive' 'lunette' 'chou']]
Please chose a word to remove
>>altitude
Positive word
[['appétit' 'papa' 'moderne' 'gel' 'ensemble']
 ['commerce' 'X' 'italie' 'bateau' 'crac']
 ['cône' 'couronne' 'vive' 'lunette' 'chou']]
Please 

In [188]:
(n_J1, n_J2, n_N, n_A)

(0.5, 0.75, 0.25, 0.0)

In [189]:
def agregation_NLP(n_J1, n_J2, n_N, n_A):
    print("IA score: ", n_J1 - n_J2 - 3*n_A)
    return n_J1 - n_J2 - 3*n_A

In [191]:
agregation_NLP(n_J1, n_J2, n_N, n_A)

IA score:  -0.25


-0.25

## Evaluation AI In-Game

In [74]:
def evaluation_ingame(n_iterations=2):
    
    dict_scores = {'n_J1': [], 'n_J2': [], 'n_A': [], 'T': []}
    
    for i in range(1, n_iterations+1):
        
        print(f"Game {i}/{n_iterations}")
        print("You play the RED team")
        n_J1, n_J2, n_A, T = 0, 0, 0, 0
    
        B, R, N, A = generate_board()
        plateau = Plateau(B, R, N, A, 1)
        turn = 1

        given_indices = []
        
        while plateau.status == 1:

            plateau.update_status()
            team = plateau.R
            opponent = plateau.B
            
            print(f"___Turn {turn}____")
            print(f"Words remaining for team: {len(team)}")
            print(f"Words remaining for opponent: {len(opponent)}\n")
            
            # Turn of J1 (Red)
            clue, score, group, _ = get_clue(team, opponent, plateau.N, plateau.A, given_indices=given_indices)
    
            given_indices.append(clue)
            print(f"The given clue: {clue}")
            print(f"Words to find: {len(group)}\n")
        
            for k in range(len(group)):
            
                print(plateau.words.reshape(5,5))
                w = input("Please chose a word to remove\n>>")
                
                if w in opponent:
                    n_J2 += 1
                    plateau.remove_word(w)
                    print("End of turn")
                    break
                    
                if w in plateau.N:
                    plateau.remove_word(w)
                    print("End of turn")
                    break
        
                if w in plateau.A:
                    n_A += 1
                    plateau.remove_word(w)
                    print("End of turn")
                    break
                    
                if w in plateau.R:
                    n_J1 += 1
                    plateau.remove_word(w)
                    
            if plateau.status == 0:
                break
                    
            # Turn of J2 (Blue)
            if turn % 2 == 1 :
                remove_J2 = random.choice(plateau.B)
                print("Word removed for J2: ", remove_J2)
                plateau.remove_word(remove_J2)
                if plateau.status == 0:
                    break
                remove_J2 = random.choice(plateau.B)
                print("Word removed for J2: ", remove_J2)
                plateau.remove_word(remove_J2)
                if plateau.status == 0:
                    break
                if len(plateau.N) > 0:
                    remove_N = random.choice(plateau.N)
                    print("Word removed for N: ", remove_N)
                    plateau.remove_word(remove_N)
                    
            if turn % 2 == 0 :
                remove_J2 = random.choice(plateau.B)
                print("Word removed for J2: ", remove_J2)
                plateau.remove_word(remove_J2)
                if plateau.status == 0:
                    break
                remove_J1 = random.choice(plateau.R)
                print("Word removed for J1: ", remove_J1)
                plateau.remove_word(remove_J1)
                if plateau.status == 0:
                    break                  
                    
            turn += 1     
        
        print(f"Score for this game: n_J1 = {n_J1}, n_J2 = {n_J2}, n_A = {n_A}, T = {turn}")
        
        dict_scores['n_J1'].append(n_J1)
        dict_scores['n_J2'].append(n_J2) 
        dict_scores['n_A'].append(n_A)  
        dict_scores['T'].append(turn)
                    
    return dict_scores

In [75]:
dict_scores = evaluation_ingame(n_iterations=2)

Game 1/2
You play the RED team
BOARD

[['casque' 'groupe' 'volume' 'fleur' 'cinéma']
 ['arme' 'forger' 'jet' 'prêt' 'caisse']
 ['pirate' 'papa' 'napoléon' 'pousser' 'premier']
 ['indien' 'coton' 'avocat' 'aile' 'louis']
 ['ver' 'feu' 'couteau' 'poste' 'population']]
___Turn 1____
Words remaining for team: 9
Words remaining for opponent: 8

The given clue: main
Words to find: 5

[['volume' 'aile' 'poste' 'arme' 'papa']
 ['indien' 'napoléon' 'groupe' 'caisse' 'casque']
 ['coton' 'cinéma' 'fleur' 'feu' 'louis']
 ['couteau' 'ver' 'prêt' 'pirate' 'pousser']
 ['forger' 'population' 'premier' 'jet' 'avocat']]
Please chose a word to remove
>>arme
It was a RED word!

[['volume' 'aile' 'poste' 'X' 'papa']
 ['indien' 'napoléon' 'groupe' 'caisse' 'casque']
 ['coton' 'cinéma' 'fleur' 'feu' 'louis']
 ['couteau' 'ver' 'prêt' 'pirate' 'pousser']
 ['forger' 'population' 'premier' 'jet' 'avocat']]
Please chose a word to remove
>>pousser
It was a NEUTRAL word!

End of turn
Word removed for J2:  louis
It 

In [76]:
dict_scores

{'n_J1': [2, 2], 'n_J2': [2, 2], 'n_A': [0, 0], 'T': [4, 4]}

In [79]:
def agregation_ingame(dict_scores):
    score, iters = 0, len(dict_scores['n_J1'])
    for i in range(iters):
        score += ((dict_scores['n_J1'][i] - dict_scores['n_J2'][i] - 2*dict_scores['n_A'][i])/dict_scores['T'][i])
    score /= iters
    print("IA score: ", score)
    return score

In [80]:
agregation_ingame(dict_scores)

IA score:  0.0


0.0