# Are the LLMs playing like a human? 
Goal: check if the LLMs have a behaviour similar to humans on Wikispeedia.

Strategy: make a LLM play to 2007 Wikispeedia and compare its answers with human paths.

Steps for a MWE:
1. decide which path to use (take one played a lot by people to have more data to compare with)
2. take a LLM
3. define the prompt
4. make it play
5. compare with paths of humans

Points to adjust:
- try different paths
- try different LLMs
- try different prompts


**Warning Required Docker** : Need start the **Ollama** server with Docker (Follow instruction in `SETUP.md`)

`docker start <container name>`

In [1]:
import os
import ollama # Follow instructions in SETUP.md to install ollama
from ollama import Client

import pandas as pd 
from tqdm import tqdm

In [2]:
# Constants
DATA_FOLDER = "../../data/"
DATA_PATH = DATA_FOLDER + "wikispeedia_paths-and-graph/"
MODEL = "llama3" # either "mistral" or "llama3"
YEAR = 2024 # either 2007 or 2024
HOST = "http://localhost:11434" 

# Paths to use to try the LLMs
Let's find the most finished paths. 

In [3]:
# load tsv files into pandas dataframes
path_finished = pd.read_csv(os.path.join(DATA_PATH, 'paths_finished.tsv'), sep='\t', comment='#', names=['hashedIpAddress', 'timestamp', 'durationInSec', 'path', 'rating'])
path_unfinished = pd.read_csv(os.path.join(DATA_PATH, 'paths_unfinished.tsv'), sep='\t', comment='#', names=["hashedIpAddress", "timestamp", "durationInSec", "path", "target", "type"])
print(f"number of finished paths: {len(path_finished)}")
print(f"number of unfinished paths: {len(path_unfinished)}")
print(f"total number of paths: {len(path_finished) + len(path_unfinished)}")

path_finished.path = path_finished.path.str.split(';')
path_finished["length"] = path_finished.path.map(len) 
path_finished['start'] = path_finished.path.str[0]
path_finished['end'] = path_finished.path.str[-1]
path_unfinished.path = path_unfinished.path.str.split(';')
path_unfinished["start"] = path_unfinished.path.str[0]
path_unfinished["end"] = path_unfinished.target
path_finished["finished"] = 1
path_unfinished["finished"] = 0
path_finished["count"] = 1
path_unfinished["count"] = 1

# combine finished and unfinished paths
paths = pd.concat([path_finished[['start','end','count']], path_unfinished[['start','end','count']]])
by_path = paths[["start","end","count"]].groupby(by=['start', 'end']).count().sort_values(by='count', ascending=False).reset_index()
print(f"number of unique paths: {len(by_path)}")
by_path

number of finished paths: 51318
number of unfinished paths: 24875
total number of paths: 76193
number of unique paths: 42631


Unnamed: 0,start,end,count
0,Brain,Telephone,2044
1,Theatre,Zebra,1777
2,Asteroid,Viking,1770
3,Pyramid,Bean,1586
4,Batman,Wood,223
...,...,...,...
42626,Gold,Ariel_Sharon,1
42627,Gold,African_Wild_Dog,1
42628,God,Wallis_and_Futuna,1
42629,God,Vole,1


In [4]:
path_finished.sort_values(by='length', ascending=False).head(10)

Unnamed: 0,hashedIpAddress,timestamp,durationInSec,path,rating,length,start,end,finished,count
8046,7f6da01d55086766,1248928447,2205,"[United_States, Washington%2C_D.C., Government...",5.0,435,United_States,English_language,1,1
19961,4f1b9ce764349901,1236685130,2292,"[Napoleon_I_of_France, United_Kingdom, Great_B...",,119,Napoleon_I_of_France,Recycling,1,1
6945,4e97d9af4476607f,1248925083,1141,"[Police_state, Police, United_States, Washingt...",5.0,104,Police_state,African_slave_trade,1,1
27136,0d57c8c57d75e2f5,1290096241,1852,"[Lake_Toba, Netherlands, Europe, Earth, United...",5.0,99,Lake_Toba,Looney_Tunes,1,1
22845,0b983de502e0ee53,1315954360,1272,"[Britney_Spears, Las_Vegas%2C_Nevada, <, Las_V...",5.0,96,Britney_Spears,Extraterrestrial_life,1,1
21075,6e120fc612138ce9,1231918753,1491,"[Rugby_World_Cup, Europe, Italy, <, <, Contine...",,96,Rugby_World_Cup,Volt,1,1
45703,3d106fd97346d073,1248999819,1051,"[Rabbit, Japan, Attack_on_Pearl_Harbor, United...",5.0,87,Rabbit,The_X-Files,1,1
3629,6616bb9562890cb9,1222830740,971,"[David_Beckham, Munich, Onion_dome, Onion, Pla...",4.0,86,David_Beckham,Sorrel,1,1
11639,63cc0ee203721110,1257470899,1234,"[Central_processing_unit, Computer, Computer_p...",4.0,85,Central_processing_unit,Meningitis,1,1
35321,25e30db000ae5901,1249418314,1972,"[Battle_of_Gettysburg, American_Civil_War, Eur...",5.0,80,Battle_of_Gettysburg,First_Macedonian_War,1,1


In [5]:
if YEAR == 2007:
    df_links = pd.read_csv(os.path.join(DATA_FOLDER+f"{YEAR}/", 'links.tsv'), sep='\t', comment='#', names=['linkSource', 'linkTarget'])

elif YEAR == 2024:
    df_links = pd.read_csv(os.path.join(DATA_FOLDER+f"{YEAR}/", 'links2024.csv'), names=['linkSource', 'linkTarget'])
df_links

Unnamed: 0,linkSource,linkTarget
0,linkSource,linkTarget
1,%C3%81ed%C3%A1n_mac_Gabr%C3%A1in,D%C3%A1l_Riata
2,%C3%81ed%C3%A1n_mac_Gabr%C3%A1in,Columba
3,%C3%81ed%C3%A1n_mac_Gabr%C3%A1in,Orkney
4,%C3%81ed%C3%A1n_mac_Gabr%C3%A1in,Isle_of_Man
...,...,...
224050,Zuid-Gelders,Afrikaans
224051,Zuid-Gelders,West_Flemish
224052,Zuid-Gelders,East_Flemish
224053,Zuid-Gelders,German_language


In [6]:
by_path = by_path[by_path["count"] > 10]
by_path

Unnamed: 0,start,end,count
0,Brain,Telephone,2044
1,Theatre,Zebra,1777
2,Asteroid,Viking,1770
3,Pyramid,Bean,1586
4,Batman,Wood,223
...,...,...,...
208,Barbados,Virus,11
209,ASCII,Swastika,11
210,Necktie,Lion,11
211,Euro,Whale,11


In [7]:
def get_links(article):
    """
    Get all links from a given article

    Args:
    article (str): article name

    Returns:
    list: list of articles linked to
    """
    return df_links[df_links.linkSource == article].linkTarget.values.tolist()

In [8]:
# Prompt inspired by (Human vs AI) https://drudilorenzo.github.io/ada-klech-data-story/
def llm_input(target, links):
    """
    Format input for LLM

    Args:
    target (str): target article
    links (list): list of articles linked to

    Returns:
    str: formatted input
    """

    return f"""I will give you a target word and a list from which you can choose an option. If the available options contains the target word, you choose it. Otherwise you choose the option that is most similar to it    

    Target word: [{target}]

    Available options: [{links}]

    RESPECT THIS FORMAT WHEN ANSWERING:

    Reasoning: [REASONING]

    Answer: Hence the choice is: '[ANSWER]'"""
    
llm_input('George_Washington', ['Able_Archer_83', 'Afghanistan', 'Estonia', 'Europe', 'Finland', 'France', 'French_language', 'George_W._Bush', 'Hungary', 'September_11,_2001_attacks', 'United_States'])


"I will give you a target word and a list from which you can choose an option. If the available options contains the target word, you choose it. Otherwise you choose the option that is most similar to it    \n\n    Target word: [George_Washington]\n\n    Available options: [['Able_Archer_83', 'Afghanistan', 'Estonia', 'Europe', 'Finland', 'France', 'French_language', 'George_W._Bush', 'Hungary', 'September_11,_2001_attacks', 'United_States']]\n\n    RESPECT THIS FORMAT WHEN ANSWERING:\n\n    Reasoning: [REASONING]\n\n    Answer: Hence the choice is: '[ANSWER]'"

In [9]:
get_links('George_Washington')

['President_of_the_United_States',
 'John_Adams',
 'Alexander_Hamilton',
 'John_Tyler',
 'Thomas_Jefferson',
 'American_Revolutionary_War',
 'French_Revolution',
 'United_States_dollar',
 'Trigonometry',
 'Barbados',
 'Smallpox',
 'Atlantic_slave_trade',
 'Philadelphia',
 'New_York_City',
 'Benjamin_Franklin',
 'United_States_Declaration_of_Independence',
 'Royal_Navy',
 'Pound_sterling',
 'Agriculture',
 'Great_Lakes',
 'Florida',
 'Abraham_Lincoln',
 'Gettysburg_Address',
 'John_Marshall',
 'United_States_Congress',
 'Anglicanism',
 'Church_of_England',
 'Book_of_Common_Prayer',
 'Gerald_Ford',
 'Mount_Rushmore',
 'Library_of_Congress',
 'James_Monroe',
 'Andrew_Jackson',
 'Martin_Van_Buren',
 'William_Henry_Harrison',
 'James_K._Polk',
 'Zachary_Taylor',
 'Millard_Fillmore',
 'Franklin_Pierce',
 'James_Buchanan',
 'Andrew_Johnson',
 'Ulysses_S._Grant',
 'Rutherford_B._Hayes',
 'Chester_A._Arthur',
 'Grover_Cleveland',
 'Benjamin_Harrison',
 'William_McKinley',
 'Theodore_Roosevelt',

In [10]:
# Create a client
client = Client(host="http://localhost:11434")

In [11]:
# Give the context of wikispeedia game to the model
try:
    response = client.chat(model=MODEL, messages=[
                {
                    'role': 'user',
                    'content': """We now play the following game:

    I will give you a target word and a list from which you can choose an option. If the available options contains the target word, you choose it. Otherwise you choose the option that is most similar to it. Before starting, I give you one examples, then it's your turn:

    you need to follow the same format as the example below:
    Target word: George_Washington

    Available options: [Able_Archer_83, Afghanistan, , Estonia, Europe, Finland, France, French_language, George_W._Bush, Hungary, September_11,_2001_attacks, United_States]

    Reasoning: I need to find something inside the list related to the target: 'George_Washington'. George Washington was the first president of United States and he lived in United States.

    Answer: Hence the answer is: 'United_States'.""",
                },
            ])
    
    print(response['message']['content'])

except Exception as e:
    print(e)
    print("Start your docker container and try again `docker start <container name>`")

I'm ready to play. Please provide the target word and available options, and I'll follow the format you showed.

Also, thanks for explaining the rules and providing an example!


In [12]:
def llm_player(start, end):
    """
    Play the game of Wikispeedia

    Args:
    start (str): start article
    end (str): end article

    Returns:
    list: path taken
    """
    path = [start]
    current = start
    cpt = 0
    while current != end and cpt < 100:
        links = list(set(get_links(current)))
        response = client.chat(model=MODEL, messages=[
            {
                'role': 'user',
                'content': llm_input(end, links),
            }
        ],
            options={'temperature': 0.2}
            )
        answer = response['message']['content'].split('\n')[-1]
        for link in links:
            if link in answer:
                current = link
                path.append(current)
                break
        cpt += 1

    if cpt >= 100:
        print("No path found")
        return None
    return path


In [None]:
# for all start and end articles
llm_paths = []
for start, end in tqdm(by_path[['start', 'end']].values, total=len(by_path)):
    path = llm_player(start, end)
    llm_paths.append({'start': start, 'end': end, 'path': path})
df_llm_paths = pd.DataFrame(llm_paths)

In [None]:
df_llm_paths.head()

In [None]:
df_llm_paths.to_csv(os.path.join(DATA_FOLDER+MODEL +"/",f"llm_paths{ 2024 if YEAR==2024 else '' }.csv"), index=False)