# LionLinker Testing Notebook
This notebook is used to test each component of the LionLinker pipeline individually.

In [8]:
from lion_linker.lion_linker import LionLinker

# Define the path to the files
input_csv = 'tests/data/film.csv'
prompt_file = 'prompt_template.txt'
model_name = 'gemma2:2b'  # Replace with the actual model you are using
api_url = 'https://lamapi.hel.sintef.cloud/lookup/entity-retrieval'  # Replace with the actual API URL
api_token = 'lamapi_demo_2023'  # Replace with your API token if applicable
output_csv = 'output_test.csv'
batch_size = 10  # Small batch size for testing

# Initialize the LionLinker instance
lion_linker = LionLinker(input_csv, prompt_file, model_name, api_url, api_token, output_csv, 
                         batch_size, ["title"], api_limit=10, compact_candidates=False)
await lion_linker.run()


2024-09-17 14:14:24,472 - INFO - Initializing components...
2024-09-17 14:14:24,476 - INFO - Setup completed.
2024-09-17 14:14:24,476 - INFO - Starting processing of tests/data/film.csv...
2024-09-17 14:14:25,122 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-09-17 14:14:27,291 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-09-17 14:14:28,339 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-09-17 14:14:29,227 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-09-17 14:14:30,319 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-09-17 14:14:31,454 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-09-17 14:14:32,585 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-09-17 14:14:33,484 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200

In [7]:
# Run the linking job through the command-line entry point (cli.py) via an IPython shell escape,
# with the input CSV and output CSV given positionally and the remaining settings passed as flags.
# NOTE(review): `--mention_columns` uses an underscore while every other flag is hyphenated — confirm against cli.py.
!python3 cli.py tests/data/film.csv output_test.csv --api-url https://lamapi.hel.sintef.cloud/lookup/entity-retrieval --api-token lamapi_demo_2023 --prompt-file prompt_template.txt --model gemma2:2b --batch-size 10 --mention_columns title

2024-08-27 15:03:25,194 - INFO - Initializing components...
2024-08-27 15:03:25,196 - INFO - Setup completed.
2024-08-27 15:03:25,196 - INFO - Starting processing of tests/data/film.csv...
Processing Batches:   0%|                                | 0/26 [00:00<?, ?it/s]2024-08-27 15:03:27,762 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-08-27 15:03:29,298 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-08-27 15:03:30,084 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-08-27 15:03:30,850 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-08-27 15:03:31,592 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-08-27 15:03:32,401 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-08-27 15:03:33,150 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-08-27 15:03:

In [6]:
from lion_linker.lion_linker import PromptGenerator, APIClient

candidates = await APIClient("https://lamapi.hel.sintef.cloud/lookup/entity-retrieval", "lamapi_demo_2023").fetch_multiple_entities(["Titanic", "James Cameron"])
print(PromptGenerator("prompt_template.txt").generate_prompt("The film Titanic was directed by James Cameron.", 
                                                             "title", "", "", candidates["Titanic"], compact=True))

Here is the summary of the table:
The film Titanic was directed by James Cameron.

Here is the data for the current row:
title

The column name in question is:


The entity mention is:


Possible candidates for the entity are:
[{"id":"Q2478025","name":"Titanic","description":"soundtrack album for the 1997 film Titanic","types":[{"id":"Q482994","name":"album"}]},{"id":"Q122032189","name":"Titanic","description":"book edition published in 2018","types":[{"id":"Q3331189","name":"version, edition or translation"}]},{"id":"Q3529506","name":"Titanic","description":"1998 debut studio album by Wenge BCBG Les Anges Adorables","types":[{"id":"Q482994","name":"album"}]},{"id":"Q12059061","name":"Titanic","description":"Czech band","types":[{"id":"Q215380","name":"musical group"}]},{"id":"Q25417640","name":"Titanic","description":"Wikimedia disambiguation page","types":[{"id":"Q4167410","name":"Wikimedia disambiguation page"}]},{"id":"Q84727764","name":"Titanic","description":"passenger/general ca

In [10]:
# Display the raw lookup result: a dict keyed by mention, each value a list of candidate records.
candidates

{'Titanic': [{'id': 'Q2478025',
   'name': 'Titanic',
   'description': 'soundtrack album for the 1997 film Titanic',
   'types': [{'id': 'Q482994', 'name': 'album'}],
   'kind': 'entity',
   'NERtype': 'OTHERS',
   'ambiguity_mention': 1.0,
   'corrects_tokens': 1.0,
   'ntoken_mention': 1,
   'ntoken_entity': 1,
   'length_mention': 7,
   'length_entity': 7,
   'popularity': 0.02,
   'pos_score': 0.1,
   'es_score': 1.0,
   'ed_score': 1.0,
   'jaccard_score': 1.0,
   'jaccardNgram_score': 1.0},
  {'id': 'Q122032189',
   'name': 'Titanic',
   'description': 'book edition published in 2018',
   'types': [{'id': 'Q3331189', 'name': 'version, edition or translation'}],
   'kind': 'entity',
   'NERtype': 'OTHERS',
   'ambiguity_mention': 1.0,
   'corrects_tokens': 1.0,
   'ntoken_mention': 1,
   'ntoken_entity': 1,
   'length_mention': 7,
   'length_entity': 7,
   'popularity': 0.0,
   'pos_score': 0.2,
   'es_score': 1.0,
   'ed_score': 1.0,
   'jaccard_score': 1.0,
   'jaccardNgram_sco

## Test API Interaction
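Test the `APIClient` class to ensure it can fetch data correctly. The cell below reuses the `lion_linker` instance created in the first code cell, so run that cell first.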

In [3]:
from tqdm.asyncio import tqdm


In [2]:
api_client = lion_linker.api_client
candidates = await api_client.fetch_multiple_entities(["Matrix", "Inception"])
candidates

{'Matrix': [{'id': 'Q44337',
   'name': 'matrix',
   'description': 'rectangular array of numbers, symbols, or expressions, arranged in rows and columns',
   'types': []},
  {'id': 'Q83495',
   'name': 'Matrix',
   'description': '1999 American science fiction action thriller film',
   'types': [{'id': 'Q11424', 'name': 'film'}]},
  {'id': 'Q193825',
   'name': 'Matrix',
   'description': 'structure external to cells, which provides structural support for cells or tissues',
   'types': [{'id': 'Q5058355', 'name': 'cellular component'}]},
  {'id': 'Q190069',
   'name': 'Matrix',
   'description': 'Wikimedia disambiguation page',
   'types': [{'id': 'Q4167410', 'name': 'Wikimedia disambiguation page'}]},
  {'id': 'Q1463013',
   'name': 'matrix',
   'description': 'geological term for the mass of material in which larger grains, crystals or clasts are embedded',
   'types': [{'id': 'Q35758', 'name': 'matter'}]},
  {'id': 'Q489649',
   'name': 'Matrix',
   'description': 'car model',
   't