# LionLinker Testing Notebook
This notebook is used to test each component of the LionLinker pipeline individually.

In [None]:
from lion_linker.lion_linker import LionLinker

# Define the path to the files
input_csv = 'tests/data/film.csv'
prompt_file = 'prompt_template.txt'
model_name = 'gemma2:2b'  # Replace with the actual model you are using
api_url = 'https://lamapi.hel.sintef.cloud/lookup/entity-retrieval'  # Replace with the actual API URL
api_token = 'lamapi_demo_2023'  # Replace with your API token if applicable
output_csv = 'output_test.csv'
batch_size = 10  # Small batch size for testing
model_api_provider='ollama' # Replace with the API provider
#model_api_provider='openai' #  to test openai
#model_api_provider='groq' # to test groq
model_api_key='' # keep empty for ollama, but need for openai and groq

# Initialize the LionLinker instance
lion_linker = LionLinker(input_csv, prompt_file, model_name, api_url, api_token, output_csv, 
                         batch_size, ["title"], api_limit=10, compact_candidates=False)
await lion_linker.run()


In [None]:
# Run the linker through the command-line entry point (cli.py) instead of the Python API.
# The arguments mirror the values used in the first code cell of this notebook:
# same input/output CSVs, API endpoint and token, prompt template, model, batch size and mention column.
!python3 cli.py tests/data/film.csv output_test.csv --api-url https://lamapi.hel.sintef.cloud/lookup/entity-retrieval --api-token lamapi_demo_2023 --prompt-file prompt_template.txt --model gemma2:2b --batch-size 10 --mention_columns title

In [None]:
from lion_linker.lion_linker import PromptGenerator, APIClient

candidates = await APIClient("https://lamapi.hel.sintef.cloud/lookup/entity-retrieval", "lamapi_demo_2023").fetch_multiple_entities(["Titanic", "James Cameron"])
print(PromptGenerator("prompt_template.txt").generate_prompt("The film Titanic was directed by James Cameron.", 
                                                             "title", "", "", candidates["Titanic"], compact=True))

## Test API Interaction
Test the `APIClient` class to ensure it can fetch data correctly.

In [10]:
import pandas as pd
# Score a previously saved prediction file against the ground-truth (GT) QIDs.
# The comparison is positional (row i of the predictions vs. row i of the GT),
# so both CSVs must contain the same rows in the same order.
result = pd.read_csv('./tests/data/testing.csv')
gt = pd.read_csv('./tests/data/film_with_QIDs.csv')

# Extract the GT column (nothing is removed from either frame here)
gt_column = gt['Title_QID']

# Fail early with a readable message. Without this check, `==` between two
# Series of different length raises pandas' generic
# "Can only compare identically-labeled Series objects" error.
assert len(result) == len(gt_column), (
    f"Row count mismatch: {len(result)} predictions vs {len(gt_column)} ground-truth rows"
)

# Compute accuracy as the fraction of rows whose predicted id equals the GT QID
accuracy = (result["id"] == gt_column).mean()

# Print accuracy
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 100.00%


In [2]:
api_client = lion_linker.api_client
candidates = await api_client.fetch_multiple_entities(["Matrix", "Inception"])
candidates

{'Matrix': [{'id': 'Q44337',
   'name': 'matrix',
   'description': 'rectangular array of numbers, symbols, or expressions, arranged in rows and columns',
   'types': []},
  {'id': 'Q83495',
   'name': 'Matrix',
   'description': '1999 American science fiction action thriller film',
   'types': [{'id': 'Q11424', 'name': 'film'}]},
  {'id': 'Q193825',
   'name': 'Matrix',
   'description': 'structure external to cells, which provides structural support for cells or tissues',
   'types': [{'id': 'Q5058355', 'name': 'cellular component'}]},
  {'id': 'Q190069',
   'name': 'Matrix',
   'description': 'Wikimedia disambiguation page',
   'types': [{'id': 'Q4167410', 'name': 'Wikimedia disambiguation page'}]},
  {'id': 'Q1463013',
   'name': 'matrix',
   'description': 'geological term for the mass of material in which larger grains, crystals or clasts are embedded',
   'types': [{'id': 'Q35758', 'name': 'matter'}]},
  {'id': 'Q489649',
   'name': 'Matrix',
   'description': 'car model',
   't

In [2]:
import pandas as pd
from lion_linker.lion_linker import LionLinker

# Define paths to the files
input_csv = './tests/data/film_with_QIDs.csv'  # Replace with the actual file path
prompt_file = 'prompt_template.txt'  # File containing the prompt template
output_csv = 'output_test.csv'  # File where LionLinker's results will be saved

# Load the input CSV
df = pd.read_csv(input_csv)

# Extract GT column and remove it from the input
gt_column = df['Title_QID']
input_df = df.drop(columns=['Title_QID'])  # Input without GT

# Save the modified input data for LionLinker
input_csv_no_QIDs = './tests/data/film_input_no_QIDs.csv'
input_df.to_csv(input_csv_no_QIDs, index=False)

# Define LionLinker parameters
model_name = 'mistral'  # Replace with your model name
api_url = 'https://lamapi.hel.sintef.cloud/lookup/entity-retrieval'  # API URL
api_token = 'lamapi_demo_2023'  # Your API token (if needed)
batch_size = 10  # Define the batch size

# Initialize LionLinker
lion_linker = LionLinker(
    input_csv_no_QIDs,  # Input CSV without the QIDs
    prompt_file,
    model_name,
    api_url,
    api_token,
    output_csv,  # File to save the results
    batch_size,
    ["title"],  # List of columns to use for entity linking
    api_limit=20,
    compact_candidates=True 
)

# Run LionLinker asynchronously
await lion_linker.run()

# Load LionLinker's results and compare with GT
results_df = pd.read_csv(output_csv)  # Load LionLinker's output

# Assuming LionLinker output has a 'predicted_QID' column
predicted_QIDs = results_df['Extracted Identifier']

# Compute accuracy by comparing predicted QIDs with the ground truth
accuracy = (predicted_QIDs == gt_column).mean()

# Print accuracy
print(f"Accuracy: {accuracy * 100:.2f}%")

2024-09-19 16:47:50,611 - INFO - Initializing components...
2024-09-19 16:47:50,613 - INFO - Setup completed.
2024-09-19 16:47:50,614 - INFO - Starting processing of ./tests/data/film_input_no_QIDs.csv...


Processing Batches:   0%|          | 0/31 [00:00<?, ?it/s]2024-09-19 16:47:52,344 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-09-19 16:48:03,306 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-09-19 16:48:07,992 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-09-19 16:48:11,534 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-09-19 16:48:15,267 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-09-19 16:48:19,635 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-09-19 16:48:23,792 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-09-19 16:48:31,327 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-09-19 16:48:33,855 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-09-19 16:48:40,414 

Accuracy: 66.67%
