# LionLinker Testing Notebook
This notebook is used to test each component of the LionLinker pipeline individually.

In [None]:
from dotenv import load_dotenv
import os
from lion_linker.lion_linker import LionLinker

# Load environment variables from the .env file
load_dotenv()

# Define the path to the files
input_csv = 'tests/data/film.csv'
prompt_file = 'prompt_template.txt'
model_name = 'gemma2:2b'  # Replace with the actual model you are using
# Retrieve values from environment variables
api_url = os.getenv('API_URL') 
api_token = os.getenv('API_TOKEN')
output_csv = 'output_test.csv'
batch_size = 10  # Small batch size for testing
model_api_provider='ollama' # Replace with the API provider
#model_api_provider='openai' #  to test openai
#model_api_provider='groq' # to test groq
model_api_key='' # keep empty for ollama, but need for openai and groq

# Initialize the LionLinker instance
lion_linker = LionLinker(input_csv, prompt_file, model_name, api_url, api_token, output_csv, 
                         batch_size, ["title"], api_limit=10, compact_candidates=False)
await lion_linker.run()


In [None]:
!python3 cli.py tests/data/film.csv output_test.csv --api-url $api_url --api-token $api_token --prompt-file prompt_template.txt --model llama3.2:1b --batch-size 10 --mention_columns title

In [12]:
from lion_linker.lion_linker import PromptGenerator, APIClient

candidates = await APIClient(api_url, api_token).fetch_multiple_entities(["Titanic", "James Cameron"])
print(PromptGenerator("prompt_template.txt").generate_prompt("The film Titanic was directed by James Cameron.", 
                                                             "title", "", "", candidates["Titanic"], compact=True))

Here is the summary of the table:
The film Titanic was directed by James Cameron.

Here is the data for the current row:
title

The column name in question is:


The entity mention is:


Possible candidates for the entity are:
[{"id":"Q2478025","name":"Titanic","description":"soundtrack album for the 1997 film Titanic","types":[{"id":"Q482994","name":"album"}]},{"id":"Q122032189","name":"Titanic","description":"book edition published in 2018","types":[{"id":"Q3331189","name":"version, edition or translation"}]},{"id":"Q3529506","name":"Titanic","description":"1998 debut studio album by Wenge BCBG Les Anges Adorables","types":[{"id":"Q482994","name":"album"}]},{"id":"Q12059061","name":"Titanic","description":"Czech band","types":[{"id":"Q215380","name":"musical group"}]},{"id":"Q25417640","name":"Titanic","description":"Wikimedia disambiguation page","types":[{"id":"Q4167410","name":"Wikimedia disambiguation page"}]},{"id":"Q84727764","name":"Titanic","description":"passenger/general ca

## Test API Interaction
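Test the `APIClient` class to ensure it can fetch data correctly. This cell reuses the `lion_linker` instance created in the first code cell, so run that cell first.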

In [14]:
api_client = lion_linker.api_client
candidates = await api_client.fetch_multiple_entities(["Matrix", "Inception"])
candidates

{'Matrix': [{'id': 'Q6787843',
   'name': 'Matrix',
   'description': '1999 novel by Mike Tucker and Robert Perry',
   'types': [{'id': 'Q7725634', 'name': 'literary work'}]},
  {'id': 'Q22075162',
   'name': 'Matrix',
   'description': 'extended play (EP) record by South Korean boy group B.A.P.',
   'types': [{'id': 'Q169930', 'name': 'extended play'}]},
  {'id': 'Q11231115',
   'name': 'MATRIX',
   'description': 'album',
   'types': [{'id': 'Q482994', 'name': 'album'}]},
  {'id': 'Q685816',
   'name': 'matrix',
   'description': 'chemical analysis term; the components of a sample other than the analyte of interest',
   'types': [{'id': 'Q12812139', 'name': 'technical term'}]},
  {'id': 'Q59185651',
   'name': 'MATRIX',
   'description': 'scholarly article',
   'types': [{'id': 'Q13442814', 'name': 'scholarly article'}]},
  {'id': 'Q50321228',
   'name': 'Matrix',
   'description': 'painting by Morris Louis',
   'types': [{'id': 'Q3305213', 'name': 'painting'}]},
  {'id': 'Q54982940'

In [15]:
from dotenv import load_dotenv
import pandas as pd
import os
from lion_linker.lion_linker import LionLinker

# Load environment variables from the .env file
load_dotenv()

# Define paths to the files
input_csv = './tests/data/film_with_QIDs.csv'  # Input file with GT column
prompt_file = 'prompt_template.txt'  # File containing the prompt template
output_csv = 'output_test.csv'  # File where LionLinker's results will be saved


# Define LionLinker parameters
model_name = 'llama3.2:1b'  # Replace with your model name
# Retrieve values from environment variables
api_url = os.getenv('API_URL') 
api_token = os.getenv('API_TOKEN')
batch_size = 10  # Define the batch size

# Initialize LionLinker with the GT column specified for exclusion
lion_linker = LionLinker(
    input_csv=input_csv,  # Input CSV without the GT column
    prompt_file=prompt_file,
    model_name=model_name,
    api_url=api_url,
    api_token=api_token,
    output_csv=output_csv,  # Output file for results
    batch_size=batch_size,
    mention_columns=["title"],  # List of columns to use for entity linking
    api_limit=10,
    compact_candidates=True,
    gt_columns=["Title_QID"],  # GT column to exclude from the input
)

# Run LionLinker asynchronously
await lion_linker.run()

# Load LionLinker's predictions and the original input (which carries the ground truth)
results_df = pd.read_csv(output_csv)
input_df = pd.read_csv(input_csv)

# A row-count mismatch (e.g. a stale or partial output file) makes a
# row-by-row comparison meaningless, so fail early with a clear message.
if len(results_df) != len(input_df):
    raise ValueError(
        f"Row count mismatch: {len(results_df)} predictions vs "
        f"{len(input_df)} ground-truth rows"
    )

# Normalise both columns to stripped strings so stray whitespace around an
# identifier is not counted as a wrong prediction.
gt_column = input_df['Title_QID'].astype('string').str.strip()
# Assuming LionLinker output has an 'Extracted Identifier' column
predicted_QIDs = results_df['Extracted Identifier'].astype('string').str.strip()

# Comparisons involving a missing value yield NA; count those as incorrect
accuracy = (predicted_QIDs == gt_column).fillna(False).mean()

# Print accuracy
print(f"Accuracy: {accuracy * 100:.2f}%")

2024-11-05 10:50:34,822 - INFO - Initializing components...
2024-11-05 10:50:34,823 - INFO - Model API provider is: ollama
2024-11-05 10:50:34,825 - INFO - Setup completed.
2024-11-05 10:50:34,826 - INFO - Starting processing of ./tests/data/film_with_QIDs.csv...
Processing Batches:   0%|          | 0/31 [00:00<?, ?it/s]2024-11-05 10:50:35,216 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-11-05 10:50:36,302 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-11-05 10:50:36,612 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-11-05 10:50:36,909 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-11-05 10:50:37,176 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-11-05 10:50:37,472 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2024-11-05 10:50:37,754 - INFO - HTTP Request: POST http://127.0.0

Accuracy: 20.00%
