In [1]:
!pip install pandas ollama tqdm

Looking in indexes: http://dev:****@nexus-leitha.servizi.gr-u.it/repository/leitha_python_all/simple

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
# Prompt sent to the LLM for every review. It asks for a single JSON object
# holding the sentiment label, the echoed review text and the named entities
# (PERSON, ORG, LOC), and includes one worked example.
# Literal JSON braces are doubled ({{ and }}) because the template is rendered
# with str.format; {content} is the only placeholder and receives the review text.
# NOTE(review): "provide one distinct outputs" looks like a typo for "one output" —
# left untouched here because editing the prompt changes what the model receives.
PROMPT_TEMPLATE = """Instruction: Analyze the following review text and provide one distinct outputs formatted in JSON:

1. **Sentiment Classification:** Indicate whether the sentiment of the review is "positive" or "negative".
2. **Named Entity Extraction:** List all named entities present in the text, categorizing them by label (PERSON, ORG, LOC).
3. **Required JSON Format:** Ensure the response is formatted in JSON according to the following schema:

{{
  "sentiment": "<sentiment>",
  "review": "<review>",
  "entities": [
    {{
      "label": "<label>",
      "value": "<value>"
    }}
  ]
}}

example:

"I recently visited the restaurant 'La Dolce Vita' in Rome and was thrilled with the service and food. The waiter, Marco, was exceptionally friendly and the truffle risotto was simply divine. I can't wait to return and recommend this place to my friends."

```json
{{
  "sentiment": "positive",
  "review": "I recently visited the restaurant 'La Dolce Vita' in Rome and was thrilled with the service and food. The waiter, Marco, was exceptionally friendly and the truffle risotto was simply divine. I can't wait to return and recommend this place to my friends.",
  "entities": [
    {{
      "label": "ORG",
      "value": "La Dolce Vita"
    }},
    {{
      "label": "LOC",
      "value": "Rome"
    }},
    {{
      "label": "PERSON",
      "value": "Marco"
    }}
  ]
}}
```

{content}"""

In [50]:
from typing import Optional

import ollama


def process_review(review: str, model_name: str) -> Optional[str]:
    """
    It sends one review to an Ollama model and returns the raw LLM response.

    The review is inserted into PROMPT_TEMPLATE and submitted through
    ``ollama.generate``. Any exception raised while building the prompt or
    calling the model is caught, printed to stdout and turned into ``None``.

    Arguments:
        review (str): The review text.
        model_name (str): The name of the Ollama model to query.

    Return:
        The LLM response as string, or None if the call failed.
    """
    try:
        prompt = PROMPT_TEMPLATE.format(content=review)
        return ollama.generate(model=model_name, prompt=prompt)["response"]
    except Exception as e:
        # Broad on purpose: a failure is reported to the caller as None
        # instead of propagating.
        print(f"Error invoking the chain: {e}")
        return None

In [62]:
import pandas as pd
from tqdm import tqdm


def call_model_llm(model_name: str, output_file_path: str, pending_marker: str = "$$$") -> None:
    """
    It runs the LLM over every review still waiting for an answer and checkpoints the CSV.

    The CSV at ``output_file_path`` is read, each row whose "output" column equals
    ``pending_marker`` is passed to ``process_review``, and the whole file is
    rewritten after every successful answer so an interrupted run can be resumed.
    Rows for which ``process_review`` returned None keep the marker, so they are
    picked up again by the next invocation instead of being stored as an empty
    output. Rows are updated in place and therefore keep their original order.

    Arguments:
        model_name: The name of the model to invoke via Ollama.
        output_file_path: Path of the CSV file to read and to overwrite. It must
            contain the columns "review" and "output".
        pending_marker: Value of the "output" column marking a row as not processed yet.

    Return:
        None. The results are persisted to ``output_file_path``.
    """
    dataframe: pd.DataFrame = pd.read_csv(output_file_path)
    pending_index = dataframe[dataframe["output"] == pending_marker].index
    for idx in tqdm(pending_index, total=len(pending_index)):
        result = process_review(dataframe.at[idx, "review"], model_name)
        if result is None:
            # Failed call: leave the marker in place so the row is retried later.
            continue
        dataframe.at[idx, "output"] = result
        # Checkpoint after every answer; LLM calls are slow, so losing them is costly.
        dataframe.to_csv(output_file_path, index=False)

In [63]:
import os

# Define the model name to use
MODEL_NAME: str = "phi3:medium"

# Define the output path of the per-model results csv
# (the ":" of the model tag is replaced by "_" in the file name)
output_file_path: str = f"../resources/sampled_reviews_with_output_{MODEL_NAME.replace(':', '_')}.csv"

# On the first run the results file does not exist yet: seed it from the sampled
# dataset, marking every row as pending with the "$$$" sentinel that
# call_model_llm looks for. On later runs the existing file is reused as is.
exists: bool = os.path.exists(output_file_path)
if not exists:
    sampled: pd.DataFrame = pd.read_csv("../resources/IMDB Dataset Sampled.csv")
    # A scalar assignment broadcasts to every row; no row-wise apply is needed.
    sampled["output"] = "$$$"
    sampled.to_csv(output_file_path, index=False)

# Invoke the model on the pending rows; the results are written to output_file_path
call_model_llm(model_name=MODEL_NAME, output_file_path=output_file_path)

  0%|          | 0/32 [00:00<?, ?it/s]

0


2024-07-29 14:46:41,553 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
  3%|▎         | 1/32 [00:27<14:11, 27.46s/it]

1


2024-07-29 14:46:51,882 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
  6%|▋         | 2/32 [00:37<08:41, 17.37s/it]

2


2024-07-29 14:47:04,462 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
  9%|▉         | 3/32 [00:50<07:20, 15.19s/it]

3


2024-07-29 14:47:15,899 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 12%|█▎        | 4/32 [01:01<06:23, 13.71s/it]

4


2024-07-29 14:47:24,608 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 16%|█▌        | 5/32 [01:10<05:21, 11.91s/it]

5


2024-07-29 14:47:37,344 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 19%|█▉        | 6/32 [01:23<05:16, 12.19s/it]

6


2024-07-29 14:47:53,507 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 22%|██▏       | 7/32 [01:39<05:37, 13.49s/it]

7


2024-07-29 14:48:00,766 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 25%|██▌       | 8/32 [01:46<04:36, 11.50s/it]

8


2024-07-29 14:48:12,936 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 28%|██▊       | 9/32 [01:58<04:29, 11.71s/it]

9


2024-07-29 14:48:33,957 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 31%|███▏      | 10/32 [02:19<05:20, 14.59s/it]

10


2024-07-29 14:48:45,160 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 34%|███▍      | 11/32 [02:31<04:44, 13.55s/it]

11


2024-07-29 14:48:56,019 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 38%|███▊      | 12/32 [02:41<04:14, 12.73s/it]

12


2024-07-29 14:49:20,932 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 41%|████      | 13/32 [03:06<05:12, 16.42s/it]

13


2024-07-29 14:49:35,644 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 44%|████▍     | 14/32 [03:21<04:46, 15.91s/it]

14


2024-07-29 14:49:50,909 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 47%|████▋     | 15/32 [03:36<04:27, 15.71s/it]

15


2024-07-29 14:50:08,591 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 50%|█████     | 16/32 [03:54<04:20, 16.30s/it]

16


2024-07-29 14:50:21,510 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 53%|█████▎    | 17/32 [04:07<03:49, 15.29s/it]

17


2024-07-29 14:50:31,762 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 56%|█████▋    | 18/32 [04:17<03:12, 13.77s/it]

18


2024-07-29 14:50:36,336 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 59%|█████▉    | 19/32 [04:22<02:23, 11.01s/it]

19


2024-07-29 14:51:13,455 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 62%|██████▎   | 20/32 [04:59<03:46, 18.86s/it]

20


2024-07-29 14:51:25,154 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 66%|██████▌   | 21/32 [05:11<03:03, 16.71s/it]

21


2024-07-29 14:51:35,770 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 69%|██████▉   | 22/32 [05:21<02:28, 14.88s/it]

22


2024-07-29 14:51:49,776 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 72%|███████▏  | 23/32 [05:35<02:11, 14.62s/it]

23


2024-07-29 14:52:00,928 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 75%|███████▌  | 24/32 [05:46<01:48, 13.58s/it]

24


2024-07-29 14:52:14,553 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 78%|███████▊  | 25/32 [06:00<01:35, 13.59s/it]

25


2024-07-29 14:52:29,007 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 81%|████████▏ | 26/32 [06:14<01:23, 13.85s/it]

26


2024-07-29 14:52:40,929 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 84%|████████▍ | 27/32 [06:26<01:06, 13.27s/it]

27


2024-07-29 14:52:51,745 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 88%|████████▊ | 28/32 [06:37<00:50, 12.54s/it]

28


2024-07-29 14:53:05,397 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 91%|█████████ | 29/32 [06:51<00:38, 12.87s/it]

29


2024-07-29 14:53:28,362 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 94%|█████████▍| 30/32 [07:14<00:31, 15.90s/it]

30


2024-07-29 14:53:40,090 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
 97%|█████████▋| 31/32 [07:25<00:14, 14.65s/it]

31


2024-07-29 14:53:52,972 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
100%|██████████| 32/32 [07:38<00:00, 14.34s/it]
2024-07-29 14:53:53,100 - INFO - Completed processing.
