In [1]:
%%capture
!pip install pandas ollama tqdm

In [2]:
import logging

# Configure the root logger once for the whole notebook: INFO and above,
# each record rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

In [15]:
# Prompt for the named-entity-recognition stage. `{content}` is replaced with the
# review text via str.format; the JSON braces are doubled so that str.format
# emits them literally. The opening instruction must ask for entities (not for
# sentiment) so that it agrees with the schema and the worked example below.
NER_PROMPT = """Analyze the following review text listing all named entities present in the text, categorizing them by label. Consider only PERSON, ORG, and LOC categories.
Ensure the response is formatted in JSON according to the following schema:

[
  {{
    "label": "<label>",
    "value": "<value>"
  }}
]

Example:

"I recently visited the restaurant 'La Dolce Vita' in Rome and was thrilled with the service and food. The waiter, Marco, was exceptionally friendly and the truffle risotto was simply divine. I can't wait to return and recommend this place to my friends."

[
  {{
    "label": "ORG",
    "value": "La Dolce Vita"
  }},
  {{
    "label": "LOC",
    "value": "Rome"
  }},
  {{
    "label": "PERSON",
    "value": "Marco"
  }}
]

{content}"""

# Prompt for the sentiment-classification stage. `{content}` is replaced with the
# review text via str.format. The opening instruction must ask for the sentiment
# (not for named entities) so that it agrees with the worked example below.
SENTIMENT_PROMPT = """Analyze the following review text indicating whether the sentiment of the review is "positive" or "negative".

Example:

"I recently visited the restaurant 'La Dolce Vita' in Rome and was thrilled with the service and food. The waiter, Marco, was exceptionally friendly and the truffle risotto was simply divine. I can't wait to return and recommend this place to my friends."

"positive"

{content}"""

# Prompt for the final formatting stage. The {sentiment}, {review} and {entities}
# placeholders are filled in with str.format (see process_review); every literal
# JSON brace is doubled so that str.format emits it as a single brace.
FORMATTING_OUTPUT = """You are given three informations: sentiment, review and entities. Generate a JSON representation using the following schema. Use just the data you receive:

{{
  "sentiment": "<sentiment>",
  "review": "<review>",
  "entities": [
    {{
      "label": "<label>",
      "value": "<value>"
    }}
  ]
}}

example:

"Sentiment: positive
Review: I recently visited the restaurant 'La Dolce Vita' in Rome and was thrilled with the service and food. The waiter, Marco, was exceptionally friendly and the truffle risotto was simply divine. I can't wait to return and recommend this place to my friends.
Entities: [
    {{
      "label": "ORG",
      "value": "La Dolce Vita"
    }},
    {{
      "label": "LOC",
      "value": "Rome"
    }},
    {{
      "label": "PERSON",
      "value": "Marco"
    }}
  ]"

```json
{{
  "sentiment": "positive",
  "review": "I recently visited the restaurant 'La Dolce Vita' in Rome and was thrilled with the service and food. The waiter, Marco, was exceptionally friendly and the truffle risotto was simply divine. I can't wait to return and recommend this place to my friends.",
  "entities": [
    {{
      "label": "ORG",
      "value": "La Dolce Vita"
    }},
    {{
      "label": "LOC",
      "value": "Rome"
    }},
    {{
      "label": "PERSON",
      "value": "Marco"
    }}
  ]
}}
```


Sentiment: {sentiment}
Review: {review}
Entities: {entities}"""

In [16]:
import ollama

from typing import Optional


def _run_stage(stage: str, model_name: str, template: str, **fields: str) -> Optional[str]:
    """
    It renders one prompt template, sends it to Ollama and returns the reply text.

    Arguments:
        stage (str): Human-readable stage name, used only in the error log message.
        model_name (str): The name of the model to invoke via Ollama.
        template (str): A str.format template for the prompt.
        **fields: The values substituted into the template.

    Return:
        The "response" field of the Ollama reply, or None if anything failed.
    """
    try:
        return ollama.generate(
            model=model_name,
            prompt=template.format(**fields),
        )["response"]
    except Exception as e:
        # Best-effort on purpose: the failure is logged and reported to the
        # caller as None instead of being raised.
        logging.error(f"Error invoking the chain at {stage} stage: {e}")
        return None


def process_review(review: str, model_name: str) -> Optional[str]:
    """
    It processes the review text with three chained LLM calls (sentiment, NER,
    final JSON formatting) and returns the last response as string.

    Arguments:
        review (str): The review text.
        model_name (str): The name of the model to invoke via Ollama.

    Return:
        The LLM response of the formatting stage as string, or None if any of
        the three stages failed (the error is logged).
    """
    sentiment_response = _run_stage("sentiment", model_name, SENTIMENT_PROMPT, content=review)
    if sentiment_response is None:
        return None

    ner_response = _run_stage("NER", model_name, NER_PROMPT, content=review)
    if ner_response is None:
        return None

    # The last stage only merges the two previous answers with the review text.
    return _run_stage(
        "formatting",
        model_name,
        FORMATTING_OUTPUT,
        review=review,
        sentiment=sentiment_response,
        entities=ner_response,
    )
        
        

In [17]:
import pandas as pd
from tqdm import tqdm


def call_model_llm(model_name: str, output_file_path: str) -> pd.DataFrame:
    """
    It calls the LLM model using Ollama on every row that is still pending and
    returns the dataframe enriched with the model output.

    A row is pending when its "output" column holds the "$$$" placeholder. The
    CSV file is rewritten after every processed row, so an interrupted run can
    be resumed by calling this function again with the same file.

    Arguments:
        model_name: The name of the model to invoke via Ollama.
        output_file_path: Path of the CSV file to read from and to checkpoint to.
            It must contain the "review" and "output" columns.

    Return:
        The enriched dataframe: rows that were already done first, followed by
        the rows processed (or still pending) in this call.
    """
    dataframe: pd.DataFrame = pd.read_csv(output_file_path)
    is_pending = dataframe["output"] == "$$$"
    already_done_part: pd.DataFrame = dataframe[~is_pending].copy()
    slice_to_work_on: pd.DataFrame = dataframe[is_pending].copy().reset_index(drop=True)

    # Build the result up front so that a call with nothing left to do still
    # returns a frame consistent with what is on disk.
    updated_df: pd.DataFrame = pd.concat([already_done_part, slice_to_work_on], ignore_index=True)

    total_rows: int = len(slice_to_work_on)
    for i in tqdm(range(total_rows), total=total_rows):
        result = process_review(slice_to_work_on.loc[i, "review"], model_name)
        if result is not None:
            slice_to_work_on.loc[i, "output"] = result
        # else: keep the "$$$" placeholder. Writing None would be stored as an
        # empty cell, which no longer matches the placeholder on reload, so the
        # failed row would never be retried.
        updated_df = pd.concat([already_done_part, slice_to_work_on], ignore_index=True)
        # Checkpoint after every row: each row costs several LLM calls.
        updated_df.to_csv(output_file_path, index=False)

    # Return the updated frame, not the one read at the start, which still has
    # the placeholder in every row processed by this call.
    return updated_df

In [19]:
import os

# Define the model name to use
MODEL_NAME: str = "phi3:medium"

# Define the output path and get the result as csv
# (the ':' of the model tag is replaced with '_' in the file name)
output_file_path: str = f"../resources/sampled_reviews_with_output_multicall_{MODEL_NAME.replace(':', '_')}.csv"

# First run only: seed the working file from the sampled dataset, marking every
# row as pending with the "$$$" placeholder that call_model_llm looks for.
exists: bool = os.path.exists(output_file_path)
if not exists:
    sampled: pd.DataFrame = pd.read_csv("../resources/IMDB Dataset Sampled.csv")
    # Scalar assignment broadcasts to every row (and also works on an empty frame).
    sampled["output"] = "$$$"
    sampled.to_csv(output_file_path, index=False)

# Invoke the chain. call_model_llm persists the results to output_file_path
# after every processed row.
call_model_llm(model_name=MODEL_NAME, output_file_path=output_file_path)

# Do not write the returned frame back to disk: the file already holds the
# per-row results, and overwriting it here risks replacing them with stale data.
# Load the results from the checkpoint file instead.
output: pd.DataFrame = pd.read_csv(output_file_path)

logging.info("Completed processing.")

  0%|          | 0/972 [00:00<?, ?it/s]2024-07-29 15:59:55,853 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2024-07-29 16:00:00,019 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2024-07-29 16:00:32,937 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
  0%|          | 1/972 [00:42<11:24:54, 42.32s/it]2024-07-29 16:00:38,199 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2024-07-29 16:00:42,939 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2024-07-29 16:01:05,046 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
  0%|          | 2/972 [01:14<9:47:04, 36.31s/it] 2024-07-29 16:01:07,578 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2024-07-29 16:01:10,233 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2024-07-29 16

KeyboardInterrupt: 