In [1]:
!pip install langchain langchain-community pandas

Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip is available: 24.0 -> 24.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
import logging
import time
from typing import Optional

import pandas as pd
from langchain_community.llms import Ollama
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableSequence

# Configure the root logger: INFO and above, each record prefixed with its timestamp and level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

In [None]:
# Prompt sent to the model for every review. It is rendered through PromptTemplate.from_template,
# so doubled braces ({{ and }}) are escapes that yield literal braces in the JSON schema and in the
# example, while {content} is the only template variable and receives the review text.
# NOTE(review): the first line asks for "one distinct outputs" although both a sentiment and an
# entity list are requested, and {content} follows the example without any label separating the
# two. Consider rewording; the text is left untouched here because changing the prompt changes
# the responses produced by the model.
PROMPT_TEMPLATE = """Instruction: Analyze the following review text and provide one distinct outputs formatted in JSON:

1. **Sentiment Classification:** Indicate whether the sentiment of the review is Positive, Neutral, or Negative.
2. **Named Entity Extraction:** List all named entities present in the text, categorizing them by type (e.g., people, locations, organizations, etc.).
3. **Required JSON Format:** Ensure the response is formatted in JSON according to the following schema:

{{
  "sentiment": "<sentiment>",
  "entities": {{
    "person": ["<person_name1>", "<person_name2>", ...],
    "location": ["<location1>", "<location2>", ...],
    "organization": ["<organization1>", "<organization2>", ...],
    "product": ["<product1>", "<product2>", ...]
  }}
}}

example:

"I recently visited the restaurant 'La Dolce Vita' in Rome and was thrilled with the service and food. The waiter, Marco, was exceptionally friendly and the truffle risotto was simply divine. I can't wait to return and recommend this place to my friends."

```json

{{
  "sentiment": "Positive",
  "entities": {{
    "person": ["Marco"],
    "location": ["Rome"],
    "organization": ["La Dolce Vita"],
    "product": ["truffle risotto"]
  }}
}}

```
{content}





"""

In [9]:
def process_review(
    review: str,
    chain: RunnableSequence,
    progressive_index: int,
    total_reviews: int = 5000,
) -> Optional[str]:
    """
    It processes the review text and returns the LLM response as string.

    Arguments:
        review (str): The review text.
        chain (RunnableSequence): The LLM chain.
        progressive_index (int): The position of the review in the run; used only for progress logging.
        total_reviews (int): The number of reviews in the run; used only for progress logging.

    Return:
        The LLM response as string, or None if invoking the chain raised an exception.
    """
    # perf_counter is a monotonic clock, so the measured duration is not affected by system clock changes.
    start_time: float = time.perf_counter()
    try:
        langchain_output: str = chain.invoke({"content": review})
    except Exception as e:
        # Best effort: a single failing review is logged and skipped instead of aborting the whole run.
        logging.error(f"Error invoking the chain: {e}")
        return None
    processing_time: float = time.perf_counter() - start_time

    # Progress is logged outside the try block so that a logging problem can never discard a valid
    # response. The ratio is scaled by 100 so that the value printed before "%" is a real percentage.
    percentage: float = 100 * progressive_index / total_reviews if total_reviews else 0.0
    logging.info(f'Processing {progressive_index}/{total_reviews} ({percentage:.2f}%) in {processing_time} seconds')
    return langchain_output

In [11]:
def call_model_llm(model_name: str, input_path: str = "../resources/IMDB Dataset Sampled.csv") -> pd.DataFrame:
    """
    It calls the LLM model using Ollama. It returns the sampled dataframe enriched with an "output" column.

    Arguments:
        model_name: The name of the model to invoke via Ollama.
        input_path: The path of the CSV file to read; it must provide the "review" and
            "progressive_index" columns. It defaults to the sampled IMDB dataset.

    Return:
        The enriched dataframe, where "output" holds the value returned by process_review for each row.
    """
    dataframe: pd.DataFrame = pd.read_csv(input_path)
    # The | operator already composes the prompt and the model into a RunnableSequence.
    chain: RunnableSequence = PromptTemplate.from_template(PROMPT_TEMPLATE) | Ollama(model=model_name)
    # Iterate over the two needed columns directly: this avoids building a Series for every row,
    # as a row-wise apply does, and it also works when the dataframe is empty.
    dataframe["output"] = [
        process_review(review, chain, progressive_index)
        for review, progressive_index in zip(dataframe["review"], dataframe["progressive_index"])
    ]
    return dataframe


Processing Reviews: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5000/5000 [14:31:23<00:00, 10.46s/it]
2024-07-23 02:49:12,157 - INFO - Results saved to sampled_reviews_with_output.csv
2024-07-23 02:49:12,157 - INFO - Completed processing.


In [1]:
# Name under which the model is requested from Ollama
MODEL_NAME: str = "llama_3"

# Run the chain over the sampled reviews.
# The cells that import pandas/logging and define call_model_llm must have been executed in this kernel first.
output: pd.DataFrame = call_model_llm(MODEL_NAME)

# Persist the enriched dataframe as csv, in a file named after the model
output_file_path: str = f"sampled_reviews_with_output_{MODEL_NAME}.csv"
output.to_csv(output_file_path, index=False)

logging.info("Completed processing.")

NameError: name 'call_model_llm' is not defined