# Purpose
In this notebook, we work with a set of customer comments about a restaurant.
The goal is to be able to query these comments through an OpenAI LLM.

# Load environment variables

In [None]:
from dotenv import load_dotenv
import os

# override=True: values loaded by load_dotenv replace variables already set in the environment
load_dotenv(override=True)

# Resolves to None when OPENAI_API_KEY is not defined
openai_api_key = os.environ.get('OPENAI_API_KEY')

# Read data into a pandas DataFrame

In [None]:
# Load the csv file in pandas
import pandas as pd

# Load the semicolon-separated CSV and convert DATETIME_STAMP to datetime values in one chain
df = pd.read_csv('kebab_comments_with_rate.csv', sep=';').assign(
    DATETIME_STAMP=lambda frame: pd.to_datetime(frame['DATETIME_STAMP'])
)

# Bare expression so the notebook displays the resulting DataFrame
df


# Check data

In [None]:
# Select the comments whose timestamp falls in July (month number 7), regardless of the year
july_mask = df['DATETIME_STAMP'].dt.month == 7
july_df = df.loc[july_mask]
print(july_df[['COMMENT', 'RATE']])

# Implement a filtering function

In [None]:
# A filtering function that returns comments for a given period of time or ratings range.
# Default date bounds: chosen so that, by default, the date window is effectively open-ended.
MIN_DATE = '1900-01-01'
MAX_DATE = '9999-12-31'


def filter_comments(df, start_date=MIN_DATE, end_date=MAX_DATE, min_rate=0, max_rate=5):
    """Return the COMMENT and RATE columns of the rows matching the given bounds.

    Args:
        df: DataFrame with 'DATETIME_STAMP', 'COMMENT' and 'RATE' columns.
            'DATETIME_STAMP' is compared against parsed timestamps, so it is
            expected to already hold datetime values.
        start_date: Inclusive lower bound, in any form accepted by
            ``pd.to_datetime``. A falsy value (None, '') disables the bound.
        end_date: Inclusive upper bound, in any form accepted by
            ``pd.to_datetime``. A falsy value or the ``MAX_DATE`` default
            disables the bound.
        min_rate: Inclusive lower rating bound; None disables it.
        max_rate: Inclusive upper rating bound; None disables it.

    Returns:
        A new DataFrame holding only the 'COMMENT' and 'RATE' columns of the
        selected rows; ``df`` itself is not modified.

    Note:
        A date-only ``end_date`` such as '2025-06-20' is parsed as midnight of
        that day, so rows stamped later on the same day are not selected.
    """
    # Boolean indexing already produces new objects, so no up-front copy is needed.
    selected = df

    if start_date:
        selected = selected[selected['DATETIME_STAMP'] >= pd.to_datetime(start_date)]

    # MAX_DATE lies outside the range of nanosecond-resolution timestamps, so
    # parsing it can raise OutOfBoundsDatetime. It only ever means "no upper
    # bound", so it is skipped instead of being parsed.
    upper_is_open = isinstance(end_date, str) and end_date == MAX_DATE
    if end_date and not upper_is_open:
        selected = selected[selected['DATETIME_STAMP'] <= pd.to_datetime(end_date)]

    if min_rate is not None:
        selected = selected[selected['RATE'] >= min_rate]
    if max_rate is not None:
        selected = selected[selected['RATE'] <= max_rate]

    return selected[['COMMENT', 'RATE']]

In [None]:
# Try the filter: comments between 2025-06-12 and 2025-06-20 rated 4 or higher
sample_selection = filter_comments(df, start_date='2025-06-12', end_date='2025-06-20', min_rate=4)
print(sample_selection)

In [None]:
# Turn the filtered data into a list of plain tuples, one per row (here: (comment, rate))
def df_to_tuples(filtered_df):
    """Return the rows of ``filtered_df`` as a list of plain tuples.

    Each tuple holds the row's column values in column order; the DataFrame
    index is left out.
    """
    # index=False drops the index from each row; name=None yields plain tuples
    # rather than namedtuples
    row_iter = filtered_df.itertuples(index=False, name=None)
    return [*row_iter]


# Select the comments from 2025-06-12 to 2025-06-20 rated 4 or higher, then flatten them to tuples
selected_comments = filter_comments(df, start_date='2025-06-12', end_date='2025-06-20', min_rate=4)
comments_list = df_to_tuples(selected_comments)

# Print each (comment, rate) pair on its own line
for comment, rate in comments_list:
    print(f"Comment: {comment}, Rate: {rate}")

# Use an OpenAI LLM to query the comments

In [None]:
# Implementing a query system using OpenAI's LLM
from openai import OpenAI
# Client built with the API key read from the environment earlier in the notebook
client = OpenAI(api_key=openai_api_key)

# System message: describes the assistant's role for the exchange
system_prompt = """
You are a helpful assistant that summarizes restaurant customer comments based on user queries.
You will also make some operations on rates.
"""

# User message template; {comments_data} and {question} are filled in below
user_prompt = """
Given the following customer comments and their rates below:
{comments_data}
\n\n
{question}
"""

question = input("Please enter your question about the comments: ")

# One bullet line per (comment, rate) pair from comments_list
formatted_comments_list = "\n".join(
    f"- Comment: {comment}, Rate: {rate}" for comment, rate in comments_list
)

# Fill the template, then assemble the conversation: system message first, user message second
filled_user_prompt = user_prompt.format(
    comments_data=formatted_comments_list,
    question=question,
)
messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": filled_user_prompt},
]

response = client.chat.completions.create(
    model="gpt-4.1-nano",
    messages=messages,
    temperature=0,
)

# Show the text of the first returned choice
print("Response from the LLM:")
first_message = response.choices[0].message
print(first_message.content)