**This notebook loops through a Fabric Lakehouse table containing McDonald's reviews. It sends each review to Azure OpenAI using the native OpenAI endpoint in Fabric, which categorizes the review against a provided list of categories and also reports its sentiment.**

In [None]:
#Location of the lakehouse Delta table that receives the results.
#This value is passed to DeltaTable.forPath in the merge cell, so set it before running that cell.
lakehouse_table = ""

In [None]:
#Install the openai library we need for this notebook.
#The version is pinned to 0.28.1 because this notebook calls openai.ChatCompletion.create,
#an interface that was removed in the 1.x releases of the library.
%pip install openai==0.28.1

In [None]:
#Specify the categories to use. Each name is wrapped in single quotes and the names are separated by commas.
categories = ",".join(
    f"'{name}'"
    for name in (
        "Order Delivery Speed",
        "Order Accuracy",
        "Food Quality",
        "Neighborhood or Location",
        "Cleanliness",
        "Cost",
        "Other",
    )
)

In [None]:
#Load every column of the reviews table from the Fabric lakehouse and show it
source_table = "Reviews_Lakehouse.mcdonalds_reviews"
df = spark.sql(f"SELECT * FROM {source_table}")
display(df)

In [None]:
import pandas as pd
import openai
import json
from pyspark.sql.functions import *
from delta.tables import *

#This function calls Azure OpenAI with the prompt shown below. It takes the review for each row as a parameter.
def get_openai_response(review):
    """Categorize one review and classify its overall sentiment.

    Sends ``review`` to the 'gpt-35-turbo' deployment together with a system
    prompt built from the notebook-level ``categories`` string, then parses
    the JSON document the model is instructed to return.

    Args:
        review: Text of a single customer review.

    Returns:
        A ``(categories, sentiment)`` tuple read from the ``response`` object
        of the model's JSON reply, or ``(None, None)`` when the review is
        empty / not a string, or when the API call or the parsing fails.
    """
    # Nothing to analyse for missing or blank reviews; skip the API call.
    if not isinstance(review, str) or not review.strip():
        return None, None

    # Doubled braces are literal braces inside an f-string. The example must be
    # a complete, valid JSON document because the reply is parsed with json.loads.
    prompt = f"""
    You are a restaurant review analysis bot. Given the following categories: {categories}.
    Respond to the user review data with a category or categories from the list that fits the review comments or just say 'other' if none of the categories fit.
    Only respond with the provided category names and whether the overall sentiment for the review is positive, negative, or neutral.
    Put the response in JSON format as shown here:
    . Example: 'It took 20 minutes to get my order! That is unacceptable. The food also didn't taste very good'
    Response: {{"response": {{"categories":"Order Delivery Speed, Food Quality", "sentiment": "Negative"}}}}
    """
    try:
        response = openai.ChatCompletion.create(
            deployment_id='gpt-35-turbo',
            messages = [{
                "role": "system",
                "content": prompt
            },
            {
                "role": "user",
                "content": review
            }],
            temperature=0.0,
            max_tokens=2000,
            top_p=0.95,
            frequency_penalty=0,
            presence_penalty=0,
            stream=False,
            stop=None)
        response = response['choices'][0]['message']['content']
        data = json.loads(response)
        return data['response']['categories'], data['response']['sentiment']

    # Deliberately broad: this runs once per row, and a single failed call or
    # unparsable reply should not abort the whole run.
    except Exception as e:
        print(f"Error: {e}")
        return None, None

#Keep just the review text and the row key, then convert to a pandas dataframe
reviews_df = df.select("review", "_unit_id")
df_pandas = reviews_df.toPandas()

#Run every review through get_openai_response, then split the returned pairs into two new columns
scored_reviews = df_pandas['review'].apply(get_openai_response)
topic_values, sentiment_values = zip(*scored_reviews)
df_pandas['review_topics'] = topic_values
df_pandas['review_sentiment'] = sentiment_values

# Convert the results back to a Spark dataframe
df_updates = spark.createDataFrame(df_pandas)

# Open the Delta table located at lakehouse_table
deltaTableReviews = DeltaTable.forPath(spark, lakehouse_table)

# Delta merge: rows are matched on _unit_id, and each matched row is updated with the new topics / categories and sentiment
merge_condition = 'reviews_table._unit_id = updates._unit_id'
column_updates = {
    'reviews_table.review_topics': 'updates.review_topics',
    'reviews_table.review_sentiment': 'updates.review_sentiment',
}
merge_builder = deltaTableReviews.alias('reviews_table').merge(
    df_updates.alias('updates'),
    merge_condition,
)
merge_builder.whenMatchedUpdate(set=column_updates).execute()

In [None]:
#Show the first 10 rows of the Fabric lakehouse table to check the new columns
preview_query = "SELECT * FROM Reviews_Lakehouse.mcdonalds_reviews LIMIT 10"
df2 = spark.sql(preview_query)
display(df2)

In [None]:
# Optional: write the results to another table rather than the current one. Just provide a table name.

delta_table_name = "review_category_and_sentiment"
results_writer = df_updates.write.format("delta").mode("overwrite")
results_writer.saveAsTable(delta_table_name)