In [28]:
# Provide the OpenAI key through the environment instead of hardcoding it in the
# notebook. If it has to be set from Python, do so after `import os`, e.g.:
#os.environ['OPENAI_API_KEY'] = <your-api-key>
import os
from openai import OpenAI

# os.getenv returns None when OPENAI_API_KEY is not set.
# NOTE(review): none of the visible cells read `api_key`; TextSummarizer fetches
# the same environment variable on its own.
api_key = os.getenv('OPENAI_API_KEY')

Reference notebook (LangChain "RAG from scratch", parts 5 to 9): https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb

In [29]:

from python_functions import data_loader


# Load the processed hotel-review data through the project-local `data_loader` helper.
# NOTE(review): later cells index the result with 'Hotel_Name', 'Positive_Review'
# and 'Negative_Review' and call `.dropna(subset=...)` on it, so it is presumably a
# pandas DataFrame with those columns — confirm against `python_functions`.
Hotel_Reviews = data_loader()

In [30]:
from langchain_text_splitters import RecursiveCharacterTextSplitter



# Build one text entry per review row: the hotel name is repeated in front of both
# the positive and the negative part so each half stays attributable to its hotel.
# Every entry ends with a newline.
Hotel_Reviews['MergedColumn'] = (
    'Hotel: ' + Hotel_Reviews['Hotel_Name'] +
    '. Positive Guest Review: ' + Hotel_Reviews['Positive_Review'] +
    '. ' + 'Hotel: ' + Hotel_Reviews['Hotel_Name'] +
    '. Negative Guest Review: ' + Hotel_Reviews['Negative_Review'] + "\n"
)

# Rows with a missing name or review produce NaN in the merged column and cannot
# be written as text, so only the non-null entries are exported.
# This cell previously looped over `used_data`, which is only defined in a later
# cell: on a fresh kernel that raised NameError, and when run out of order it
# iterated a DataFrame (i.e. wrote the column names instead of the reviews).
merged_reviews = Hotel_Reviews['MergedColumn'].dropna()

# Specify the file name
file_name = "used_data.txt"

# Save the data to a text file. UTF-8 is set explicitly so non-ASCII characters in
# the reviews do not depend on the platform's default encoding.
with open(file_name, 'w', encoding='utf-8') as f:
    for line in merged_reviews:
        # Each entry already ends with "\n"; the extra newline keeps the original
        # blank-line separation between entries.
        f.write(line + '\n')

    

In [31]:
# inspired from https://github.com/michaelgcortes/text-summarization-in-python-using-chatgpt/blob/main/gpt-summarizer.ipynb
class TextSummarizer:
    """Summarize review text through the OpenAI chat-completions API.

    Every public method sends one system message plus one user message to the
    model named by ``openai_model`` and returns the text content of the first
    returned choice.
    """

    # ChatGPT model that we will be using everywhere
    openai_model = "gpt-4o-mini"

    # Constructor
    def __init__(self):
        """Read the API key once and keep it on the instance as ``apikey``."""
        self.apikey = self.fetch_api_key()

    # Method to get API key
    def fetch_api_key(self):
        """Return the value of the ``OPENAI_API_KEY`` environment variable (None if unset)."""
        # In here would be all the code required to fetch the api key ...
        return os.environ.get("OPENAI_API_KEY")

    # Shared request helper used by all public summarization methods
    def _chat(self, system_prompt, user_prompt):
        """Send one system/user message pair and return the first choice's content.

        A new client is created for each request and is always closed, including
        when the request raises. Exceptions from the request propagate to the caller.
        """
        client = OpenAI(api_key=self.apikey)
        try:
            completion = client.chat.completions.create(
                model=self.openai_model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
            )
            return completion.choices[0].message.content
        finally:
            # Previously close() was skipped whenever the request failed.
            client.close()

    # Method to take list of sentences and return summarized/average single sentence
    def average_sentences(self, list_of_sentences):
        """Combine several reviews into a single review of similar length.

        Args:
            list_of_sentences: iterable of review strings; they are joined with
                newline characters before being placed into the prompt.

        Returns:
            The content of the model's first choice.
        """
        # Write prompt to chatGPT to execute our task
        prompt = "Here is a list of multiple reviews that I want you to summarize and rewrite as a single review that is roughly the same length as the input reviews. The reviews are separated by newline characters \n as follows: {sentences}"
        prompt = prompt.format(sentences="\n".join(list_of_sentences))

        system_prompt = "You are an assistant that is able to read several reviews and then combine them into a single summarized review. The reviews will be sent to you with a newline character \n separating them. You will return a single review."
        return self._chat(system_prompt, prompt)

    # Method to summarize a piece of text
    def summarize_text(self, input_text):
        """Summarize ``input_text``; the system prompt asks for four sentences or less.

        Args:
            input_text: the text to summarize.

        Returns:
            The content of the model's first choice.
        """
        system_prompt = "You are an assistant that is able to read a piece of text and summarize it. Your summary will be 4 sentences or less."
        user_prompt = "Here is the text to be summarized below the newline character.\n {text}".format(text=input_text)
        return self._chat(system_prompt, user_prompt)

    # Method to summarize a piece of text, broken down by topic
    def summarize_text_topic(self, input_text):
        """Summarize ``input_text`` in general and per topic.

        The system prompt requests a one-sentence general summary plus one
        sentence for each of the topics Room, Food and Drinks, Location,
        Internet and Work, and Surprise.

        Args:
            input_text: the text to summarize.

        Returns:
            The content of the model's first choice.
        """
        # NOTE(review): the prompt text below (including its spelling and its
        # embedded indentation) is kept byte-for-byte so that results stay
        # comparable with summaries that were already generated.
        system_prompt = """You are an fair but critical assistant that is able to read a piece of text and summarize it. Please provide a one sentence general summary.
                 Additionally you will write a summary sentence on each of the for topics: Room, Food and Drinks, Location, Internet and Work and Surprise.
                 Here are some keywords for each topic 
                "Room": ["room", "rooms", "upgrade", "clean", "tidy", "large", "bathroom", "bed", "TV", "shower"] all aspects of describing the status of the room,
                "Food and Drinks": ["drinks", "cocktails", "bottle", "breakfast", "dinner", "menu", "caffee", "tee", "delicious", "continental", "waiter","restaurant "] all aspects describing the quality of food like breakfast and bar,
                "Location": ["close", "far", "next", "park", "train", "bicicle", "car", "walk", "tee", "building", "neighborhood", "cab service", "airport", "subway", "stairs"] all aspects describing the location, surrounding and connection of the hotel,
                "Internet and Work": ["wifi", "Internet", "connection", "work", "password", "computer", "meeting", "signal"] all aspects describing abilty to work from the hotel with a focus on internet connection,
                "Surprise": ["everything", "honestly", "surprising", "change", "unfortunately", "refund"] all aspects which are supringly and not expected by the reviewer,
                Feel free to say that the reviews do not specifically address certain topics.
                 """
        user_prompt = "Here is the text to be summarized below the newline character.\n {text}".format(text=input_text)
        return self._chat(system_prompt, user_prompt)




In [38]:

# Keep only the rows that actually have merged review text.
used_data = Hotel_Reviews.dropna(subset=['MergedColumn'])

# For every hotel, glue all of its merged reviews together with single spaces and
# collect the result as a {hotel name: concatenated reviews} dictionary.
grouped_reviews = (
    used_data
    .groupby('Hotel_Name')['MergedColumn']
    .apply(' '.join)
    .to_dict()
)


In [39]:

# One summarizer instance is reused for every hotel
gpt_summarizer = TextSummarizer()

# Map each hotel name to the topic-wise summary of its concatenated reviews
summarized_reviews = {}
for hotel in grouped_reviews:
    reviews = grouped_reviews[hotel]
    summarized_reviews[hotel] = gpt_summarizer.summarize_text_topic(reviews)

# Show every summary, prefixed by the hotel it belongs to
for hotel in summarized_reviews:
    summary = summarized_reviews[hotel]
    print(f"{hotel}: {summary}")

11City Rooms: The reviews for 11City Rooms in Chania generally reflect a positive experience with excellent hospitality, cleanliness, and a prime location, though many guests noted issues with noise levels and the small size of the rooms and bathrooms.

**Room:** Most reviews praise the rooms for their cleanliness and comfort, though some guests criticize the size and layout, particularly the small bathrooms and minimal storage space.

**Food and Drinks:** There are mentions of a welcome drink and some snacks provided, but detailed feedback on food and drinks, including breakfast, is largely absent.

**Location:** The hotel's location is repeatedly highlighted as a major advantage, being situated in the bustling heart of Old Town Chania, close to local attractions and amenities.

**Internet and Work:** Reviews do not specifically address the internet connection or the facilities for working from the hotel, suggesting this may not have been a priority for guests.

**Surprise:** While ma

In [36]:
import json

# Persist the {hotel: summary} mapping as pretty-printed JSON (4-space indent)
with open('summarized_reviews_gr.json', 'w') as json_file:
    json_file.write(json.dumps(summarized_reviews, indent=4))

# To retrieve a summary given a hotel name
def get_summary(hotel_name, path='summarized_reviews_gr.json'):
    """Look up the stored summary for one hotel.

    Args:
        hotel_name: key to look up in the stored mapping.
        path: JSON file holding a {hotel name: summary} object. Defaults to the
            file written by this notebook, so existing one-argument calls behave
            as before.

    Returns:
        The stored summary, or the string "Summary not found." when the hotel
        is not a key in the file.

    Raises:
        FileNotFoundError: if ``path`` does not exist.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    # The file is re-read on every call, so updates on disk are always picked up.
    with open(path, 'r', encoding='utf-8') as json_file:
        summaries = json.load(json_file)
    return summaries.get(hotel_name, "Summary not found.")

# Example usage: look up one hotel in the saved JSON file and print the result.
# If the name is not a key in the file, get_summary returns "Summary not found."
hotel_name = "Achillion Palace"
print(f"{hotel_name}: {get_summary(hotel_name)}")


Achillion Palace: The reviews for Achillion Palace hotel indicate that while its location, cleanliness, and friendly staff are consistently praised, many guests express disappointment regarding outdated facilities and breakfast variety.

**Room:** The rooms are generally spacious, clean, and comfortable, but reviews frequently mention outdated furniture, decor, and occasional plumbing issues.

**Food and Drinks:** Breakfast is described as decent and fresh, though several reviews note a lack of variety and certain desirable items missing.

**Location:** The hotel's central location is highlighted as a significant advantage, with easy access to the beach, old town, and nearby amenities.

**Internet and Work:** There are no specific mentions of internet capabilities or work-related facilities in the reviews.

**Surprise:** Guests are surprised by the discrepancy between the hotel's advertised quality and their experience, with some noting unexpected cleanliness issues and the need for re