# Step 1: Load and Inspect Data

In this step, we load the JSON file with analysis results and inspect its structure to ensure it matches expectations.

In [None]:
import json

# Load the analysis results JSON file
input_file = "../data/processed/generalized.json"

# JSON text is UTF-8; pin the encoding so review text containing non-ASCII
# characters decodes the same way regardless of the platform's default.
with open(input_file, "r", encoding="utf-8") as json_file:
    data = json.load(json_file)

# Inspect the data: the loops below treat the top-level object as a mapping
# of sentiment name -> list of review records.
print("Number of reviews by sentiment:")
for sentiment, reviews in data.items():
    print(f"  {sentiment.capitalize()}: {len(reviews)}")

# Display a sample review for each sentiment (sentiments with no reviews
# are skipped so that reviews[0] cannot raise IndexError)
for sentiment, reviews in data.items():
    if reviews:
        print(f"\nSample {sentiment.capitalize()} Review:")
        print(json.dumps(reviews[0], indent=4))

# Step 2: Summarize Insights by Sentiment

In this step, we:
1. Count the total number of reviews for each sentiment.
2. Enumerate all generalized key topics mentioned for each sentiment, along with their counts.
3. Store the results in the `report_data` dictionary for later use in report generation.

In [None]:
import json
from collections import Counter

# Container that accumulates everything the later report steps read
report_data = {}

# Step 2.1: how many reviews were recorded under each sentiment
review_counts = {}
for sentiment, reviews in data.items():
    review_counts[sentiment] = len(reviews)
report_data["review_counts"] = review_counts

# Step 2.2: per sentiment, tally how often each generalized key topic occurs
topics_by_sentiment = {}
for sentiment, reviews in data.items():
    topic_tally = Counter()
    for review in reviews:
        for topic in review["generalized_key_topics"]:
            topic_tally[topic] += 1
    # Store a plain dict so the result can be serialized as ordinary JSON
    topics_by_sentiment[sentiment] = dict(topic_tally)
report_data["generalized_topics_by_sentiment"] = topics_by_sentiment

# Show the collected summary as a JSON object
print(json.dumps(report_data, indent=4))

# Step 3: Percentage Distribution of Topics by Sentiment

In this step, we calculate the percentage distribution of generalized key topics for each sentiment (positive, negative, neutral). These calculations allow us to understand the relative significance of each topic within each sentiment.

The results are stored in the `report_data` dictionary for use in later steps.

In [None]:
# Step 3: Percentage Distribution of Topics by Sentiment

# For every sentiment, convert raw topic counts into each topic's share
# (in percent) of all topic mentions within that sentiment.
percentage_distributions = {}

for sentiment, topic_counts in report_data["generalized_topics_by_sentiment"].items():
    mention_total = sum(topic_counts.values())
    topic_shares = {}
    # With no mentions at all the sentiment keeps an empty mapping, which
    # also avoids dividing by zero.
    if mention_total > 0:
        for topic, count in topic_counts.items():
            topic_shares[topic] = (count / mention_total) * 100
    percentage_distributions[sentiment] = topic_shares

# Attach the distributions to report_data for the following steps
report_data["percentage_distribution_by_sentiment"] = percentage_distributions

# Show the updated report_data as JSON
import json
print(json.dumps(report_data, indent=4))

# Step 4: Identify Top 3 Worst Topics and Group Reviews

In this step, we:
1. Identify the 3 worst generalized key topics, i.e. the topics with the highest percentage share of mentions within negative reviews.
2. Group reviews by these top 3 topics. A single review may appear in multiple groups if it is associated with multiple top topics.

These groups are used in the next step, where AI summarizes the specific issues behind each topic.

In [None]:
# Step 4: Identify Top 3 Worst Topics and Group Reviews

# Rank the negative-review topics by their percentage share, highest first.
# .get() mirrors the data.get("negative", []) lookup used for grouping below:
# a data set without a "negative" bucket yields empty results instead of
# raising KeyError.
negative_topic_percentages = sorted(
    report_data["percentage_distribution_by_sentiment"].get("negative", {}).items(),
    key=lambda x: x[1],
    reverse=True
)

# Step 4.1: Extract the top 3 worst topics (fewer if fewer topics exist)
top_3_worst_topics = [topic for topic, _ in negative_topic_percentages[:3]]

# Step 4.2: Group reviews by the top 3 worst topics
grouped_reviews_by_topic = {topic: [] for topic in top_3_worst_topics}

# A review is appended to every top topic it mentions, so the same review
# may appear in more than one group.
for review in data.get("negative", []):
    for topic in top_3_worst_topics:
        if topic in review["generalized_key_topics"]:
            grouped_reviews_by_topic[topic].append(review)

# Display number of reviews for each topic
print("Grouped Reviews by Top 3 Worst Topics:")
for topic, reviews in grouped_reviews_by_topic.items():
    print(f"  {topic}: {len(reviews)} reviews")

# Store results in a dictionary for later use
step_4_results = {
    "top_3_worst_topics": top_3_worst_topics,
    "grouped_reviews_by_topic": grouped_reviews_by_topic
}

# Step 5: Summarize Problems and Generate Recommendations

In this step, we:
1. Use the OpenAI API to:
   - Generate a short description of the key problem for each of the top 3 worst topics based on the reviews.
   - Provide a list of actionable recommendations to address the problem.
2. Keep all results in memory for subsequent steps.

In [None]:
# `os` is not imported by any earlier visible cell, so import it here to keep
# this cell runnable on its own.
import os

# NOTE(review): `OpenAI` is not imported in any visible cell either — confirm
# that `from openai import OpenAI` is executed before this cell runs.

# API client shared by the AI steps below; the key is read from the
# OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Model name and sampling temperature passed to every chat completion request
openai_model = "gpt-4o-mini"
openai_temperature = 0.7

# Function to generate problem description and recommendations for a topic
def generate_problem_description_and_recommendations(topic, reviews):
    """
    Generate a short description of the problem and recommendations for a given topic using OpenAI.

    The reviews are sent to the chat model as a numbered list and the reply is
    parsed as JSON. Any failure while calling the API or parsing the reply is
    printed and turned into a fallback result, so one failing topic does not
    abort processing of the others.

    Args:
        topic (str): The topic for which the description and recommendations are generated.
        reviews (list of dict): List of reviews related to the topic. Each
            review's text is read from its "review" key.

    Returns:
        dict: A dictionary with the keys "topic", "problem_description" and
        "recommendations". When `reviews` is empty no API call is made and
        the default description with an empty recommendation list is returned.
    """
    # Nothing to summarize: skip the API call rather than asking the model
    # to describe a problem from an empty review list.
    if not reviews:
        return {
            "topic": topic,
            "problem_description": "No description provided.",
            "recommendations": []
        }

    # Create a numbered list of reviews
    numbered_reviews = "\n".join(
        f"{number}. {review['review']}" for number, review in enumerate(reviews, start=1)
    )

    # Construct the AI prompt
    prompt = f"""
        The following is a list of reviews related to the topic: "{topic}".

        Reviews:
        {numbered_reviews}

        Based on these reviews:
        1. Generate a short description of the key problem related to this topic.
        2. Provide a list of actionable recommendations to address the identified problem.
        
        Your response should be in the following format:
        {{
            "problem_description": "Short description of the problem.",
            "recommendations": [
                "Recommendation 1",
                "Recommendation 2",
                ...
            ]
        }}
        
        Please ensure the response is in valid JSON format. Do not include any explanations, only provide a JSON output that is RFC8259 compliant.
        Do not include markdown code blocks in your response. Remove the ```json markdown from the output.

    """

    try:
        # Send the prompt to OpenAI
        response = client.chat.completions.create(
            messages=[
                {"role": "system", "content": "You are a helpful assistant for analyzing reviews and suggesting improvements."},
                {"role": "user", "content": prompt}
            ],
            model=openai_model,
            temperature=openai_temperature
        )

        raw_content = response.choices[0].message.content.strip()
        # The prompt forbids markdown fences, but tolerate a reply wrapped in
        # ``` / ```json anyway instead of discarding it as invalid JSON.
        if raw_content.startswith("```"):
            raw_content = raw_content.partition("\n")[2].rstrip()
            if raw_content.endswith("```"):
                raw_content = raw_content[:-3]

        # Parse the AI-generated response
        ai_response = json.loads(raw_content)

        # Return the results
        return {
            "topic": topic,
            "problem_description": ai_response.get("problem_description", "No description provided."),
            "recommendations": ai_response.get("recommendations", [])
        }

    except Exception as e:
        # Deliberately broad: this is a best-effort step, so API errors and
        # malformed replies are reported and replaced by a fallback result.
        print(f"Error generating description and recommendations for topic '{topic}': {e}")
        return {
            "topic": topic,
            "problem_description": "Error generating description.",
            "recommendations": []
        }

# Build one AI-generated summary per worst topic and keep them under a new
# report_data key
problem_summaries = []
report_data["problems_summary"] = problem_summaries

for topic, reviews in step_4_results["grouped_reviews_by_topic"].items():
    if not reviews:
        # Topics with no grouped reviews are not sent to the AI
        continue
    problem_summaries.append(
        generate_problem_description_and_recommendations(topic, reviews)
    )

# Show the collected summaries as JSON
import json
print(json.dumps(problem_summaries, indent=4))

# Step 6: Merge Problems and Recommendations

In this step, we:
1. Provide the AI model with all problem descriptions and actionable recommendations generated for the top topics.
2. Ask the model to:
   - Identify and merge overlapping issues across different topics.
   - Create a consolidated description of the hotel's problems.
   - Generate a unified list of recommendations, eliminating redundancies.

In [None]:
# Function to merge problems and recommendations with AI
def merge_problems_and_recommendations(problems_summary):
    """
    Use OpenAI to merge overlapping problems and recommendations into a general summary.

    Any failure while calling the API or parsing the reply is printed and
    turned into a fallback result instead of being raised.

    Args:
        problems_summary (list of dict): List of problem descriptions and recommendations for each topic.
            Each item is read through its "topic", "problem_description" and
            "recommendations" keys.

    Returns:
        dict: A dictionary containing the general problem description and consolidated recommendations.
        When `problems_summary` is empty no API call is made and the default
        description with an empty recommendation list is returned.
    """
    # Nothing to merge: skip the API call rather than asking the model to
    # summarize problems that were never reported.
    if not problems_summary:
        return {
            "general_problem_description": "No description provided.",
            "consolidated_recommendations": []
        }

    # Prepare the input for AI. Recommendations come from an earlier model
    # reply and are not guaranteed to be strings, so convert each one before
    # joining.
    problems_input = "\n".join([
        f"""
        Topic: {item['topic']}
        Problem Description: {item['problem_description']}
        Recommendations: {', '.join(str(recommendation) for recommendation in item['recommendations'])}
        """
        for item in problems_summary
    ])

    # Construct the AI prompt
    prompt = f"""
        The following are problem descriptions and recommendations for various hotel-related issues:

        {problems_input}

        Your task:
        1. Identify overlapping problems and merge them into a single general description of the hotel's issues.
        2. Consolidate the recommendations into a unified list, eliminating redundancies. Keep in the list only significant recommendations. The list should contain 5 recommendations at most.
        3. Provide the output in the following format:
        {{
            "general_problem_description": "A summary of the hotel's main problems.",
            "consolidated_recommendations": [
                "Recommendation 1",
                "Recommendation 2",
                ...
            ]
        }}
        4. Make all generated texts easy to read and understand. Create recommendations in plain and attractive language.

        Please ensure the response is in valid JSON format. Do not include any explanations, only provide a JSON output that is RFC8259 compliant.
        Do not include markdown code blocks in your response. Remove the ```json markdown from the output.
    """

    try:
        # Send the prompt to OpenAI
        response = client.chat.completions.create(
            messages=[
                {"role": "system", "content": "You are a helpful assistant for consolidating problems and recommendations."},
                {"role": "user", "content": prompt}
            ],
            model=openai_model,
            temperature=openai_temperature
        )

        raw_content = response.choices[0].message.content.strip()
        # The prompt forbids markdown fences, but tolerate a reply wrapped in
        # ``` / ```json anyway instead of discarding it as invalid JSON.
        if raw_content.startswith("```"):
            raw_content = raw_content.partition("\n")[2].rstrip()
            if raw_content.endswith("```"):
                raw_content = raw_content[:-3]

        # Parse the AI-generated response
        consolidated_results = json.loads(raw_content)
        # The documented return type is a dict; treat valid JSON of any other
        # type as a failed reply so the fallback below is returned.
        if not isinstance(consolidated_results, dict):
            raise ValueError("AI response is not a JSON object")
        return consolidated_results

    except Exception as e:
        # Deliberately broad: this is a best-effort step, so API errors and
        # malformed replies are reported and replaced by a fallback result.
        print(f"Error merging problems and recommendations: {e}")
        return {
            "general_problem_description": "Error generating description.",
            "consolidated_recommendations": []
        }

# Hand the per-topic summaries stored in report_data (an empty list when the
# key is absent) to the AI merge step
problems_summary = report_data.get("problems_summary", [])
merged_results = merge_problems_and_recommendations(problems_summary)

# Copy both merged fields into report_data, using a default for any field
# missing from the merge result
for field_name, default_value in (
    ("general_problem_description", "No description provided."),
    ("consolidated_recommendations", []),
):
    report_data[field_name] = merged_results.get(field_name, default_value)

# Print the stored values for validation
import json
for heading, field_name in (
    ("General Problem Description:", "general_problem_description"),
    ("\nConsolidated Recommendations:", "consolidated_recommendations"),
):
    print(heading)
    print(json.dumps(report_data[field_name], indent=4))

# Step 7: Save Report Data to JSON File

In this step, we save the prepared `report_data` dictionary into a JSON file. This allows us to persist the processed insights, making them readily available for generating the final report or further analysis.

In [None]:
# Step 7: Save Report Data to JSON File

# Define the output file path
output_file = "../data/processed/report_data.json"

# Save report_data to the JSON file. The encoding is pinned so the file is
# written the same way on every platform.
with open(output_file, "w", encoding="utf-8") as json_file:
    json.dump(report_data, json_file, indent=4)

print(f"Report data successfully saved to {output_file}")