# Import Libraries

# In [34]:  (notebook cell prompt, kept as a comment so the file parses as Python)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's "whitegrid" theme once, up front, so the figures created
# later in the script share the same look.
sns.set_theme(style="whitegrid")

# --- 0. Helper Function for Cuisine Cleaning (used throughout the script) ---
def clean_and_split_cuisines(cuisine_string):
    """Split a comma-separated cuisine string into cleaned, title-cased names.

    Args:
        cuisine_string: Raw cuisine value such as ``"north indian,  pizza"``.
            Missing values (``None`` / ``NaN``) are accepted.

    Returns:
        A list of cuisine names with surrounding whitespace removed and
        title-casing applied. Missing input yields ``[]``. Empty tokens
        (from blank input, doubled commas or a trailing comma) are dropped
        so they never show up as a cuisine named ``""``.
    """
    if pd.isna(cuisine_string):
        return []
    # Title case gives one canonical spelling for matching and searching.
    stripped_tokens = (token.strip() for token in cuisine_string.split(','))
    return [token.title() for token in stripped_tokens if token]

# --- 1. Data Loading ---
# Read the dataset into `df`; every later section of the script depends on it.
print("--- 1. Data Loading and Initial Inspection ---")
try:
    df = pd.read_csv('final_robust_data.csv')
except FileNotFoundError:
    print("Error: 'final_robust_data.csv' not found.")
    # Nothing below can run without the data. Raise SystemExit directly
    # instead of calling exit(): exit() is an interactive-shell helper that
    # the `site` module may not install, and it reports success (status 0).
    raise SystemExit(1) from None
else:
    print(f"Data loaded successfully. Total rows: {len(df)}")

# --- 2. Feature Engineering ---
print("\n" + "="*80)
print("--- 2. Feature Engineering: Creating Cost, Popularity, and Cuisine List Features ---")

# 2.1 Cost tiers, split at the 33rd and 66th percentiles of 'Cost'.
# Bins are left-closed (right=False), so the top edge is max + 1 to keep
# the most expensive restaurant inside the last bin.
cost_labels = ['Low Cost (Budget-friendly)', 'Medium Cost (Mid-range)', 'High Cost (Premium)']
cost_quantiles = df['Cost'].quantile([0.33, 0.66]).tolist()
cost_bins = [df['Cost'].min(), *cost_quantiles, df['Cost'].max() + 1]
df['Cost_Range'] = pd.cut(
    df['Cost'],
    bins=cost_bins,
    right=False,
    labels=cost_labels,
)
print("Cost_Range Distribution:")
print(df['Cost_Range'].value_counts().to_markdown(stralign="left", numalign="left"))

# 2.2 Popularity tiers, split at the median and the 90th percentile of
# 'Votes', using the same left-closed binning as the cost tiers.
vote_labels = ['Niche/Less Voted', 'Average Popularity', 'Very Popular']
vote_quantiles = df['Votes'].quantile([0.5, 0.9]).tolist()
vote_bins = [df['Votes'].min(), *vote_quantiles, df['Votes'].max() + 1]
df['Popularity'] = pd.cut(
    df['Votes'],
    bins=vote_bins,
    right=False,
    labels=vote_labels,
)
print("\nPopularity Distribution:")
print(df['Popularity'].value_counts().to_markdown(stralign="left", numalign="left"))

# 2.3 Per-restaurant list of cleaned cuisine names; the recommendation
# filter matches against this column.
df['Cuisine_List'] = df['Cuisine'].map(clean_and_split_cuisines)


# --- 3. Full Detailed Analysis (Tables) ---
print("\n" + "="*80)
print("--- 3. Full Detailed Analysis (Statistical Tables) ---")

# 3.1 Localities with the most restaurants
top_10_localities_count = df['Locality'].value_counts().nlargest(10)
print("\n3.1 Top 10 Localities (Restaurant Concentration):")
print(top_10_localities_count.to_markdown(stralign="left", numalign="left"))

# 3.2 Cuisine frequencies, plus the sorted list of distinct cuisine names.
# explode() yields one row per (restaurant, cuisine) pair.
all_cuisines = df['Cuisine_List'].explode()
top_10_cuisines = all_cuisines.value_counts().nlargest(10)
unique_cuisines_list = sorted(set(all_cuisines.dropna()))

print("\n3.2a Top 10 Most Frequent Cuisines:")
print(top_10_cuisines.to_markdown(stralign="left", numalign="left"))
print(f"\n3.2b Unique Cuisines List (for Suggestion Feature): {len(unique_cuisines_list)} total.")
print(f"Snippet: {unique_cuisines_list[:10]}...")

# 3.3 Mean / median / standard deviation of the three score columns
factors_stats = df[
    ['Health_Score_New', 'Normalized_Mood_Score', 'Final_Robust_Score']
].agg(['mean', 'median', 'std'])
print("\n3.3 Factor Statistics (Mean, Median, Std Dev):")
print(factors_stats.to_markdown(stralign="left", numalign="left"))

# 3.4 Pairwise correlations between the key columns
correlation_cols = [
    'Rating',
    'Votes',
    'Cost',
    'Trust_Score',
    'Health_Score_New',
    'Normalized_Mood_Score',
    'Final_Robust_Score',
]
correlation_matrix = df[correlation_cols].corr()
print("\n3.4 Correlation Matrix of Key Features:")
print(correlation_matrix.to_markdown(stralign="left", numalign="left"))


# --- 4. The Recommendation Model Function ---
print("\n" + "="*80)
print("--- 4. The Recommendation Model Function (get_recommendations) ---")

def get_recommendations(
    df,
    target_cuisine: str = None,
    target_locality: str = None,
    target_cost_range: str = None,
    sort_by: str = 'Final_Robust_Score', # Default sort by overall quality
    top_n: int = 10
) -> pd.DataFrame:
    """Return the top restaurants matching the given filters.

    Args:
        df: Restaurant table. Must contain 'Name', 'Locality', 'Cuisine',
            'Rating', 'Cost', 'Final_Robust_Score', a list-valued
            'Cuisine_List' column, a categorical 'Cost_Range' column and
            the ``sort_by`` column. It is not modified.
        target_cuisine: Cuisine name, matched case-insensitively (after
            stripping whitespace) against 'Cuisine_List'. Falsy skips the
            filter.
        target_locality: Literal, case-insensitive substring to look for
            in 'Locality'. Falsy skips the filter.
        target_cost_range: One of the 'Cost_Range' categories. Falsy skips
            the filter; an unrecognised value is reported and ignored.
        sort_by: Column to rank by, highest value first.
        top_n: Maximum number of rows to return.

    Returns:
        A DataFrame with 'Name', 'Locality', 'Cuisine', 'Rating', 'Cost'
        and a final ``"Ranked by: <sort_by>"`` column. If no row survives
        the filters, a warning is printed and the overall top 5 rows by
        'Final_Robust_Score' are returned instead, with that score under
        its original column name.

    Raises:
        KeyError: If ``sort_by`` or a required column is missing.
    """
    base_columns = ['Name', 'Locality', 'Cuisine', 'Rating', 'Cost']

    # Boolean indexing always builds a new frame, so the caller's data is
    # never mutated and no defensive full copy is needed.
    recommendations = df

    # 1. Apply Filters

    # Filter by Cuisine (matches against the pre-processed Cuisine_List)
    if target_cuisine:
        wanted_cuisine = target_cuisine.strip().title()
        cuisine_lists = recommendations['Cuisine_List']
        # Build the mask as an explicit boolean Series so it stays a valid
        # row mask even when there are no rows.
        has_cuisine = pd.Series(
            [wanted_cuisine in cuisines for cuisines in cuisine_lists],
            index=cuisine_lists.index,
            dtype=bool,
        )
        recommendations = recommendations[has_cuisine]

    # Filter by Locality. regex=False treats the user's text literally, so
    # characters such as "(" or "+" cannot raise a regex error or be read
    # as a pattern.
    if target_locality:
        in_locality = recommendations['Locality'].str.contains(
            target_locality, case=False, na=False, regex=False
        )
        recommendations = recommendations[in_locality]

    # Filter by Cost Range
    if target_cost_range:
        if target_cost_range in df['Cost_Range'].cat.categories:
            recommendations = recommendations[
                recommendations['Cost_Range'] == target_cost_range
            ]
        else:
            # Still ignore the unknown label, but tell the user so the
            # unfiltered result is not mistaken for a filtered one.
            print(
                f"Warning: Unknown cost range {target_cost_range!r}; "
                "cost filter ignored."
            )

    # Handle case with no results after filtering
    if recommendations.empty:
        print("Warning: No exact matches found for the specified criteria.")
        return df.sort_values(by='Final_Robust_Score', ascending=False).head(5)[
            base_columns + ['Final_Robust_Score']
        ]

    # 2. Sort and Rank the Results
    ranked_recommendations = recommendations.sort_values(
        by=sort_by,
        ascending=False
    ).head(top_n)

    # 3. Final Output
    # Add the ranking values as a separate, uniquely named column instead of
    # selecting-and-renaming: if sort_by is itself a display column (e.g.
    # 'Rating'), the old approach produced two identically named columns.
    ranked_label = f"Ranked by: {sort_by}"
    return ranked_recommendations[base_columns].assign(
        **{ranked_label: ranked_recommendations[sort_by]}
    )


# --- 5. Visualization (Saves 4 PNG files) ---
# Each plot is drawn on its own figure, saved to the working directory and
# closed straight away so figures do not accumulate in memory.
print("\n" + "="*80)
print("--- 5. Visualization: Generating 4 Plots (.png files saved) ---")

# 5.1 Data Prep for Visualization
# Mean robust score, restricted to the ten most restaurant-dense localities.
robust_score_by_locality = df[df['Locality'].isin(top_10_localities_count.index)].groupby('Locality')['Final_Robust_Score'].mean()
# 'Cost_Range' is categorical. Passing observed=False explicitly keeps the
# existing behaviour (every category appears, even if empty) and silences
# the pandas FutureWarning about the default changing. reindex() then puts
# the bars in low -> medium -> high order.
robust_score_by_cost = df.groupby('Cost_Range', observed=False)['Final_Robust_Score'].mean().reindex(cost_labels)

# Plot 1: Top 10 Localities vs. Average Robust Score
plt.figure(figsize=(10, 6))
# Ascending sort puts the best-scoring locality at the top of the barh chart.
robust_score_by_locality.sort_values(ascending=True).plot(kind='barh', color='skyblue')
plt.title('A. Average Final Robust Score by Top 10 Locality')
plt.xlabel('Average Final Robust Score')
plt.ylabel('Locality')
plt.tight_layout()
plt.savefig('avg_robust_score_by_locality.png')
plt.close()

# Plot 2: Cost Range vs. Average Robust Score
plt.figure(figsize=(8, 5))
robust_score_by_cost.plot(kind='bar', color=['lightcoral', 'lightgreen', 'gold'])
plt.title('B. Average Final Robust Score by Cost Range')
plt.xlabel('Cost Range')
plt.ylabel('Average Final Robust Score')
plt.xticks(rotation=0)
plt.tight_layout()
plt.savefig('avg_robust_score_by_cost_range.png')
plt.close()

# Plot 3: Top 10 Cuisines Count
plt.figure(figsize=(10, 6))
# Ascending sort puts the most frequent cuisine at the top of the barh chart.
top_10_cuisines.sort_values(ascending=True).plot(kind='barh', color='teal')
plt.title('C. Top 10 Most Frequent Cuisines')
plt.xlabel('Number of Restaurants')
plt.ylabel('Cuisine')
plt.tight_layout()
plt.savefig('top_10_cuisines_count.png')
plt.close()

# Plot 4: Rating Distribution (histogram with a KDE overlay)
plt.figure(figsize=(8, 5))
sns.histplot(df['Rating'], bins=20, kde=True, color='purple')
plt.title('D. Distribution of Restaurant Ratings')
plt.xlabel('Rating')
plt.ylabel('Count')
plt.tight_layout()
plt.savefig('rating_distribution.png')
plt.close()

print("\nAnalysis and visualization complete. Four image files saved.")

# --- 6. Example Usage of the Recommendation Function ---
print("\n" + "="*80)
print("--- 6. Example Usage of Recommendation Function ---")

# Example A: all three filters combined (cuisine, locality, cost tier),
# ranked by the default Final_Robust_Score.
recs_a = get_recommendations(
    df,
    top_n=5,
    target_cost_range='Medium Cost (Mid-range)',
    target_locality='South Bangalore',
    target_cuisine='Italian',
)
print("\nExample A: Top 5 Mid-Range Italian in South Bangalore (Sorted by Robust Score):")
print(recs_a.to_markdown(index=False, stralign="left", numalign="left"))

# Example B: no filters at all, only a different ranking column, which
# gives the healthiest options across the whole dataset.
recs_b = get_recommendations(df, top_n=5, sort_by='Health_Score_New')
print("\nExample B: Top 5 Health-Focused Restaurants Overall (Sorted by Health Score):")
print(recs_b.to_markdown(index=False, stralign="left", numalign="left"))

--- 1. Data Loading and Initial Inspection ---
Data loaded successfully. Total rows: 6593

--- 2. Feature Engineering: Creating Cost, Popularity, and Cuisine List Features ---
Cost_Range Distribution:
| Cost_Range                 | count   |
|:---------------------------|:--------|
| High Cost (Premium)        | 2531    |
| Low Cost (Budget-friendly) | 2135    |
| Medium Cost (Mid-range)    | 1927    |

Popularity Distribution:
| Popularity         | count   |
|:-------------------|:--------|
| Niche/Less Voted   | 3290    |
| Average Popularity | 2643    |
| Very Popular       | 660     |

--- 3. Full Detailed Analysis (Statistical Tables) ---

3.1 Top 10 Localities (Restaurant Concentration):
| Locality        | count   |
|:----------------|:--------|
| South Bangalore | 364     |
| South Delhi     | 339     |
| South Kolkata   | 310     |
| West Ahmedabad  | 236     |
| East Bangalore  | 235     |
| West Hyderabad  | 221     |
| Gurgaon         | 184     |
| West Delhi      | 173   

  robust_score_by_cost = df.groupby('Cost_Range')['Final_Robust_Score'].mean().reindex(cost_labels)



Analysis and visualization complete. Four image files saved.

--- 6. Example Usage of Recommendation Function ---

Example A: Top 5 Mid-Range Italian in South Bangalore (Sorted by Robust Score):
| Name             | Locality        | Cuisine                                               | Rating   | Cost   | Ranked by: Final_Robust_Score   |
|:-----------------|:----------------|:------------------------------------------------------|:---------|:-------|:--------------------------------|
| House of Commons | South Bangalore | Finger Food,  North Indian,  Italian,  Continental    | 4.4      | 1000   | 4.32901                         |
| Via Milano       | South Bangalore | Multi-Cuisine,  Italian,  Continental,  Mediterranean | 4.4      | 1100   | 4.23848                         |
| Onesta           | South Bangalore | Italian,  Pizza,  Desserts,  Fast Food                | 4.4      | 700    | 4.22592                         |
| Onesta           | South Bangalore | Italian,  Pizza,  De