In [None]:
!pip install pandas numpy geopy nltk vaderSentiment scikit-learn scipy flask flask-cors pyngrok



In [None]:
!curl -sSL https://ngrok-agent.s3.amazonaws.com/ngrok.asc \
	| sudo tee /etc/apt/trusted.gpg.d/ngrok.asc >/dev/null \
	&& echo "deb https://ngrok-agent.s3.amazonaws.com buster main" \
	| sudo tee /etc/apt/sources.list.d/ngrok.list \
	&& sudo apt update \
	&& sudo apt install ngrok

!ngrok config add-authtoken YOUR_AUTH_TOKEN

In [None]:
import requests
import pandas as pd
import time
import random
import re
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.neighbors import NearestNeighbors
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error
import gradio


# 🔹 Google API Key
API_KEY = ""  # Replace with your actual key

# 🔹 Base URLs for Google Places API v2
PLACES_SEARCH_URL = "https://places.googleapis.com/v1/places:searchNearby"
PLACES_DETAILS_URL = "https://places.googleapis.com/v1/places/"
GEOCODE_API_URL = "https://maps.googleapis.com/maps/api/geocode/json"

# Expanded List: Comprehensive Cuisine Keywords & Variants with Afghan Cuisine and 10 Keywords Each
CUISINE_KEYWORDS = {
    "arabic": ["arabic", "middle_eastern", "levantine", "khaleeji", "gulf_cuisine",
               "mezze", "manakish", "hummus", "falafel", "fattoush"],
    "saudi": ["saudi", "saudi_arabian", "najdi", "hijazi", "bedouin_food",
              "kabsa", "jareesh", "mutabbaq", "saleeg", "mofatah"],
    "yemeni": ["yemeni", "mandi", "madfoon", "zurbian", "haneeth",
               "saltah", "fahsa", "lahsa", "aseeda", "maraq"],
    "lebanese": ["lebanese", "lebanon_food", "shawarma_house", "mezze",
                 "tabbouleh", "kibbeh", "fattoush", "hummus", "batata_harra"],
    "turkish": ["turkish", "turkey_food", "ottoman_cuisine", "doner_kebab",
                "pide", "lahmacun", "baklava", "kebab", "manti"],
    "indian": ["indian", "biryani_house", "tandoori", "mughlai", "chicken_tikka",
               "masala_dosa", "dal_makhani", "paneer_butter", "samosa", "naan"],
    "pakistani": ["pakistani", "karahi", "nihari", "haleem", "punjabi_cuisine",
                  "chicken_sajji", "chapli_kebab", "bhuna_gosht", "korma", "paya"],
    "afghan": ["afghan", "kabuli_pulao", "afghani_kebab", "afghanistan_food",
               "bolani", "ashak", "mantoo", "shorwa", "chalow", "qabeli"],
    "chinese": ["chinese", "szechuan", "dim_sum", "hong_kong_food", "hot_pot",
                "kung_pao", "mapo_tofu", "peking_duck", "wonton", "spring_rolls"],
    "japanese": ["japanese", "sushi", "ramen", "yakitori", "izakaya",
                 "tempura", "udon", "miso_soup", "teriyaki", "bento"],
    "korean": ["korean", "kimchi", "bibimbap", "kbbq", "korean_bbq",
               "bulgogi", "tteokbokki", "samgyeopsal", "soondubu", "japchae"],
    "thai": ["thai", "pad_thai", "tom_yum", "green_curry", "som_tum",
             "massaman_curry", "larb", "satay", "kao_pad", "yam_talay"],
    "malaysian": ["malaysian", "nasi_lemak", "rendang", "mee_goreng", "char_kway_teow",
                  "laksa", "roti_canai", "hainanese_chicken", "satay", "cendol"],
    "italian": ["italian", "pizza", "pasta", "ristorante", "italian_trattoria",
                "lasagna", "risotto", "bruschetta", "osso_buco", "caprese"],
    "french": ["french", "baguette", "croissant", "bistro", "pâtisserie",
               "ratatouille", "coq_au_vin", "boeuf_bourguignon", "soufflé", "escargot"],
    "greek": ["greek", "gyro", "tzatziki", "mediterranean_food", "moussaka",
              "souvlaki", "spanakopita", "dolmades", "baklava", "horta"],
    "spanish": ["spanish", "paella", "tapas", "churros", "gazpacho",
                "patatas_bravas", "fabada", "croquetas", "jamon", "flan"],
    "german": ["german", "bratwurst", "bavarian_food", "schnitzel", "pretzel",
               "kartoffelsalat", "eintopf", "lebkuchen", "currywurst", "apfelstrudel"],
    "american": ["american", "burgers", "steakhouse", "fried_chicken", "barbeque",
                 "mac_and_cheese", "hot_dog", "bbq_ribs", "buffalo_wings", "meatloaf"],
    "mexican": ["mexican", "tacos", "burritos", "quesadillas", "salsa",
                "tamales", "chiles_rellenos", "pozole", "enchiladas", "guacamole"],
    "ethiopian": ["ethiopian", "injera", "doro_wat", "habesha_food", "tibs",
                  "kitfo", "shiro", "berbere", "foul", "gomen"],
    "nigerian": ["nigerian", "jollof_rice", "suya", "pounded_yam", "egusi_soup",
                 "fufu", "moimoi", "okro_soup", "afang_soup", "banga_soup"],
    "shawarma": ["shawarma", "gyro", "arabic_wrap", "doner", "grilled_kebab",
                 "souvlaki", "tawook", "shish_kebab", "kofta", "kibbeh"],
    "seafood": ["seafood", "grilled_fish", "lobster", "shrimp_house", "oysters",
                "sushi", "ceviche", "crab_cakes", "fish_tacos", "calamari"],
    "vegetarian": ["vegetarian", "vegan", "plant_based", "organic_food", "tofu",
                   "lentil_soup", "salad_bowl", "quinoa", "hummus", "avocado_toast"],
}


# 🔹 Function to get city coordinates
def get_city_coordinates(city_name):
    params = {"address": city_name, "key": API_KEY}
    response = requests.get(GEOCODE_API_URL, params=params).json()

    if response.get("status") == "OK":
        location = response["results"][0]["geometry"]["location"]
        return location["lat"], location["lng"]
    else:
        print("Error getting city coordinates:", response)
        return None, None

# 🔹 Function to get a list of restaurants from multiple grid locations
def get_restaurants(city, total_required=2000, radius=1000):
    lat, lng = get_city_coordinates(city)
    if lat is None or lng is None:
        return []

    restaurants = []
    headers = {"X-Goog-Api-Key": API_KEY, "X-Goog-FieldMask": "places.displayName,places.id,places.rating,places.priceLevel,places.userRatingCount,places.location,places.types"}

    search_areas = [(lat + random.uniform(-0.1, 0.1), lng + random.uniform(-0.1, 0.1)) for _ in range(total_required // 20)]

    for lat_grid, lng_grid in search_areas:
        payload = {
            "includedTypes": ["restaurant"],
            "locationRestriction": {
                "circle": {
                    "center": {"latitude": lat_grid, "longitude": lng_grid},
                    "radius": radius
                }
            }
        }

        response = requests.post(PLACES_SEARCH_URL, headers=headers, json=payload).json()
        #print(response)

        if "places" not in response:
            print("Error fetching restaurant data:", response)
            continue

        for place in response["places"]:
            restaurants.append({
                "place_id": place["id"],
                "name": place["displayName"]["text"],
                "rating": place.get("rating", 0),
                "total_ratings": place.get("userRatingCount", 0),
                "price_level": place.get("priceLevel", ""),
                "latitude": place["location"]["latitude"],
                "longitude": place["location"]["longitude"],
                "types":place["types"]
            })

            if len(restaurants) >= total_required:
                return restaurants

        time.sleep(1)  # Prevent API rate limits

    return restaurants

# 🔹 Function to get additional restaurant details
def get_restaurant_details_old(place_id):
    headers = {"X-Goog-Api-Key": API_KEY, "X-Goog-FieldMask": "displayName,formattedAddress,internationalPhoneNumber,regularOpeningHours,editorialSummary,reviews"}
    response = requests.get(f"{PLACES_DETAILS_URL}{place_id}", headers=headers).json()

    if "displayName" not in response:
        return {}

    details = response
    reviews = details.get("reviews", [])

    return {
        "phone_number": details.get("internationalPhoneNumber", ""),
        "full_address": details.get("formattedAddress", ""),
        "opening_hours": details.get("regularOpeningHours", {}).get("weekdayDescriptions", []),
        "reviews": [review["text"]["text"] for review in reviews[:3]]  # Limit to top 3 reviews
    }

# 🔹 Function to get additional restaurant details (Handles ALL missing cases)
def get_restaurant_details(place_id):
    headers = {
        "X-Goog-Api-Key": API_KEY,
        "X-Goog-FieldMask": "displayName,formattedAddress,internationalPhoneNumber,regularOpeningHours,editorialSummary,reviews"
    }

    try:
        response = requests.get(f"{PLACES_DETAILS_URL}{place_id}", headers=headers).json()

        if not response or "displayName" not in response:
            return {
                "phone_number": None,
                "full_address": None,
                "opening_hours": None,
                "reviews": None
            }

        details = response

        # ✅ Handle missing phone number
        phone_number = details.get("internationalPhoneNumber", None)

        # ✅ Handle missing full address
        full_address = details.get("formattedAddress", None)

        # ✅ Handle missing opening hours (ensure proper fallback)
        opening_hours = details.get("regularOpeningHours", {}).get("weekdayDescriptions", None)
        if not opening_hours:
            opening_hours = None  # Ensure consistent output if missing

        # ✅ Handle missing reviews (ensure list format)
        reviews = details.get("reviews", [])
        if not isinstance(reviews, list):  # If it's not a list, make it an empty list
            reviews = []

        # ✅ Extract up to 3 reviews safely (ensure no failure)
        extracted_reviews = []
        for review in reviews[:15]:
            review_text = review.get("text", {}).get("text", None)
            if review_text:  # Only add valid text
                extracted_reviews.append(review_text)

        return {
            "phone_number": phone_number,
            "full_address": full_address,
            "opening_hours": opening_hours,
            "reviews": extracted_reviews if extracted_reviews else None  # Ensure None if no reviews exist
        }

    except Exception as e:
        print(f"Error fetching details for place_id {place_id}: {str(e)}")
        return {
            "phone_number": None,
            "full_address": None,
            "opening_hours": None,
            "reviews": None
        }

def extract_cuisine_from_text(text):
    """
    Finds relevant cuisine types from a given text (reviews, restaurant types).
    Matches both exact cuisine names and their variants.
    """
    text = text.lower().replace("-", "_")  # Normalize text
    matched_cuisines = set()

    for cuisine, keywords in CUISINE_KEYWORDS.items():
        if any(re.search(rf"\b{kw}\b", text) for kw in keywords):
            matched_cuisines.add(cuisine)

    return list(matched_cuisines) if matched_cuisines else ["unknown"]


# 🔹 Function to collect and save data for multiple cities
def collect_and_save_data(cities, total_required_per_city=2000, output_file="restaurants_data_v2.csv"):
    all_restaurants = []  # Store data for all cities

    for city in cities:
        print(f"🌍 Collecting data for {city}...")
        restaurants = get_restaurants(city, total_required=total_required_per_city)

        for i, restaurant in enumerate(restaurants):
            details = get_restaurant_details(restaurant["place_id"])
            restaurant.update(details)
            all_restaurants.append(restaurant)

            if i % 50 == 0:  # Save progress every 50 records
                df = pd.DataFrame(all_restaurants)
                df.to_csv(output_file, index=False, encoding="utf-8-sig")
                #print(f"✅ {len(all_restaurants)} total restaurants collected so far...")

    # Final save after all cities are processed
    df = pd.DataFrame(all_restaurants)
    df.to_csv(output_file, index=False, encoding="utf-8-sig")
    print(f"✅ Data collection complete: {len(df)} restaurants saved from {len(cities)} cities.")

# 🔹 Define the list of cities to collect data from
cities_list = ["Makkah, Saudi Arabia", "Madinah, Saudi Arabia"]

# 🔹 Run the script for multiple cities
#collect_and_save_data(cities_list, total_required_per_city=5000)



In [10]:

# Load the dataset
file_path = "restaurants_data_v2.csv"  # Update with your file path
df = pd.read_csv(file_path)

# Define the columns that must be non-null in the best selection
required_columns = ["place_id", "name", "rating", "total_ratings", "price_level",
                    "latitude", "longitude", "opening_hours", "reviews", "cuisine"]

df["cuisine"] = (df["types"].astype(str) + " " + df["reviews"].astype(str)).apply(lambda row: extract_cuisine_from_text(row))  # Convert string to list

# Step 1: Identify duplicate restaurants based on `place_id`
duplicates = df[df.duplicated(subset=["place_id"], keep=False)]  # Find all duplicate restaurants

# Step 2: Select the duplicate entries that have **all required columns filled**
filtered_duplicates = duplicates.dropna(subset=required_columns)

# Step 3: If a restaurant has **no fully complete duplicates**, pick one with the least missing values
remaining_duplicates = duplicates[~duplicates["place_id"].isin(filtered_duplicates["place_id"])]
remaining_best = remaining_duplicates.groupby("place_id").apply(lambda x: x.loc[x.isnull().sum(axis=1).idxmin()])

# Identify list-type columns and convert them to strings
list_columns = ["cuisine", "reviews", "types"]  # Adjust based on actual data

for col in list_columns:
    if col in remaining_best.columns:
        remaining_best[col] = remaining_best[col].astype(str)
        filtered_duplicates[col] = filtered_duplicates[col].astype(str)

remaining_best = remaining_best.reset_index(drop=True)

final_selection = pd.concat([filtered_duplicates, remaining_best]).drop_duplicates()

# Step 5: Remove all other duplicate entries from the original dataset
df_cleaned = df[~df["place_id"].isin(duplicates["place_id"])]  # Keep only unique restaurants
df_cleaned = pd.concat([df_cleaned, final_selection])  # Add back the best duplicates

# Save the cleaned dataset
cleaned_file_path = "restaurants_cleaned.csv"
df_cleaned.to_csv(cleaned_file_path, index=False)

print(f"✅ Cleaning complete! {len(df_cleaned)} unique restaurants saved.")


✅ Cleaning complete! 3014 unique restaurants saved.


  remaining_best = remaining_duplicates.groupby("place_id").apply(lambda x: x.loc[x.isnull().sum(axis=1).idxmin()])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_duplicates[col] = filtered_duplicates[col].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_duplicates[col] = filtered_duplicates[col].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing

In [11]:
import pandas as pd
import numpy as np
import ast
from geopy.distance import geodesic
from nltk.sentiment import SentimentIntensityAnalyzer
import nltk

# Download sentiment analysis model (only once)
nltk.download('vader_lexicon')

# Load the dataset
file_path = "restaurants_cleaned.csv"  # Update with your actual file path
df = pd.read_csv(file_path)

# Initialize Sentiment Analyzer
sia = SentimentIntensityAnalyzer()

### 🔹 1️⃣ Normalize `rating`
df["normalized_rating"] = df["rating"] / 5.0  # Convert to 0-1 scale

### 🔹 2️⃣ Compute `Weighted Rating` (rating * total reviews)
df["weighted_rating"] = df["rating"] * (df["total_ratings"] / df["total_ratings"].max())

### 🔹 3️⃣ Convert `price_level` to Numeric
price_mapping = {
    "PRICE_LEVEL_INEXPENSIVE": 1,
    "PRICE_LEVEL_MODERATE": 2,
    "PRICE_LEVEL_EXPENSIVE": 3
}
df["price_level_numeric"] = df["price_level"].map(price_mapping).fillna(0)  # Fill NaN with 0

### 🔹 6️⃣ Sentiment Analysis on `reviews`
def get_sentiment_score(reviews):
    """Compute average sentiment score from reviews"""
    if pd.isna(reviews):
        return 0  # Neutral sentiment if no reviews
    try:
        review_list = ast.literal_eval(reviews)  # Convert string to list
        scores = [sia.polarity_scores(text)["compound"] for text in review_list if text]
        return np.mean(scores) if scores else 0
    except:
        return 0

df["sentiment_score"] = df["reviews"].apply(get_sentiment_score)

# Perform One-Hot Encoding
df["cuisine_types"] = df["cuisine"]
df = pd.get_dummies(df, columns=["cuisine_types"], prefix="cuisine", dtype=float)

### 🔹 7️⃣ Drop Unnecessary Columns
df.drop(columns=["place_id", "phone_number", "full_address", "opening_hours", "reviews","types"], inplace=True)

# Save the processed dataset
df.to_csv("restaurants_data_processed.csv", index=False,encoding="utf-8-sig")

print("✅ Feature Engineering Complete. Processed dataset saved as `restaurants_data_processed.csv`.")


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


✅ Feature Engineering Complete. Processed dataset saved as `restaurants_data_processed.csv`.


In [12]:

# Load Processed Data
file_path = "restaurants_data_processed.csv"
df = pd.read_csv(file_path)

# Fill missing values
df["rating"].fillna(df["rating"].mean(), inplace=True)  # Fill NaN ratings with average
df.fillna(0, inplace=True)

# Create a "fake user ID" based on restaurant reviews
df["user_id"] = df.index  # Temporary user ID

# Normalize numerical features (Rating, Distance, Sentiment Score, etc.)
# Identify numerical columns
numerical_features = ["normalized_rating", "weighted_rating", "price_level_numeric", "sentiment_score"]

# Find all one-hot encoded cuisine columns (those starting with "cuisine_")
cuisine_features = [col for col in df.columns if col.startswith("cuisine_")]

# Combine numerical and cuisine features
features_to_normalize = numerical_features + cuisine_features

# Apply Min-Max Scaling
scaler = MinMaxScaler()
df[features_to_normalize] = scaler.fit_transform(df[features_to_normalize])

print("✅ Data Preprocessing Done!")


✅ Data Preprocessing Done!


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["rating"].fillna(df["rating"].mean(), inplace=True)  # Fill NaN ratings with average


In [13]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Dummy Preference Scores (If No User Data, Use Avg Ratings as Labels)
df["preference_score"] = df["normalized_rating"]  # Placeholder labels

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(df[features_to_normalize], df["preference_score"], test_size=0.2, random_state=42)

# Train Model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Predict Scores for All Restaurants
df["predicted_score"] = rf_model.predict(df[features_to_normalize])

# Recommend Top Restaurants
def recommend_restaurants_rf(top_n=5):
    return df.sort_values(by="predicted_score", ascending=False)[["name", "predicted_score", "cuisine"]].head(top_n)

# Example Usage
rf_recommendations = recommend_restaurants_rf(top_n=5)

print("✅ ML-Based Recommendations Using Random Forest Ready!")

print(rf_recommendations)



  df["preference_score"] = df["normalized_rating"]  # Placeholder labels


✅ ML-Based Recommendations Using Random Forest Ready!
                                         name  predicted_score      cuisine
913                              مستودع اثيله              1.0  ['unknown']
2947                          فود تراك سنفوره              1.0  ['unknown']
2073                           مطعم أريج دمشق              1.0   ['arabic']
528   مطبخ شركة الحجاز المتحدة لخدمات الاعاشة              1.0  ['unknown']
2742                            شعبيات الاصيل              1.0  ['unknown']


  df["predicted_score"] = rf_model.predict(df[features_to_normalize])


In [16]:

# Predict ratings on test data
y_pred = rf_model.predict(X_test)

# Calculate Errors
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)

print(f"✅ Random Forest Evaluation:\nMSE: {mse:.4f}, RMSE: {rmse:.4f}, MAE: {mae:.4f}")


✅ Random Forest Evaluation:
MSE: 0.0000, RMSE: 0.0026, MAE: 0.0002


In [17]:

### 🔹 4️⃣ Compute `Distance from User`
# Define a sample user location (Makkah City Center)


def compute_distance(lat, lon, user_location):
    """Compute distance between restaurant and user"""
    try:
        return geodesic(user_location, (lat, lon)).km
    except:
        return np.nan



# ✅ Function to get distance from Google Maps API v2
def get_google_distance_v2(user_lat, user_lon, rest_lat, rest_lon, travel_mode="driving"):

    """ Get real-world distance using Google API v2 """

    url = f"https://routes.googleapis.com/directions/v2:computeRoutes"

    headers = {
        "Content-Type": "application/json",
        "X-Goog-Api-Key": API_KEY,
        "X-Goog-FieldMask": (
            "routes.distanceMeters,"
            "routes.duration,"
            "routes.legs.steps.navigationInstruction,"
            "routes.legs.distanceMeters,"
            "routes.legs.duration,"
            "routes.legs.startLocation,"
            "routes.legs.endLocation"
        )
    }

    payload = {
        "origin": {"location": {"latLng": {"latitude": user_lat, "longitude": user_lon}}},
        "destination": {"location": {"latLng": {"latitude": rest_lat, "longitude": rest_lon}}},
        "travelMode": travel_mode.upper()  # "DRIVING", "WALKING", "BICYCLING", "TRANSIT"
    }

    response = requests.post(url, headers=headers, json=payload)

    if response.status_code == 200:
        data = response.json()
        if "routes" in data and len(data["routes"]) > 0:
            #return data["routes"][0]["distanceMeters"] / 1000  # Convert meters to km
            #return data["routes"]

            distance = data["routes"][0]["distanceMeters"] / 1000  # Convert meters to km
            duration_sec = int(data["routes"][0]["duration"].replace("s", ""))  # Convert "441s" → 441
            duration_min = round(duration_sec / 60, 1)  # Convert seconds to minutes

            return pd.Series([distance, duration_min])

    return pd.Series([None, None])  # Return None if no distance found


def get_distance_and_duration(location,  lat2,lon2,  mode="driving"):
    """
    Fetch distance and duration between two coordinates using OSRM API.

    Parameters:
        lat1, lon1: Latitude & Longitude of Origin
        lat2, lon2: Latitude & Longitude of Destination
        mode: Transport mode ("driving", "walking", "cycling")

    Returns:
        Distance (km), Duration (minutes)
    """
    lat1, lon1 = location

    #print(f"Requesting: {lon1}, {lat1} to {lon2}, {lat2} in mode: {mode}")  # Debugging statement


    url = f"https://router.project-osrm.org/route/v1/{mode}/{lon1},{lat1};{lon2},{lat2}?overview=false"

    response = requests.get(url)
    data = response.json()

    if "routes" in data and data["routes"]:
        distance = data["routes"][0]["distance"] / 1000  # Convert meters to kilometers
        duration = data["routes"][0]["duration"] / 60  # Convert seconds to minutes
        #print(distance, duration)
        return pd.Series ([round(distance, 2), round(duration, 2)])
    else:
        return  pd.Series ([None, None])


In [39]:
# Assuming you already have the trained model
#from your_model_file import model, preprocess_input

def preprocess_input (user_input):
    df_filtered = df.copy()

    location = user_input['location']

    if 'cuisine' in user_input:
        print(f"Cuisine: {user_input['cuisine']}")
        df_filtered = df_filtered[df_filtered["cuisine"].str.contains(user_input['cuisine'].lower(), case=False, na=False)]

    if 'rating' in user_input:
        print(f"Rating: {user_input['rating']}")
        df_filtered = df_filtered[df_filtered["normalized_rating"]*5 > float(user_input['rating'])]

    if 'price_range' in user_input:
        print(f"Price Range: {user_input['price_range']}")
        df_filtered = df_filtered[df_filtered["price_level_numeric"] <= float(user_input['price_range'])]

    if 'distance' in user_input:
        print(f"Distance: {user_input['distance']}")
        distance = user_input['distance']

    if 'top_n' in user_input:
        print(f"top_n: {user_input['top_n']}")
        top_n = user_input['top_n']
    else:
        top_n = 10

    if 'location' in user_input:
        print(f"Location: {user_input['location']}")
        location = user_input['location']
    else:
        location = (21.4225, 39.8262)

    #print(df_filtered.empty)
    if not(df_filtered.empty):
        # Apply to DataFrame (assuming df has 'latitude' & 'longitude' columns)
        df_filtered[["distance_in_kms","duration_in_mins"]] = df_filtered.apply(lambda row: get_distance_and_duration(location, row["latitude"], row["longitude"], mode="driving"), axis=1)
        df_filtered = df_filtered[df_filtered["distance_in_kms"] <= float(distance)]
        print(f"✅ Found {len(df_filtered)} restaurants serving {user_input['cuisine']} food.")

        # Ensure Random Forest Model is already trained
        df_filtered["predicted_score"] = rf_model.predict(df_filtered[features_to_normalize])

        # Sort by highest predicted preference score
        recommended_restaurants = df_filtered.sort_values(by="predicted_score", ascending=False)[["name", "normalized_rating", "predicted_score", "distance_in_kms","duration_in_mins", "cuisine","longitude","latitude"]].head(10)
        recommended_restaurants["normalized_rating"] = recommended_restaurants["normalized_rating"] * 5
        # Get the top n rows with highest predicted_score
        recommended_restaurants = recommended_restaurants.nlargest(int(top_n), 'predicted_score')



    else:
        print("No Restaurant found with the requested parameters.")
        return None

    return recommended_restaurants

def recommend_restaurants(cuisine=None, rating=None, price_range=None, distance=None, location=None, top_n=None):
    # Initialize a dictionary to hold the input values
    user_input = {}

    # Check if the input was provided for each field, and add it to the input dictionary
    if cuisine:  # If cuisine is provided
        user_input['cuisine'] = cuisine

    if rating:  # If rating is provided
        user_input['rating'] = rating

    if price_range:  # If price range is provided
        user_input['price_range'] = price_range

    if distance:  # If distance is provided
        user_input['distance'] = distance

    if location:  # If location is provided
        user_input['location'] = location

    if top_n:  # If location is provided
        user_input['top_n'] = top_n

    # Now you can preprocess the user_input before feeding it into the model
    # For example, you might need to adjust preprocessing depending on which fields were filled

    recommended_restaurants = preprocess_input(user_input)

    return recommended_restaurants




In [None]:
import json
import pandas as pd
from flask import Flask, request, jsonify, send_file
from flask_cors import CORS
from pyngrok import ngrok

# Initialize Flask app
app = Flask(__name__)
CORS(app)  # Allows cross-origin requests

def clean_cuisine(text):
    text = re.sub(r"[\[\]']", "", text)  # Remove brackets and quotes
    words = text.split(", ")  # Split into individual words
    return ", ".join(word.strip().capitalize() for word in words)  # Convert to Camel Case



# Expose the HTML page
@app.route("/")
def serve_html():
    return send_file("/content/restaurant_recommendation_dynamic.html")


@app.route("/ngrok_url.js")
def serve_js():
    return send_file("/content/ngrok_url.js")

# Flask Route to Handle Requests
@app.route("/recommend", methods=["POST"])
def recommend():
    data = request.json  # Get JSON request data
    cuisine = data.get("cuisine", "pakistani")
    min_rating = data.get("min_rating", 3.5)
    price_range = data.get("price_range", 3)
    max_distance = data.get("max_distance", 10)
    latitude = data.get("latitude", 21.4225)
    longitude = data.get("longitude", 39.8262)
    top_n = data.get("top_n", 5)

    recommendations = recommend_restaurants(cuisine, min_rating,price_range, max_distance, (latitude, longitude), top_n)
    recommendations["cuisine"] = recommendations["cuisine"].apply(clean_cuisine)  # Apply to each row

    return jsonify(recommendations.to_dict(orient="records"))  # Return results as JSON

# Start the server using ngrok
public_url = ngrok.connect(5000)

# Save the URL for frontend use
with open("/content/ngrok_url.js", "w") as f:
    f.write(f'const NGROK_URL = "{public_url}";')

print(f"Public URL: {public_url}")

app.run(port=5000)


Public URL: NgrokTunnel: "https://4bea-104-196-201-225.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [16/Mar/2025 23:17:46] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [16/Mar/2025 23:17:46] "GET /ngrok_url.js HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [16/Mar/2025 23:17:47] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [16/Mar/2025 23:19:05] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [16/Mar/2025 23:19:05] "[36mGET /ngrok_url.js HTTP/1.1[0m" 304 -


Cuisine: lebanese
Rating: 4
Price Range: 3
Distance: 10
top_n: 5
Location: ('21.4225', '39.8262')


INFO:werkzeug:127.0.0.1 - - [16/Mar/2025 23:19:25] "POST /recommend HTTP/1.1" 200 -


✅ Found 2 restaurants serving Pakistani food.


In [None]:
import requests

url = "https://9c69-104-196-201-225.ngrok-free.app/recommend"
data = {
    "cuisine": "Pakistani",
    "min_rating": 3.5,
    "max_distance": 10,
    "latitude": 21.4225,
    "longitude": 39.8262,
    "top_n": 5
}

response = requests.post(url, json=data)
print(response.status_code)  # Should be 200 if working
print(response.json())  # Expected: JSON list of recommended restaurants
