In [None]:
### Steps:
##### 1. Explore the latitude and longitude coordinates of the restaurants and visualize their distribution on a map.
##### 2. Group the restaurants by city or locality and analyze the concentration of restaurants in different areas.
##### 3. Calculate statistics such as the average ratings, cuisines, or price ranges by city or locality.
##### 4. Identify any interesting insights or patterns related to the locations of the restaurants.

In [None]:
import html
import os

import pandas as pd

# Read the preprocessed restaurant records; the path is relative to the working directory
file_path = 'Preprocessed_Dataset.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the leading rows (head() defaults to five) as plain text
print(df.head())


import geopandas as gpd
import matplotlib.pyplot as plt

# Headers of the coordinate columns in `df`; change these if the dataset names them differently
latitude_column = 'Latitude'
longitude_column = 'Longitude'

# Build one point per record (x = longitude, y = latitude) and attach it as the geometry
restaurant_points = gpd.points_from_xy(df[longitude_column], df[latitude_column])
gdf = gpd.GeoDataFrame(df, geometry=restaurant_points)

# Draw the restaurants as small red dots; gdf.plot() returns the Axes it drew on,
# so the title and axis labels are applied to that same Axes before showing it
ax = gdf.plot(marker='o', color='red', markersize=5)
ax.set_title('Restaurant Locations')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
plt.show()


In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt

# Country outlines for the base map.
# First try the Natural Earth low-res dataset through `gpd.datasets`; when that
# access raises AttributeError (the accessor is not available in every GeoPandas
# release), read the equivalent GeoJSON from the natural-earth-vector repository
# instead. The fallback needs network access.
try:
    world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
except AttributeError:
    world = gpd.read_file('https://github.com/nvkelso/natural-earth-vector/blob/master/geojson/ne_110m_admin_0_countries.geojson?raw=true')

# Headers of the coordinate columns in `df`; change these if the dataset names them differently
latitude_column = 'Latitude'
longitude_column = 'Longitude'

# Build the restaurant layer and declare its CRS explicitly. The raw columns are
# taken to be WGS84 degrees (EPSG:4326); without a CRS the layer is "naive" and
# GeoPandas cannot tell that it shares a coordinate system with the base map.
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df[longitude_column], df[latitude_column]),
    crs='EPSG:4326',
)

# Draw both layers on one explicit Axes: outlines first, restaurants on top
fig, ax = plt.subplots(figsize=(15, 10))
world.boundary.plot(ax=ax, linewidth=1)
gdf.plot(ax=ax, marker='o', color='red', markersize=5, alpha=0.6)

# Label through the Axes object rather than pyplot's implicit "current axes"
ax.set_title('Restaurant Locations on World Map')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')

plt.show()

In [None]:
import pandas as pd
import plotly.express as px

# Re-read the dataset so this cell works from the plain on-disk records
file_path = 'Preprocessed_Dataset.csv'
df = pd.read_csv(file_path)

# Headers of the coordinate columns in `df`
latitude_column = 'Latitude'
longitude_column = 'Longitude'

# The Carto / OpenStreetMap tile styles used below need no Mapbox token, so none
# is hard-coded here. If a Mapbox-hosted style is wanted, export MAPBOX_TOKEN in
# the environment and it is registered with plotly; secrets stay out of the notebook.
mapbox_token = os.environ.get('MAPBOX_TOKEN')
if mapbox_token:
    px.set_mapbox_access_token(mapbox_token)

# Single source of truth for the initial zoom (used by both the figure and layout)
map_zoom = 2.5

# Plot restaurant locations on an interactive map
fig = px.scatter_mapbox(df, lat=latitude_column, lon=longitude_column, zoom=map_zoom)

# Map style, viewport and figure size
fig.update_layout(
    mapbox_style='carto-darkmatter',  # alternatives: 'open-street-map', 'carto-positron', ...
    mapbox_zoom=map_zoom,
    # Centre the viewport on the mean coordinate of all restaurants
    mapbox_center={"lat": df[latitude_column].mean(), "lon": df[longitude_column].mean()},
    height=600,
    width=800,
)

# Show the interactive map
fig.show()

In [None]:
import folium
from folium.plugins import MarkerCluster

# Headers of the coordinate columns in `df`
latitude_column = 'Latitude'
longitude_column = 'Longitude'

# folium raises on NaN coordinates, so only rows with both values can be placed
located = df.dropna(subset=[latitude_column, longitude_column])

# Centre the map on the mean coordinate of the restaurants that will be drawn
map_center = [located[latitude_column].mean(), located[longitude_column].mean()]
m = folium.Map(location=map_center, zoom_start=2)

# Cluster layer that groups nearby markers at low zoom levels
marker_cluster = MarkerCluster().add_to(m)

# One marker per restaurant. Iterating zipped columns avoids building a Series
# per row the way iterrows() does.
for name, cuisines, rating, lat, lon in zip(
    located['Restaurant Name'],
    located['Cuisines'],
    located['Aggregate rating'],
    located[latitude_column],
    located[longitude_column],
):
    # Popup and tooltip content is rendered as HTML, so text taken from the
    # dataset is escaped to stop characters such as '<' or '&' breaking the markup.
    safe_name = html.escape(str(name))
    popup_text = (
        f"<b>{safe_name}</b><br>"
        f"<b>Cuisine:</b> {html.escape(str(cuisines))}<br>"
        f"<b>Rating:</b> {html.escape(str(rating))}"
    )

    folium.Marker(
        location=[lat, lon],
        popup=folium.Popup(popup_text, max_width=300),
        tooltip=safe_name,  # Tooltip shows restaurant name on hover
        icon=folium.Icon(color='red', icon='info-sign'),
    ).add_to(marker_cluster)

# Save the map to an HTML file
map_file = 'restaurant_map.html'
m.save(map_file)

# Display the map
m


In [None]:
import folium
from folium.plugins import MarkerCluster, HeatMap

# Headers of the coordinate columns in `df`
latitude_column = 'Latitude'
longitude_column = 'Longitude'

# folium raises on NaN coordinates (for both HeatMap points and Marker
# locations), so only rows with both values can be placed
located = df.dropna(subset=[latitude_column, longitude_column])

# Centre the map on the mean coordinate of the restaurants that will be drawn
map_center = [located[latitude_column].mean(), located[longitude_column].mean()]
m = folium.Map(location=map_center, zoom_start=2)

# [lat, lon] pairs feeding the density layer
restaurant_locations = located[[latitude_column, longitude_column]].values.tolist()

# Add a heatmap layer to the map
HeatMap(restaurant_locations).add_to(m)

# Cluster layer that groups nearby markers at low zoom levels
marker_cluster = MarkerCluster().add_to(m)

# One marker per restaurant. Iterating zipped columns avoids building a Series
# per row the way iterrows() does.
for name, cuisines, rating, lat, lon in zip(
    located['Restaurant Name'],
    located['Cuisines'],
    located['Aggregate rating'],
    located[latitude_column],
    located[longitude_column],
):
    # Popup and tooltip content is rendered as HTML, so text taken from the
    # dataset is escaped to stop characters such as '<' or '&' breaking the markup.
    safe_name = html.escape(str(name))
    popup_text = (
        f"<b>{safe_name}</b><br>"
        f"<b>Cuisine:</b> {html.escape(str(cuisines))}<br>"
        f"<b>Rating:</b> {html.escape(str(rating))}"
    )

    folium.Marker(
        location=[lat, lon],
        popup=folium.Popup(popup_text, max_width=300),
        tooltip=safe_name,  # Tooltip shows restaurant name on hover
        icon=folium.Icon(color='red', icon='info-sign'),
    ).add_to(marker_cluster)

# Save the map to an HTML file
map_file = 'restaurant_cluster_heatmap.html'
m.save(map_file)

# Display the map
m


In [None]:
# Count how many restaurant rows fall under each City value
city_group = df.groupby('City').size().rename('restaurant_count').reset_index()

# Rank the cities from most to fewest restaurants
city_group_sorted = city_group.sort_values('restaurant_count', ascending=False)

# Print the whole ranking
print(city_group_sorted)

In [None]:
# Count how many restaurant rows fall under each City value
city_group = df.groupby('City').size().rename('restaurant_count').reset_index()

# Print only the cities with the most restaurants (head() defaults to five rows)
ranked_cities = city_group.sort_values('restaurant_count', ascending=False)
print(ranked_cities.head())


In [None]:
import pandas as pd

# Column headers used by the per-city statistics; edit here if the CSV names them differently
city_column = 'City'
rating_column = 'Aggregate rating'
cuisine_column = 'Cuisines'
price_range_column = 'Price range'

# Re-read the preprocessed records from disk (path is relative to the working directory)
file_path = 'Preprocessed_Dataset.csv'
df = pd.read_csv(file_path)

# Function to safely get the mode
def get_mode(series):
    """Return the most frequent value of ``series``, or ``None`` if there is none.

    ``Series.mode()`` ignores missing values by default and returns an empty
    Series when nothing is left (empty or all-NaN input); that case maps to
    ``None`` instead of raising. When several values tie, ``mode()`` returns
    them sorted and the first one is used.
    """
    # Compute the mode once; it is needed both for the emptiness check and the result
    modes = series.mode()
    if modes.empty:
        return None
    # Positional access: take the first mode regardless of the index labels
    return modes.iloc[0]

# Per-city summary: mean rating, most frequent cuisine string (via get_mode), mean price range
aggregations = {
    rating_column: 'mean',
    cuisine_column: get_mode,
    price_range_column: 'mean',
}
city_stats = df.groupby(city_column).agg(aggregations).reset_index()

# Show the first rows of the summary
print(city_stats.head())


In [None]:
import pandas as pd

# Column headers used by the per-city statistics; edit here if the CSV names them differently
city_column = 'City'
rating_column = 'Aggregate rating'
cuisine_column = 'Cuisines'
price_range_column = 'Price range'

# Re-read the preprocessed records from disk (path is relative to the working directory)
file_path = 'Preprocessed_Dataset.csv'
df = pd.read_csv(file_path)

# Function to safely get the mode
def get_mode(series):
    """Return the most frequent value of ``series``, or ``None`` if there is none.

    ``Series.mode()`` ignores missing values by default and returns an empty
    Series when nothing is left (empty or all-NaN input); that case maps to
    ``None`` instead of raising. When several values tie, ``mode()`` returns
    them sorted and the first one is used.
    """
    # Compute the mode once; it is needed both for the emptiness check and the result
    modes = series.mode()
    if modes.empty:
        return None
    # Positional access: take the first mode regardless of the index labels
    return modes.iloc[0]

# Per-city summary: mean rating, most frequent cuisine string (via get_mode), mean price range
aggregations = {
    rating_column: 'mean',
    cuisine_column: get_mode,
    price_range_column: 'mean',
}
city_stats = df.groupby(city_column).agg(aggregations).reset_index()

# Show the first rows of the summary
print(city_stats.head())

In [None]:
# Count how many restaurant rows fall under each City value
city_group = df.groupby('City').size().rename('restaurant_count').reset_index()

# Print only the cities with the most restaurants (head() defaults to five rows)
ranked_cities = city_group.sort_values('restaurant_count', ascending=False)
print(ranked_cities.head())


In [None]:
# Pattern hunting starts here: which cities have the highest mean rating?
# Sort the per-city summary by mean rating, best first, and keep the leading
# rows (head() defaults to five).
cities_by_rating = city_stats.sort_values('Aggregate rating', ascending=False)
high_rated_cities = cities_by_rating.head()
print(high_rated_cities)

In [None]:
# Headline table: the top-rated cities selected earlier
print("\nCities with the Highest Average Ratings:\n")
print(high_rated_cities)

# Drill into each of those cities in turn. Rows are matched on the city name
# case-insensitively; for each city the three most frequent cuisine strings and
# the mean of the 'Average Cost for two' column are reported.
for city in high_rated_cities['City']:
    is_this_city = df[city_column].str.lower().eq(city.lower())
    city_data = df.loc[is_this_city]

    top_cuisines = city_data[cuisine_column].value_counts().head(3)
    average_cost_for_two = city_data['Average Cost for two'].mean()

    print(f"\nAdditional Insights for {city}:\n")
    print(f"Top 3 Cuisines: {', '.join(top_cuisines.index)}")
    print(f"Average Cost for Two: ${average_cost_for_two:.2f}")


In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt

# Place each top-rated city at the mean coordinate of its own restaurants.
# Deriving the coordinates from `df` (rather than hard-coding a fixed list) keeps
# every point on the city it is labelled with and works for any number of rows.
city_centroids = df.groupby('City')[['Latitude', 'Longitude']].mean()
high_rated_cities = high_rated_cities.assign(
    Latitude=high_rated_cities['City'].map(city_centroids['Latitude']),
    Longitude=high_rated_cities['City'].map(city_centroids['Longitude']),
)

# GeoDataFrame of the top-rated cities, declared as WGS84 longitude/latitude
gdf_high_rated_cities = gpd.GeoDataFrame(
    high_rated_cities,
    geometry=gpd.points_from_xy(high_rated_cities['Longitude'], high_rated_cities['Latitude']),
    crs='EPSG:4326',
)

# Initialize the plot on a black background
fig, ax = plt.subplots(figsize=(10, 8))
ax.set_facecolor('black')

# Base layer: world countries (`world` is loaded by the earlier world-map cell)
world.plot(ax=ax, color='lightgrey', edgecolor='white')

# Top-rated cities as large red markers
gdf_high_rated_cities.plot(ax=ax, marker='o', color='red', markersize=100, label='High Rated Cities')

# Annotate each point with its city name
for x, y, city_name in zip(
    gdf_high_rated_cities.geometry.x,
    gdf_high_rated_cities.geometry.y,
    gdf_high_rated_cities['City'],
):
    ax.text(x, y, city_name, fontsize=12, ha='center', color='white')

# Title and axis labels, set on the Axes rather than pyplot's implicit current axes
ax.set_title('Cities with Highest Average Ratings', fontsize=16)
ax.set_xlabel('Longitude', fontsize=14)
ax.set_ylabel('Latitude', fontsize=14)

# Legend and grid
ax.legend()
ax.grid(True)

# Show the plot
fig.tight_layout()
plt.show()


In [None]:
import pandas as pd

# Column headers used by the statistics and the city lookup; edit here if the CSV names them differently
city_column = 'City'
rating_column = 'Aggregate rating'
cuisine_column = 'Cuisines'
price_range_column = 'Price range'
cost_for_two_column = 'Average Cost for two'

# Re-read the preprocessed records from disk (path is relative to the working directory)
file_path = 'Preprocessed_Dataset.csv'
df = pd.read_csv(file_path)

# Function to safely get the mode
def get_mode(series):
    """Return the most frequent value of ``series``, or ``None`` if there is none.

    ``Series.mode()`` ignores missing values by default and returns an empty
    Series when nothing is left (empty or all-NaN input); that case maps to
    ``None`` instead of raising. When several values tie, ``mode()`` returns
    them sorted and the first one is used.
    """
    # Compute the mode once; it is needed both for the emptiness check and the result
    modes = series.mode()
    if modes.empty:
        return None
    # Positional access: take the first mode regardless of the index labels
    return modes.iloc[0]

# Per-city summary. Every "most common value" column goes through get_mode so a
# city whose values are all missing yields None instead of raising on `mode()[0]`.
city_stats = df.groupby(city_column).agg({
    rating_column: 'mean',                  # Average rating
    cuisine_column: get_mode,               # Most common cuisine string
    price_range_column: 'mean',             # Average price range
    cost_for_two_column: 'mean',            # Average cost for two
    # 'Yes' when at least one restaurant in the city has the value 'Yes'
    'Has Table booking': lambda flags: 'Yes' if (flags == 'Yes').any() else 'No',
    'Has Online delivery': lambda flags: 'Yes' if (flags == 'Yes').any() else 'No',
    'Rating color': get_mode,               # Most common rating color
    'Rating text': get_mode,                # Most common rating text
    'Votes': 'sum'                          # Total votes across the city
}).reset_index()

# Display the calculated statistics
print(city_stats.head())

# Prompt for a city name; surrounding whitespace is stripped
user_city = input("\nEnter the city name to get insights: ").strip()

# Select the rows whose city matches the input, ignoring letter case
matches_city = df[city_column].str.lower().eq(user_city.lower())
city_data = df.loc[matches_city]

if city_data.empty:
    # Nothing matched: say so instead of printing empty statistics
    print(f"\nNo data found for '{user_city}'.")
else:
    # Three most frequent cuisine strings and the mean cost for two in that city
    top_cuisines = city_data[cuisine_column].value_counts().head(3)
    average_cost_for_two = city_data[cost_for_two_column].mean()

    print(f"\nAdditional Insights for {user_city}:\n")
    print(f"Top 3 Cuisines: {', '.join(top_cuisines.index)}")
    print(f"Average Cost for Two: ${average_cost_for_two:.2f}")
