In [1]:
from pymongo import MongoClient
import geopandas as gpd
from shapely.geometry import shape, mapping
import folium
import math
from pprint import pprint

# Integration

In [3]:
# client.drop_database("my_database")

## Connection to MongoDB and loading the files

In [5]:
# Connection to MongoDB
client = MongoClient("mongodb://admin:DataMan2023!@localhost:27017/")
db = client["my_database"]
collection = db["neighborhoods"]

In [6]:
# Reading the files
gdf_combined = gpd.read_file("C:/Users/edoar/combined_quartieri.geojson")
PolyHomePrices = gpd.read_file("C:/Users/edoar/PolyHomePrices.geojson")
PolyRestaurants = gpd.read_file("C:/Users/edoar/PolyRestaurants.geojson")
PolyMuseums = gpd.read_file("C:/Users/edoar/PolyMuseums.geojson")
PolyNightlife = gpd.read_file("C:/Users/edoar/PolyNightlife.geojson")
PolyDogParks = gpd.read_file("C:/Users/edoar/PolyDogParks.geojson")
PolyPharmacy = gpd.read_file("C:/Users/edoar/PolyPharmacy.geojson")
PolyPlaygrounds = gpd.read_file("C:/Users/edoar/PolyPlaygrounds.geojson")
PolySportVenues = gpd.read_file("C:/Users/edoar/PolySportVenues.geojson")
PolySchools = gpd.read_file("C:/Users/edoar/PolySchools.geojson")
PolyUniversity = gpd.read_file("C:/Users/edoar/PolyUniversity.geojson")
PolyCoworking = gpd.read_file("C:/Users/edoar/PolyCoworking.geojson")
PolyLibraries = gpd.read_file("C:/Users/edoar/PolyLibraries.geojson")
PolySupermarkets = gpd.read_file("C:/Users/edoar/PolySupermarkets.geojson")
PolyTransport = gpd.read_file("C:/Users/edoar/PolyTransport.geojson")

## Initializing the dictionary

In [11]:
# Create a dictionary to hold all neighborhood docs
neighborhood_docs = {}

# populating the base neighborhood documents
for idx, row in gdf_combined.iterrows():
    nb_name = row["Neighborhood"]
    # using shapely's "mapping function" from shapely.geometry to convert geometries
    # to a geojson-like dictionary to store them in MongoDB
    geo_json = mapping(row["geometry"])

    neighborhood_docs[nb_name] = {
        "_id": nb_name,
        "neighborhood_name": nb_name,
        "geometry": geo_json,
        "locations": {
            "restaurants": [],
            "museums": [],
            "nightlife": [],
            "dogparks": [],
            "pharmacies": [],
            "playgrounds": [],
            "sportvenues": [],
            "schools": [],
            "universities": [],
            "coworking": [],
            "libraries": [],
            "supermarkets": [],
            "transport": []
        },
        "home_prices": {
            "min_price": None,  # placeholder for the min price
            "max_price": None,  # placeholder for the max price
            "avg_price": None   # placeholder for the avg price
        }
    }

## Function to append rows

In [13]:
# Function to append rows from a POI GeoDataFrame to the neighborhood_docs
def append_to_neighborhoods(field, gdf, poi_key, field_mappings=None):
    """
    field: the sub-document where the list will be appended to. 
    gdf: A GeoDataFrame with columns ["Neighborhood", ...data columns...].
    poi_key: e.g. "pharmacies", "restaurants", etc.
    field_mappings: dict of { "source_column": "destination_field_name", ... }
                    used to pick and rename columns from the gdf row.
    """
    if field_mappings is None:
        # if not supplied, just store all columns except geometrya and Neighborhood
        field_mappings = {
            col: col 
            for col in gdf.columns 
            if col not in ("Neighborhood", "geometry")
        }
    
    # group by Neighborhood to handle rows for each neighborhood
    grouped = gdf.groupby("Neighborhood")
    
    # for each neighborhood:
    for nb_name, group_df in grouped:
        # if neighborhood is not in the dict, skip it
        if nb_name not in neighborhood_docs:
            continue
        
        # convert each row to a dictionary with the needed fields
        for _, row in group_df.iterrows():
            poi_data = {}
            for src_col, dest_col in field_mappings.items():
                if src_col in row:
                    poi_data[dest_col] = row[src_col]
            
            # append to the correct list inside the correct field
            neighborhood_docs[nb_name][field][poi_key].append(poi_data)

## Appending the GeoDataFrames

In [18]:
# Append each of the 13 POI DataFrames to the base neighborhood docs

# Restaurants
append_to_neighborhoods(
    field="locations",
    gdf=PolyRestaurants,
    poi_key="restaurants",
    field_mappings={
        "Business Name": "name",
        "Business Address": "address",
        "Categories": "category",
        "Average Star Rating": "avg_star_rating",
        "Review Count": "tot_ratings",
        "Price": "price"
    }
)

# Museums
append_to_neighborhoods(
    field="locations",
    gdf=PolyMuseums,
    poi_key="museums",
    field_mappings={
        "Museum Name": "name",
        "Museum Address": "address",
        "Categories": "category",
        "Average Star Rating": "avg_star_rating",
        "Review Count": "tot_ratings"
    }
)

# Nightlife
append_to_neighborhoods(
    field="locations",
    gdf=PolyNightlife,
    poi_key="nightlife",
    field_mappings={
        "Venue Name": "name",
        "Venue Address": "address",
        "Categories": "category",
        "Average Star Rating": "avg_star_rating",
        "Review Count": "tot_ratings"
    }
)

# Dog Parks
append_to_neighborhoods(
    field="locations",
    gdf=PolyDogParks,
    poi_key="dogparks",
    field_mappings={
        "località": "name",
        "area_mq": "area_mq",
        "perim_m": "perimeter_m",
        "obj_id": "park_id",
        "municipio": "municipality"
    }
)

# Pharmacies
append_to_neighborhoods(
    field="locations",
    gdf=PolyPharmacy,
    poi_key="pharmacies",
    field_mappings={
        "DESCRIZIONE_FARMACIA": "name",
        "INDIRIZZO": "address",
        "CODICE_FARMACIA": "pharmacy_id",
        "MUNICIPIO": "municipality"
    }
)

# Playgrounds
append_to_neighborhoods(
    field="locations",
    gdf=PolyPlaygrounds,
    poi_key="playgrounds",
    field_mappings={
        "località": "name",
        "area_mq": "area_mq",
        "perim_m": "perimeter_m",
        "obj_id": "park_id",
        "municipio": "municipality"
    }
)

# Sport Venues
append_to_neighborhoods(
    field="locations",
    gdf=PolySportVenues,
    poi_key="sportvenues",
    field_mappings={
        "Nome": "name",
        "Indirizzo": "address",
        "info": "category",
    }
)

# Schools
append_to_neighborhoods(
    field="locations",
    gdf=PolySchools,
    poi_key="schools",
    field_mappings={
        "DENOMINAZIONESCUOLA": "name",
        "INDIRIZZOSCUOLA": "address",
        "DESCRIZIONECARATTERISTICASCUOLA": "school_type",
        "DESCRIZIONETIPOLOGIAGRADOISTRUZIONESCUOLA": "educational_lvl",
        "MUNICIPIO": "municipality"
    }
)

# Universities
append_to_neighborhoods(
    field="locations",
    gdf=PolyUniversity,
    poi_key="universities",
    field_mappings={
        "DENOMINAZ": "name",
        "INDIRIZZO": "address",
        "FACOLTA": "faculty",
        "PROPRIETA": "ownership_type",
        "MUNICIPIO": "municipality"
    }
)

# Coworking
append_to_neighborhoods(
    field="locations",
    gdf=PolyCoworking,
    poi_key="coworking",
    field_mappings={
        "SPAZIO": "name",
        "Sede": "address",
        "Orario di apertura": "opening_hrs",
        "Numero postazioni": "tot_desks",
        "MUNICIPIO": "municipality"
    }
)

# Libraries
append_to_neighborhoods(
    field="locations",
    gdf=PolyLibraries,
    poi_key="libraries",
    field_mappings={
        "Biblioteche - Sede": "name",
        "Indirizzo": "address",
        "MUNICIPIO": "municipality"
    }
)

# Supermarkets
append_to_neighborhoods(
    field="locations",
    gdf=PolySupermarkets,
    poi_key="supermarkets",
    field_mappings={
        "name": "name"
    }
)

# Transport
append_to_neighborhoods(
    field="locations",
    gdf=PolyTransport,
    poi_key="transport",
    field_mappings={
        "Nome": "name",
        "Linee": "lines",
        "Mezzo": "transport_type"
    }
)

# Home Prices
for idx, row in PolyHomePrices.iterrows():
    nb_name = row["Neighborhood"]
    if nb_name in neighborhood_docs:
        # updateing the home_prices container with actual values
        neighborhood_docs[nb_name]["home_prices"]["min_price"] = row["Compr_min"]
        neighborhood_docs[nb_name]["home_prices"]["max_price"] = row["Compr_max"]
        neighborhood_docs[nb_name]["home_prices"]["avg_price"] = row["Compr_mean"]

## Inserting the data into MongoDB

In [21]:
# Insert the data into MongoDB
# (neighborhood_docs is a dictionary with neighborhood_name as keys and the final documents as values)
documents_to_insert = list(neighborhood_docs.values())  # conversion to list of dicts

collection.insert_many(documents_to_insert)

print("Data inserted into MongoDB")

Data inserted into MongoDB


# Queries

## Neighborhood (Example)

In [25]:
# Query for the neighborhood "Tre Torri"
tretorri_data = collection.find_one({"neighborhood_name": "Tre Torri"})
pprint(tretorri_data)

{'_id': 'Tre Torri',
 'geometry': {'coordinates': [[[9.1598895, 45.4742524],
                               [9.1599665, 45.4752257],
                               [9.159981, 45.475719],
                               [9.1600586, 45.4772662],
                               [9.1600766, 45.4774377],
                               [9.1600869, 45.4779766],
                               [9.1601359, 45.4786687],
                               [9.1602194, 45.4800522],
                               [9.1602396, 45.4801317],
                               [9.1602753, 45.4803015],
                               [9.1600595, 45.4803093],
                               [9.1586847, 45.4803591],
                               [9.1559822, 45.4803917],
                               [9.1516378, 45.4804495],
                               [9.1516812, 45.4802959],
                               [9.1516687, 45.4800506],
                               [9.1515654, 45.4780585],
                             

## Most diverse Neighborhoods (in terms of amenities)

In [28]:
pipeline = [
    {
        "$addFields": {
            "diversity_score": {
                "$size": {
                    "$filter": {
                        "input": {"$objectToArray": "$locations"},  # Convert 'locations' sub-document to array
                        "as": "amenity",
                        "cond": {"$gt": [{"$size": "$$amenity.v"}, 0]}  # Count non-empty categories
                    }
                }
            }
        }
    },
    {"$sort": {"diversity_score": -1}},  # Sort neighborhoods by diversity score (highest first)
    {"$limit": 5},  # Return only the top 5 neighborhoods
    {"$project": {  # Project the fields to include in the output
        "neighborhood_name": 1,
        "diversity_score": 1
    }}
]

# Execute the query
results = list(collection.aggregate(pipeline))

# Output results
print("\n=== Top 5 Neighborhoods with the Most Diverse Amenities ===")
for i, result in enumerate(results, start=1):
    print(f"{i}. {result['neighborhood_name']} (Diversity Score: {result['diversity_score']})")



=== Top 5 Neighborhoods with the Most Diverse Amenities ===
1. Guastalla (Diversity Score: 13)
2. Bovisa (Diversity Score: 13)
3. Buenos Aires - Venezia (Diversity Score: 13)
4. Città Studi (Diversity Score: 12)
5. Bicocca (Diversity Score: 12)


## Score calculation (Students, Singles/Couples, Families)

### NaNs removals

##### Since some of the neighborhoods have NaN values for the avg_price of the homes, the choice was to either exlude them at all from the ranking, or to assign the global average as their average home prices. Since keeping all the neighborhoods for comparison was our main goal, we decided to proceed with the second option

In [33]:
neighborhoods_with_nan = collection.find({})
print("Neighborhoods with avg_price = NaN:")
for doc in neighborhoods_with_nan:
    avg_price = doc.get("home_prices", {}).get("avg_price")
    if avg_price is not None and math.isnan(avg_price):  # Check if avg_price is NaN
        print(doc["neighborhood_name"])

Neighborhoods with avg_price = NaN:
Chiaravalle
Quintosole
Ronchetto delle Rane


In [35]:
# Compute global minimum or average price
price_stats = collection.aggregate([
    {"$match": {"home_prices.avg_price": {"$not": {"$eq": float("NaN")}}}},  # Exclude NaN and missing values
    {"$group": {
        "_id": None,
        "avg_avg_price": {"$avg": "$home_prices.avg_price"}
    }}
])
price_stats = next(price_stats, None)

global_avg_price = price_stats["avg_avg_price"]

# Update neighborhoods with NaN avg_price to the global average price
collection.update_many(
    {"neighborhood_name": {"$in": ["Chiaravalle", "Quintosole", "Ronchetto delle Rane"]}},
    {"$set": {"home_prices.avg_price": global_avg_price}}
)

UpdateResult({'n': 3, 'nModified': 3, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)

### Function to Compute Score

##### The function takes into consideration the number of locations of each type and the average price of homes in each neighborhood. Further development should be focused on taking into consideration also attributes of each location, like the quality of restaurants, the square metres of parks and playgrounds, or the number of workstations in coworking spaces. For this project we opted for a general score for each neighborhood, since assigning weights to those location-specific attributes would require external knowledge from an expert of the field, or also questionaries from the public.

In [39]:
def compute_score(neighborhood_doc, weights, price_weight, collection):
    # Compute a score for a neighborhood, considering distinct POIs for certain categories.

    # takes in input
    #    neighborhood_doc: A MongoDB document with neighborhood data.
    #    weights: A dictionary of weights for each POI category.
    #    price_weight: Weight to apply to the normalized avg_price.
    #    collection: The MongoDB collection to query for global min and max avg_price.

    # Returns:
    #    The total score for the neighborhood.

    total_score = 0.0

    # Categories requiring distinct filtering
    distinct_categories = {"universities", "sportvenues", "schools"}

    # Retrieve global min and max for each POI category to normalize the count
    poi_stats = collection.aggregate([
        {"$project": {
            "poi_counts": {
                "$map": {
                    "input": {"$objectToArray": "$locations"},
                    "as": "poi",
                    "in": {"k": "$$poi.k", "v": {"$size": {"$ifNull": ["$$poi.v", []]}}}
                }
            }
        }},
        {"$unwind": "$poi_counts"},
        {"$group": {
            "_id": "$poi_counts.k",
            "min_count": {"$min": "$poi_counts.v"},
            "max_count": {"$max": "$poi_counts.v"}
        }}
    ])

    # Convert the results into a dictionary
    global_poi_min_max = {stat["_id"]: {"min": stat["min_count"], "max": stat["max_count"]} for stat in poi_stats}

    # Normalize the POI counts and compute the scores
    for category, weight in weights.items():
        pois = neighborhood_doc.get("locations", {}).get(category, [])

        if category in distinct_categories:
            # For distinct categories, filter unique entries by address
            unique_pois = {poi.get("address") for poi in pois if "address" in poi}
            count = len(unique_pois)
        else:
            # Regular count for other categories
            count = len(pois)

        global_min = global_poi_min_max.get(category, {}).get("min", 0)
        global_max = global_poi_min_max.get(category, {}).get("max", 1)  # Avoid division by zero

        if global_max > global_min:
            normalized_count = (count - global_min) / (global_max - global_min)
        else:
            normalized_count = 0.0

        total_score += normalized_count * weight

    # Price influence
    avg_price = neighborhood_doc.get("home_prices", {}).get("avg_price")

    # Retrieve global min and max prices from the database for normalization of the avg price
    price_stats = collection.aggregate([
        {"$group": {
            "_id": None,
            "min_avg_price": {"$min": "$home_prices.avg_price"},
            "max_avg_price": {"$max": "$home_prices.avg_price"}
        }}
    ])
    price_stats = next(price_stats, None)
    min_avg_price = price_stats["min_avg_price"]
    max_avg_price = price_stats["max_avg_price"]

    normalized_price = (avg_price - min_avg_price) / (max_avg_price - min_avg_price)
    total_score -= normalized_price * price_weight

    return total_score


### Weights

In [42]:
# Example weighting dictionaries (tweak as you wish)

students_weights = {
    "restaurants": 2.0,
    "museums": 5.0,
    "nightlife": 8.0,
    "dogparks": 1.0,
    "pharmacies": 6.0,
    "playgrounds": 6.0,
    "sportvenues": 8.0,
    "schools": 1.0,
    "universities": 10.0,
    "coworking": 7.0,
    "libraries": 9.0,
    "supermarkets": 9.0,
    "transport": 10.0
}
# price weight
price_weight_students = 10.0


single_couples_weights = {
    "restaurants": 7.0,
    "museums": 5.0,
    "nightlife": 8.0,
    "dogparks": 5.0,
    "pharmacies": 6.0,
    "playgrounds": 1.0,
    "sportvenues": 7.0,
    "schools": 1.0,
    "universities": 1.0,
    "coworking": 8.0,
    "libraries": 5.0,
    "supermarkets": 10.0,
    "transport": 10.0
}
# price weight
price_weight_single_couples = 6.0


families_weights = {
    "restaurants": 1.0,
    "museums": 6.0,
    "nightlife": 1.0,
    "dogparks": 10.0,
    "pharmacies": 7.0,
    "playgrounds": 10.0,
    "sportvenues": 3.0,
    "schools": 10.0,
    "universities": 1.0,
    "coworking": 1.0,
    "libraries": 8.0,
    "supermarkets": 8.0,
    "transport": 4.0
}
# price weight
price_weight_families = 7.5


### Scores

In [45]:
# Read all neighborhoods
all_neighborhoods = list(collection.find({}))

# --- Ranking for Students ---
print("=== Ranking for Students ===")
students_scores = []
for nb in all_neighborhoods:
    score = compute_score(nb, students_weights, price_weight= price_weight_students, collection=collection)
    students_scores.append({
        "neighborhood_name": nb["neighborhood_name"],
        "score": score
    })

# sort by score descending
students_scores.sort(key=lambda x: x["score"], reverse=True)

# transform scores into percentages to be able to interpret better the scoring
if students_scores:
    max_score_students = students_scores[0]["score"]
    for item in students_scores: # sort of scaling
        item["percentage"] = (item["score"] / max_score_students) * 100 if max_score_students > 0 else 0 

# showing the top 5 neighborhoods
for rank, item in enumerate(students_scores[:5], start=1):
    print(f"{rank}. {item['neighborhood_name']} => {item['percentage']:.2f}%")


# --- Ranking for Singles/Couples ---
print("\n=== Ranking for Singles/Couples ===")
single_couples_scores = []
for nb in all_neighborhoods:
    score = compute_score(nb, single_couples_weights, price_weight= price_weight_single_couples, collection=collection)
    single_couples_scores.append({
        "neighborhood_name": nb["neighborhood_name"],
        "score": score
    })

single_couples_scores.sort(key=lambda x: x["score"], reverse=True)

# transform scores into percentages to be able to interpret better the scoring
if single_couples_scores:
    max_score_single_couples = single_couples_scores[0]["score"]
    for item in single_couples_scores: # sort of scaling
        item["percentage"] = (item["score"] / max_score_single_couples) * 100 if max_score_single_couples > 0 else 0 

# showing the top 5 neighborhoods
for rank, item in enumerate(single_couples_scores[:5], start=1):
    print(f"{rank}. {item['neighborhood_name']} => {item['percentage']:.2f}%")


# --- Ranking for Families ---
print("\n=== Ranking for Families ===")
families_scores = []
for nb in all_neighborhoods:
    score = compute_score(nb, families_weights, price_weight= price_weight_families, collection=collection)
    families_scores.append({
        "neighborhood_name": nb["neighborhood_name"],
        "score": score
    })

families_scores.sort(key=lambda x: x["score"], reverse=True)

# transform scores into percentages to be able to interpret better the scoring
if families_scores:
    max_score_families = families_scores[0]["score"]
    for item in families_scores: # sort of scaling
        item["percentage"] = (item["score"] / max_score_families) * 100 if max_score_families > 0 else 0 

# showing the top 5 neighborhoods
for rank, item in enumerate(families_scores[:5], start=1):
    print(f"{rank}. {item['neighborhood_name']} => {item['percentage']:.2f}%")

=== Ranking for Students ===
1. Buenos Aires - Venezia => 100.00%
2. Città Studi => 69.57%
3. Niguarda - Cà Granda => 65.02%
4. Duomo => 64.33%
5. Villapizzone => 63.95%

=== Ranking for Singles/Couples ===
1. Buenos Aires - Venezia => 100.00%
2. Duomo => 72.00%
3. Città Studi => 57.49%
4. Niguarda - Cà Granda => 54.08%
5. Villapizzone => 52.21%

=== Ranking for Families ===
1. Niguarda - Cà Granda => 100.00%
2. Buenos Aires - Venezia => 98.91%
3. Villapizzone => 81.92%
4. Stadera => 81.15%
5. Gallaratese => 72.33%


### Cloropleth map (for Students)

In [48]:
# Fetch all neighborhood documents and calculate scores using the previously choosen students scores
neighborhood_data = [
    {
        "neighborhood_name": doc["neighborhood_name"],
        "geometry": shape(doc["geometry"]),
        "score": compute_score(doc, students_weights, price_weight=price_weight_students, collection=collection)
    }
    for doc in collection.find({})
]

# find the maximum score
max_score = max([item["score"] for item in neighborhood_data], default=1)  #to avoid division by zero

# normalizing the scores to percentages
for item in neighborhood_data:
    item["percentage"] = (item["score"] / max_score) * 100 if max_score > 0 else 0

# conversion to GeoDataFrame and ensuring CRS is set to EPSG:4326
gdf = gpd.GeoDataFrame(neighborhood_data, crs="EPSG:4326")

# generate an empty folium map
map1 = folium.Map(location=[45.4642, 9.1900], zoom_start=12)

# first add a choropleth layer
folium.Choropleth(
    geo_data=gdf.to_json(),
    name="choropleth",
    data=gdf,
    columns=["neighborhood_name", "percentage"],
    key_on="feature.properties.neighborhood_name",
    fill_color="YlOrRd",
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="Students Ranking Score (%)"
).add_to(map1)

# add the tooltip layer with transparent polygons
folium.GeoJson(
    gdf,
    name="Neighborhoods",
    tooltip=folium.GeoJsonTooltip(
        fields=["neighborhood_name", "percentage"],
        aliases=["Neighborhood:", "Score (%):"],
        localize=True
    ),
    style_function=lambda x: {"fillColor": "transparent", "color": "transparent", "weight": 0}
).add_to(map1)

map1

## Suitable Neighborhoods

##### Here the user gets asked some questions in order to present the most suitable neighborhood depending on their needs. Further development could be focus on asking even more in depth questions, for example if it is important for restaurants to have excellent reviews, or if some combinations are prefered compared to others (for example a user could prefer to have lots of restaurants compared to the presence of a library, but ideally would want both to be in their neighborhood)

In [52]:
# Interactive user inputs
print("\nPlease specify your preferences:")

faculty = input("Enter the faculty you're looking for (e.g., Economia): ").strip().upper()  # Convert to uppercase
parks_required = input("Do you need a dog park? (yes/no): ").strip().lower() == "yes"
library_required = input("Do you need a library? (yes/no): ").strip().lower() == "yes"

restaurant_category = input("Enter the type of restaurant you prefer (e.g., Italian): ").strip()
min_restaurants = int(input("Enter the minimum number of restaurants you want: ").strip() or 0)

coworking_required = input("Do you need a coworking space? (yes/no): ").strip().lower() == "yes"
sport_venue_required = input("Do you need a sport venue? (yes/no): ").strip().lower() == "yes"
sport_venue_category = input("Enter the sport venue category (e.g., Piscina, Atletica) [optional]: ").strip().upper() if sport_venue_required else None
supermarket_required = input("Do you need a supermarket? (yes/no): ").strip().lower() == "yes"
museum_required = input("Do you need a museum? (yes/no): ").strip().lower() == "yes"
pharmacy_required = input("Do you need a pharmacy? (yes/no): ").strip().lower() == "yes"
playground_required = input("Do you need a playground? (yes/no): ").strip().lower() == "yes"

transport_required = input("Do you need public transport? (yes/no): ").strip().lower() == "yes"
metro_required = train_required = bus_required = False
if transport_required:
    metro_required = input("Do you need metro service? (yes/no): ").strip().lower() == "yes"
    train_required = input("Do you need train service? (yes/no): ").strip().lower() == "yes"
    bus_required = input("Do you need bus service? (yes/no): ").strip().lower() == "yes"

budget = float(input("Enter your budget for home prices (price in euros per square meter): ").strip() or 0)

# Build the query dynamically based on inputs
query = {
    "$addFields": {
        "match_score": {
            "$add": [
                # Check for faculty match in university
                {"$cond": [
                    {"$in": [faculty, "$locations.universities.faculty"]}, 1, 0
                ]} if faculty else 0,
                # Check for restaurant category match
                {"$cond": [
                    {"$in": [restaurant_category, "$locations.restaurants.category"]}, 1, 0
                ]} if restaurant_category else 0,
                # Check for minimum number of restaurants
                {"$cond": [
                    {"$gte": [{"$size": "$locations.restaurants"}, min_restaurants]}, 1, 0
                ]} if min_restaurants > 0 else 0,
                # Check for parks presence
                {"$cond": [
                    {"$gt": [{"$size": "$locations.dogparks"}, 0]}, 1, 0
                ]} if parks_required else 0,
                # Check for libraries presence
                {"$cond": [
                    {"$gt": [{"$size": "$locations.libraries"}, 0]}, 1, 0
                ]} if library_required else 0,
                # Check for coworking presence
                {"$cond": [
                    {"$gt": [{"$size": "$locations.coworking"}, 0]}, 1, 0
                ]} if coworking_required else 0,
                # Check for sport venue presence
                {"$cond": [
                    {"$gt": [{"$size": "$locations.sportvenues"}, 0]}, 1, 0
                ]} if sport_venue_required else 0,
                # Check for specific sport venue category match
                {"$cond": [
                    {"$in": [sport_venue_category, "$locations.sportvenues.category"]}, 1, 0
                ]} if sport_venue_category else 0,
                # Check for supermarket presence
                {"$cond": [
                    {"$gt": [{"$size": "$locations.supermarkets"}, 0]}, 1, 0
                ]} if supermarket_required else 0,
                # Check for museum presence
                {"$cond": [
                    {"$gt": [{"$size": "$locations.museums"}, 0]}, 1, 0
                ]} if museum_required else 0,
                # Check for pharmacy presence
                {"$cond": [
                    {"$gt": [{"$size": "$locations.pharmacies"}, 0]}, 1, 0
                ]} if pharmacy_required else 0,
                # Check for playground presence
                {"$cond": [
                    {"$gt": [{"$size": "$locations.playgrounds"}, 0]}, 1, 0
                ]} if playground_required else 0,
                # Check for public transport
                {"$cond": [
                    {"$or": [
                        {"$in": ["Metro", "$locations.transport.transport_type"]} if metro_required else False,
                        {"$in": ["Treno", "$locations.transport.transport_type"]} if train_required else False,
                        {"$in": ["Bus", "$locations.transport.transport_type"]} if bus_required else False
                    ]}, 1, 0
                ]} if transport_required else 0,
                # Check for budget in home prices
                {"$cond": [
                    {"$lte": ["$home_prices.avg_price", budget]}, 1, 0
                ]} if budget > 0 else 0
            ]
        }
    }
}

# Run the aggregation pipeline
pipeline = [
    {
        "$match": {
            "home_prices.avg_price": {"$lte": 2 * budget}  # Keep neighborhoods within twice the budget
        }
    },
    query,  # Add the match_score field
    {"$sort": {
            "match_score": -1,  # Higher match scores first
            "home_prices.avg_price": 1  # Lower avg prices first
        }
    },
    {"$limit": 3},  # Limit to the top 3 neighborhoods
    {"$project": {  # Project only relevant fields for output
        "neighborhood_name": 1,
        "match_score": 1,
        "locations": 1,
        "home_prices.avg_price": 1
    }}
]

results = list(collection.aggregate(pipeline))

# Output the top neighborhoods
print("\n=== Top 3 Suitable Neighborhoods ===")
if results:
    for i, neighborhood in enumerate(results, start=1):
        print(f"\n{i}. {neighborhood['neighborhood_name']} (Score: {neighborhood['match_score']})")
        print(f"  - Average Price: €{neighborhood.get('home_prices', {}).get('avg_price', 'N/A')}")
        
        fulfilled = []
        not_fulfilled = []

        # Faculty check
        if faculty:
            if any(faculty in uni.get("faculty", []) for uni in neighborhood["locations"].get("universities", [])):
                fulfilled.append("Faculty")
            else:
                not_fulfilled.append("Faculty")

        # Restaurant category check
        if restaurant_category:
            if any(restaurant_category in rest.get("category", []) for rest in neighborhood["locations"].get("restaurants", [])):
                fulfilled.append("Restaurant Category")
            else:
                not_fulfilled.append("Restaurant Category")

        # Minimum number of restaurants
        if min_restaurants > 0:
            if len(neighborhood["locations"].get("restaurants", [])) >= min_restaurants:
                fulfilled.append("Minimum Number of Restaurants")
            else:
                not_fulfilled.append("Minimum Number of Restaurants")

        # Parks check
        if parks_required:
            if len(neighborhood["locations"].get("dogparks", [])) > 0:
                fulfilled.append("Dog Park")
            else:
                not_fulfilled.append("Dog Park")

        # Library check
        if library_required:
            if len(neighborhood["locations"].get("libraries", [])) > 0:
                fulfilled.append("Library")
            else:
                not_fulfilled.append("Library")

        # Coworking check
        if coworking_required:
            if len(neighborhood["locations"].get("coworking", [])) > 0:
                fulfilled.append("Coworking Space")
            else:
                not_fulfilled.append("Coworking Space")

        # Sport venue check
        if sport_venue_required:
            if len(neighborhood["locations"].get("sportvenues", [])) > 0:
                fulfilled.append("Sport Venue")
            else:
                not_fulfilled.append("Sport Venue")

        # Sport venue category check
        if sport_venue_category:
            if any(sport_venue_category in venue.get("category", []) for venue in neighborhood["locations"].get("sportvenues", [])):
                fulfilled.append("Specific Sport Venue Category")
            else:
                not_fulfilled.append("Specific Sport Venue Category")

        # Supermarket check
        if supermarket_required:
            if len(neighborhood["locations"].get("supermarkets", [])) > 0:
                fulfilled.append("Supermarket")
            else:
                not_fulfilled.append("Supermarket")

        # Museum check
        if museum_required:
            if len(neighborhood["locations"].get("museums", [])) > 0:
                fulfilled.append("Museum")
            else:
                not_fulfilled.append("Museum")

        # Pharmacy check
        if pharmacy_required:
            if len(neighborhood["locations"].get("pharmacies", [])) > 0:
                fulfilled.append("Pharmacy")
            else:
                not_fulfilled.append("Pharmacy")

        # Playground check
        if playground_required:
            if len(neighborhood["locations"].get("playgrounds", [])) > 0:
                fulfilled.append("Playground")
            else:
                not_fulfilled.append("Playground")

        # Transport check
        if transport_required:
            transport_fulfilled = []
            transport_list = neighborhood["locations"].get("transport", [])

            # Iterate through transport_list to find the required transport types
            if metro_required and any("Metro" in transport.get("transport_type", []) for transport in transport_list):
                transport_fulfilled.append("Metro")
            if train_required and any("Treno" in transport.get("transport_type", []) for transport in transport_list):
                transport_fulfilled.append("Train")
            if bus_required and any("Bus" in transport.get("transport_type", []) for transport in transport_list):
                transport_fulfilled.append("Bus")

            # If any transport types are fulfilled, add them to fulfilled; otherwise, add to not_fulfilled
            if transport_fulfilled:
                fulfilled.append(f"Transport ({', '.join(transport_fulfilled)})")
            else:
                not_fulfilled.append("Transport")

        # Budget check
        if budget > 0:
            avg_price = neighborhood.get("home_prices", {}).get("avg_price", float("inf"))
            if avg_price <= budget:
                fulfilled.append("Budget")
            else:
                not_fulfilled.append("Budget")

        # Print fulfilled and not fulfilled
        print("  - Fulfilled:", ", ".join(fulfilled) if fulfilled else "None")
        print("  - Not Fulfilled:", ", ".join(not_fulfilled) if not_fulfilled else "None")
else:
    print("No neighborhoods match your criteria.")


Please specify your preferences:


Enter the faculty you're looking for (e.g., Economia):  Fisica
Do you need a dog park? (yes/no):  no
Do you need a library? (yes/no):  yes
Enter the type of restaurant you prefer (e.g., Italian):  Seafood
Enter the minimum number of restaurants you want:  6
Do you need a coworking space? (yes/no):  yes
Do you need a sport venue? (yes/no):  yes
Enter the sport venue category (e.g., Piscina, Atletica) [optional]:  Tennis
Do you need a supermarket? (yes/no):  yes
Do you need a museum? (yes/no):  no
Do you need a pharmacy? (yes/no):  yes
Do you need a playground? (yes/no):  no
Do you need public transport? (yes/no):  yes
Do you need metro service? (yes/no):  yes
Do you need train service? (yes/no):  no
Do you need bus service? (yes/no):  yes
Enter your budget for home prices (price in euros per square meter):  3500



=== Top 3 Suitable Neighborhoods ===

1. Città Studi (Score: 10)
  - Average Price: €3947.5
  - Fulfilled: Faculty, Restaurant Category, Minimum Number of Restaurants, Library, Coworking Space, Sport Venue, Specific Sport Venue Category, Supermarket, Pharmacy, Transport (Metro, Bus)
  - Not Fulfilled: Budget

2. Stadera (Score: 9)
  - Average Price: €2990.625
  - Fulfilled: Minimum Number of Restaurants, Library, Coworking Space, Sport Venue, Specific Sport Venue Category, Supermarket, Pharmacy, Transport (Metro, Bus), Budget
  - Not Fulfilled: Faculty, Restaurant Category

3. Quarto Cagnino (Score: 9)
  - Average Price: €3086.4583333333335
  - Fulfilled: Restaurant Category, Minimum Number of Restaurants, Coworking Space, Sport Venue, Specific Sport Venue Category, Supermarket, Pharmacy, Transport (Bus), Budget
  - Not Fulfilled: Faculty, Library
