In [1]:
import csv, json
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

# Load the connection settings from private.json.
with open("private.json", "r") as secrets_file:
    private = json.load(secrets_file)

uri = private["MONGO_ATLAS_URI"]

# Create the client, pinning server API version "1".
client = MongoClient(uri, server_api=ServerApi("1"))

# Ping the deployment to confirm the connection works. Any failure is printed
# rather than raised, so this cell always completes.
try:
    client.admin.command("ping")
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as err:
    print(err)


Pinged your deployment. You successfully connected to MongoDB!


In [10]:
csv_file_path = "ohio-100-largest-cities.csv"
json_file_path = "ohio-100-largest-cities.json"

# Columns whose values are dollar-formatted strings; "$" and "," are stripped
# before conversion to float.
CURRENCY_COLUMNS = ("Median Household Income", "Average Home Price")


def parse_currency(text):
    """Convert a dollar-formatted string to a float.

    Removes every "$" and "," from ``text`` and passes the remainder to
    ``float``. Raises ``ValueError`` if what is left is not a valid number.
    """
    return float(text.replace("$", "").replace(",", ""))


# Read every CSV row as a dict, converting the currency columns to numbers.
cities_data = []
# newline="" lets the csv module do its own newline handling, as its
# documentation requires for file objects passed to a reader.
with open(csv_file_path, "r", newline="") as csv_file:
    csv_reader = csv.DictReader(csv_file)
    for row in csv_reader:
        for column in CURRENCY_COLUMNS:
            row[column] = parse_currency(row[column])
        cities_data.append(row)

# Write the converted rows out as a JSON array of objects.
with open(json_file_path, "w") as json_file:
    json.dump(cities_data, json_file, indent=4)

print(f"Data has been converted to JSON and saved to {json_file_path}")

Data has been converted to JSON and saved to ohio-100-largest-cities.json


In [13]:
# Source JSON file and destination GeoJSON file
input_json_file = "ohio-100-largest-cities.json"
output_geojson_file = "ohio-100-largest-cities.geojson"


def city_to_feature(record):
    """Build a GeoJSON Point feature from one city record.

    The coordinates are stored as [longitude, latitude]; "Longitude" and
    "Latitude" are converted with ``float`` and "Population" with ``int``.
    The remaining properties are copied through unchanged.
    """
    longitude = float(record["Longitude"])
    latitude = float(record["Latitude"])
    return {
        "type": "Feature",
        "geometry": {
            "type": "Point",
            "coordinates": [longitude, latitude],
        },
        "properties": {
            "City": record["City"],
            "Population": int(record["Population"]),
            "Median Household Income": record["Median Household Income"],
            "Average Home Price": record["Average Home Price"],
        },
    }


# Load the list of city records
with open(input_json_file, "r") as json_file:
    cities_data = json.load(json_file)

# Wrap one feature per city in a FeatureCollection
geojson_data = {
    "type": "FeatureCollection",
    "features": [city_to_feature(record) for record in cities_data],
}

# Write the FeatureCollection to disk
with open(output_geojson_file, "w") as geojson_file:
    json.dump(geojson_data, geojson_file, indent=4)

print(f"GeoJSON data has been saved to {output_geojson_file}")

GeoJSON data has been saved to ohio-100-largest-cities.geojson


In [14]:
ohio_db = client.ohio_db
# Drop any existing "cities" collection so re-running this cell does not
# accumulate duplicate documents.
ohio_db.drop_collection("cities")
cities_collection = ohio_db.cities

# Path to the GeoJSON file
geojson_file_path = "ohio-100-largest-cities.geojson"

# Load the GeoJSON file
with open(geojson_file_path, "r") as geojson_file:
    geojson_data = json.load(geojson_file)

# Insert all features in a single batch instead of one request per feature.
# insert_many rejects an empty document list, so skip the call in that case.
features = geojson_data["features"]
if features:
    cities_collection.insert_many(features)

print("GeoJSON data has been inserted into the 'cities' collection.")

GeoJSON data has been inserted into the 'cities' collection.


In [15]:
# Create a 2dsphere index on the "geometry" field of each stored feature.
geometry_index_spec = [("geometry", "2dsphere")]
cities_collection.create_index(geometry_index_spec)
print("2dsphere index created on 'geometry' field.")

2dsphere index created on 'geometry' field.


In [23]:
import math

# Cleveland's coordinates
cleveland_coords = [-81.6944, 41.4993]  # [longitude, latitude]

# Search radius in miles. $centerSphere takes its radius in radians, so the
# distance is divided by the Earth's radius in the same unit (3963.2 miles).
radius_in_miles = 50
radius_in_radians = radius_in_miles / 3963.2

# Query for cities whose geometry lies within the radius around Cleveland
nearby_cities = cities_collection.find(
    {
        "geometry": {
            "$geoWithin": {"$centerSphere": [cleveland_coords, radius_in_radians]}
        }
    }
)

# Print each city and accumulate the totals needed for a population-weighted
# average of "Average Home Price".
print(f"Cities within {radius_in_miles} miles of Cleveland:")
total_population = 0
weighted_home_price = 0
for city in nearby_cities:
    properties = city["properties"]
    print(properties["City"], "-", properties["Population"])
    total_population += properties["Population"]
    weighted_home_price += properties["Population"] * properties["Average Home Price"]

# Guard the division: with no matching cities (or zero total population) the
# weighted average is undefined, so report that instead of raising.
if total_population:
    metro_home_price = weighted_home_price / total_population
    print(f"Metro Home Price = {metro_home_price}")
else:
    metro_home_price = None
    print("Metro Home Price = n/a (no cities matched the query)")


Cities within 50 miles of Cleveland:
Elyria - 52656
Lorain - 65211
Akron - 189347
Parma - 81146
Cleveland - 362656
Lakewood - 50002
Cuyahoga Falls - 49144
Mentor - 47450
Metro Home Price = 161072.4569747285


In [22]:
import math

# Columbus coordinates
columbus_coords = [-82.9988, 39.9612]  # [longitude, latitude]

# Search radius in miles. $centerSphere takes its radius in radians, so the
# distance is divided by the Earth's radius in the same unit (3963.2 miles).
radius_in_miles = 50
radius_in_radians = radius_in_miles / 3963.2

# Query for cities whose geometry lies within the radius around Columbus
nearby_cities = cities_collection.find(
    {
        "geometry": {
            "$geoWithin": {"$centerSphere": [columbus_coords, radius_in_radians]}
        }
    }
)

# Print each city and accumulate the totals needed for a population-weighted
# average of "Average Home Price".
print(f"Cities within {radius_in_miles} miles of Columbus:")
total_population = 0
weighted_home_price = 0
for city in nearby_cities:
    properties = city["properties"]
    print(properties["City"], "-", properties["Population"])
    total_population += properties["Population"]
    weighted_home_price += properties["Population"] * properties["Average Home Price"]

# Guard the division: with no matching cities (or zero total population) the
# weighted average is undefined, so report that instead of raising.
if total_population:
    metro_home_price = weighted_home_price / total_population
    print(f"Metro Home Price = {metro_home_price}")
else:
    metro_home_price = None
    print("Metro Home Price = n/a (no cities matched the query)")
    

Cities within 50 miles of Columbus:
Newark - 49934
Columbus - 913175
Springfield - 58662
Metro Home Price = 325627.3905796896
