In [1]:
import psycopg2
import csv
import pandas as pd

In [2]:
def copy_to_postgres(filename, pg_cursor, table_name):
    with open(filename, 'r', encoding='utf-8-sig') as file:
        next(file)
        pg_cursor.copy_from(file, table_name, sep=',') 

In [3]:
########################### MUST CREATE CONNECTION to Postgres before any calls are made
pg_conn = psycopg2.connect(
        dbname="postgres",
        user="postgres",
        password="darkdreamer17?",  # change this to your own password
        host="localhost",
        port="5432"
    )
pg_conn.autocommit = True
pg_cursor = pg_conn.cursor()


# 311 service requests for vacant/abandoned buildings
create_311_table = '''
    CREATE TABLE IF NOT EXISTS service_requests (
    SR_NUMBER VARCHAR(50),
    SR_TYPE VARCHAR(100),
    SR_SHORT_CODE VARCHAR(10),
    CREATED_DEPARTMENT VARCHAR(100),
    OWNER_DEPARTMENT VARCHAR(100),
    STATUS VARCHAR(50),
    ORIGIN VARCHAR(50),
    CREATED_DATE TIMESTAMP,
    LAST_MODIFIED_DATE TIMESTAMP,
    CLOSED_DATE TIMESTAMP,
    STREET_ADDRESS VARCHAR(255),
    CITY VARCHAR(100),
    STATE VARCHAR(50),
    ZIP_CODE VARCHAR(20),
    STREET_NUMBER VARCHAR(20),
    STREET_DIRECTION VARCHAR(10),
    STREET_NAME VARCHAR(100),
    STREET_TYPE VARCHAR(20),
    DUPLICATE BOOLEAN,
    LEGACY_RECORD BOOLEAN,
    LEGACY_SR_NUMBER VARCHAR(50),
    PARENT_SR_NUMBER VARCHAR(50),
    COMMUNITY_AREA INT,
    WARD INT,
    ELECTRICAL_DISTRICT VARCHAR(10),
    ELECTRICITY_GRID VARCHAR(10),
    POLICE_SECTOR VARCHAR(10),
    POLICE_DISTRICT INT,
    POLICE_BEAT VARCHAR(10),
    PRECINCT INT,
    SANITATION_DIVISION_DAYS INT,
    CREATED_HOUR INT,
    CREATED_DAY_OF_WEEK INT,
    CREATED_MONTH INT,
    X_COORDINATE DOUBLE PRECISION,
    Y_COORDINATE DOUBLE PRECISION,
    LATITUDE DOUBLE PRECISION,
    LONGITUDE DOUBLE PRECISION,
    LOCATION VARCHAR(100),
    Boundaries_ZIP_Codes INT,
    Community_Areas INT,
    Zip_Codes INT,
    Census_Tracts INT,
    Wards INT,
    Boundaries_Wards_2023 INT
    );
    '''
pg_cursor.execute(create_311_table)


# community area data
create_area_table = '''
    CREATE TABLE IF NOT EXISTS communityareas (
    CommunityAreaNumber INT PRIMARY KEY,
    CommunityAreaName VARCHAR(255),
    PercentHousingCrowded DECIMAL(4, 1),
    PercentHouseholdsBelowPoverty DECIMAL(4, 1),
    PercentAged16PlusUnemployed DECIMAL(4, 1),
    PercentAged25PlusNoHighSchoolDiploma DECIMAL(4, 1),
    PercentAgedUnder18OrOver64 DECIMAL(4, 1),
    PerCapitaIncome VARCHAR,
    HardshipIndex VARCHAR
    );
    '''
pg_cursor.execute(create_area_table)


# crime data
# create_crime_table = '''
#     CREATE TABLE IF NOT EXISTS CrimeData (
#     ID INT PRIMARY KEY,
#     CaseNumber VARCHAR(20),
#     Date TIMESTAMP,
#     Block VARCHAR(50),
#     IUCR TEXT,
#     PrimaryType TEXT,
#     Description TEXT,
#     LocationDescription VARCHAR(100),
#     Arrest BOOLEAN,
#     Domestic BOOLEAN,
#     Beat INT,
#     District INT,
#     Ward INT,
#     CommunityArea INT,
#     FBICode VARCHAR(5),
#     XCoordinate INT,
#     YCoordinate INT,
#     Year INT,
#     UpdatedOn TIMESTAMP,
#     Latitude FLOAT,
#     Longitude FLOAT,
#     Location TEXT
#     );
#     '''
# pg_cursor.execute(create_crime_table)


# copy data from csv to tables
# use copy expert because column values contain ','
# with open('ChicagoCrimes2022.csv') as csv_file:
#     pg_cursor.copy_expert('''COPY CrimeData from stdin with csv header''', csv_file)

with open('311_Service_Requests_vacancies.csv') as f:
    pg_cursor.copy_expert('''COPY service_requests FROM STDIN WITH CSV HEADER''', f)
    

# first subquery: group neighborhoodsand sort by aggregate count (descending) of vacant property complaints
query = '''
    WITH miniquery AS (
        SELECT DISTINCT communityareas.communityareaname AS community_name, 
        service_requests.community_areas AS community_id, COUNT(*) AS num_vacancies
        FROM service_requests INNER JOIN communityareas 
        ON service_requests.community_areas = communityareas.communityareanumber 
        GROUP BY communityareas.communityareaname, service_requests.community_areas
        ORDER BY service_requests.community_areas ASC
    )
    SELECT DISTINCT miniquery.community_name, miniquery.community_id, miniquery.num_vacancies, COUNT(*) as count_violent_crime
    FROM miniquery RIGHT OUTER JOIN CrimeData 
    ON miniquery.community_id = CrimeData.CommunityArea
    WHERE miniquery.num_vacancies >  (SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY miniquery.num_vacancies DESC) as percentile_50 FROM miniquery)
    AND CrimeData.PrimaryType ILIKE ANY(ARRAY['Criminal Sexual Assault', 'Assault', 'Battery', 'Homicide', 'Robbery', 'Motor Vehicle Theft'])
    GROUP BY miniquery.community_name, miniquery.community_id, miniquery.num_vacancies
    ORDER BY COUNT(*)
    LIMIT 20;
    '''
pg_cursor.execute(query)


result_cols = [description[0] for description in pg_cursor.description]
neighborhood_crimes_df = pd.DataFrame(pg_cursor.fetchall(), columns=result_cols)

In [4]:
####################### Close the PostgreSQL cursor and connection - MUST BE RUN AFTER ANY CALLS TO POSTGRES
if pg_cursor is not None:
    pg_cursor.close()
if pg_conn is not None:
    pg_conn.close()

In [5]:
neighborhood_crimes_df

Unnamed: 0,community_name,community_id,num_vacancies,count_violent_crime
0,Edison Park,9,18564,86
1,Burnside,47,11592,137
2,Mount Greenwood,74,26712,166
3,Hegewisch,55,7560,250
4,Beverly,72,47208,285
5,McKinley Park,59,42672,344
6,Clearing,64,34356,346
7,Archer Heights,57,8064,349
8,Pullman,50,85512,380
9,Avalon Park,45,123732,490


In [6]:
# import pymongo
from pymongo import MongoClient
from pymongo.errors import BulkWriteError
client = MongoClient()

import json
import bson
import dateutil
from dateutil import parser
import glob
import os
from os.path import isfile, join
path = os.getcwd() 
from pymongo import IndexModel, ASCENDING, DESCENDING
import re

In [7]:
client = MongoClient('localhost', 27017)  # start connection to mongodb server

db = client["chicago-neighborhoods-database"]  # create a database
neighborhoods = db["neighborhoods-collection"]  # create a collection in the database
db.neighborhoods.delete_many({})


DeleteResult({'n': 0, 'ok': 1.0}, acknowledged=True)

In [8]:
json_files = []  # store json file names in list

# function to load json files
def load_json(filepath):
    with open(filepath) as f:
        result = json.load(f)
    return result[0]


json_filepath = path = r'../data/jsons/*.json'
json_filepaths_list = glob.glob(json_filepath)
neigborhood_jsons = []


# get files for desired neighborhoods
community_list = neighborhood_crimes_df['community_name'].tolist()
for f in json_filepaths_list:
    f_stem = os.path.splitext(os.path.basename(f))[0]
    if f_stem in community_list: 
        print(f_stem)
        json_content = load_json(f)
        neigborhood_jsons.append(json_content)  # load each json file
        # Add filename as a field
        json_content["filename"] = f_stem
        # Insert into MongoDB
        db.neighborhoods.insert_one(json_content)

Dunning
West Lawn
Lincoln Square
Brighton Park
Calumet Heights
Archer Heights
Burnside
Hegewisch
Ashburn
Avalon Park
Edison Park
Hyde Park
Clearing
McKinley Park
Hermosa
Kenwood
Irving Park
Pullman
Beverly
Mount Greenwood


In [11]:
# Specify the list of words you are searching for
keywords = ["small business", "small and medium sized business", "festivals", "Fest", "live music", "parks", "peaceful", "museum", "museums", "stable", "historical"]

matching_ids = []
matching_neighborhoods = []
for document in db.neighborhoods.find():
    for field, value in document.items():
        if any(word.lower() in str(value).lower() for word in keywords):  # check if word is in document
            matching_ids.append(document['_id'])
            matching_neighborhoods.append(document['filename'])
            break  # Break to move on to the next document

query = {"_id": {"$in": matching_ids}}
print(len(matching_ids))
print(matching_neighborhoods)

13
['West Lawn', 'Lincoln Square', 'Calumet Heights', 'Archer Heights', 'Burnside', 'Hegewisch', 'Edison Park', 'Hyde Park', 'Hermosa', 'Irving Park', 'Pullman', 'Beverly', 'Mount Greenwood']
