# NYC Apartment Search

_[Project prompt](https://docs.google.com/document/d/1ogme9BJeHb2IZ6UREavUorF--nnxoWCYAAi8AZ4Q5jQ/edit?usp=sharing) and [grading rubric](https://docs.google.com/document/d/1XI9Yq_e-U-D3iH4jTPAtNteeP2Q9mtJ9NKbePWKeN_g/edit?usp=sharing)

_This scaffolding notebook may be used to help setup your final project. It's **totally optional** whether you make use of this or not._

_If you do use this notebook, everything provided is optional as well - you may remove or add code as you wish._

_**All code below should be consider "pseudo-code" - not functional by itself, and only an idea of a possible approach.**_

## Setup

In [3]:
# All import statements needed for the project, for example:

import json
import pathlib
import urllib.parse

import geoalchemy2 as gdb
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import requests
import shapely
import sqlalchemy as db

from sqlalchemy.orm import declarative_base

In [4]:
!createdb aendta_db

createdb: error: database creation failed: ERROR:  database "aendta_db" already exists


In [5]:
!psql --dbname aendta_db -c 'CREATE EXTENSION postgis;'

ERROR:  extension "postgis" already exists


In [7]:
# Any constants you might need; some have been added for you

# Where data files will be read from/written to - this should already exist
DATA_DIR = pathlib.Path("data")
ZIPCODE_DATA_FILE = DATA_DIR / "nyc_zipcodes" / "nyc_zipcodes.shp"
ZILLOW_DATA_FILE = DATA_DIR / "zillow_rent_data.csv"

NYC_DATA_APP_TOKEN = "Lowr54ShQCxjzTIFPwsC6ZSPm"
BASE_NYC_DATA_URL = "https://data.cityofnewyork.us/"
NYC_DATA_311 = "erm2-nwe9.geojson"
NYC_DATA_TREES = "5rq2-4hqu.geojson"

DB_NAME = "aendta_db"
DB_USER = "alexander"
DB_URL = f"postgresql+psycopg2://{DB_USER}@localhost/{DB_NAME}"
DB_SCHEMA_FILE = "schema.sql"
# directory where DB queries for Part 3 will be saved
QUERY_DIR = pathlib.Path("queries")

In [8]:
# Make sure the QUERY_DIRECTORY & DATA_DIR exists
if not DATA_DIR.exists():
    DATA_DIR.mkdir()
if not QUERY_DIR.exists():
    QUERY_DIR.mkdir()

## Part 1: Data Preprocessing

In [6]:
import urllib.parse
import requests
import json
from pathlib import Path

DATA_DIR = Path("data")

def download_nyc_geojson_data(url, force=False):
    parsed_url = urllib.parse.urlparse(url)
    url_path = parsed_url.path.strip("/")
    filename = DATA_DIR / (url_path.replace("/", "_") + ".geojson") 
    
    if force or not filename.exists(): 
        print(f"Downloading {url} to {filename}...")
        response = requests.get(url)
        response.raise_for_status()  
        
        with open(filename, "w") as f:
            json.dump(response.json(), f) 
        
        print(f"Done downloading {url}.")
    
    else:
        print(f"Reading from {filename}...")
    
    return filename

In [51]:
def load_and_clean_zipcodes(zipcode_datafile):
    zipcodes_df = pd.read_csv("data/nyc_zipcodes-1.csv")
    keep_columns = ["ZIPCODE", "PO_NAME", "POPULATION", "BLDGZIP", "AREA", "COUNTY", "CTY_FIPS"]
    zipcodes_df = zipcodes_df[keep_columns]
    zipcodes_df = zipcodes_df.dropna()
    zipcodes_df.columns = zipcodes_df.columns.str.strip().str.lower()
    zipcodes_df["zipcode"] = zipcodes_df["zipcode"].astype(str)
        
    return zipcodes_df
zipcode_datafile = 'data/nyc_zipcodes-1.csv'  # Replace with your file path
df_zipcodes = load_and_clean_zipcodes(zipcode_datafile)
df_zipcodes

FileNotFoundError: [Errno 2] No such file or directory: 'data/nyc_zipcodes-1.csv'

In [11]:
import requests
import os
from pathlib import Path

data_dir = Path("data")
data_dir.mkdir(exist_ok=True)  
application_token = os.getenv(NYC_DATA_APP_TOKEN)
url_311 = "https://data.cityofnewyork.us/resource/erm2-nwe9.json"

def download_and_clean_311_data(_331_datafile):
    headers = {"X-App-Token": NYC_DATA_APP_TOKEN}
    response = requests.get(url_311, headers=headers)
    response.raise_for_status() 
    
    filename_311 = data_dir / "311_data.json"
    with open(filename_311, "w") as f:
        f.write(response.text) 
    
    data_311 = pd.read_json(filename_311) 
    keep_columns = ["created_date", "complaint_type", "latitude", "longitude", "borough"]
    data_311 = data_311[keep_columns]
    data_311 = data_311.dropna()  
    data_311.columns = data_311.columns.str.strip().str.lower()  # Standardize column names
    
    return data_311
_311_datafile = 'data/311_data.json' 
df_311 = download_and_clean_311_data(_311_datafile)
df_311

Unnamed: 0,created_date,complaint_type,latitude,longitude,borough
0,2024-04-24T01:51:03.000,Noise - Residential,40.809497,-73.941700,MANHATTAN
1,2024-04-24T01:49:23.000,Noise - Residential,40.853718,-73.864825,BRONX
2,2024-04-24T01:48:53.000,Blocked Driveway,40.757165,-73.913862,QUEENS
3,2024-04-24T01:47:55.000,Noise - Residential,40.809497,-73.941700,MANHATTAN
4,2024-04-24T01:47:52.000,Blocked Driveway,40.603881,-73.960620,BROOKLYN
...,...,...,...,...,...
995,2024-04-23T21:46:22.000,PAINT/PLASTER,40.653202,-73.960533,BROOKLYN
996,2024-04-23T21:46:22.000,UNSANITARY CONDITION,40.653202,-73.960533,BROOKLYN
997,2024-04-23T21:46:18.000,Sidewalk Condition,40.761623,-73.960484,MANHATTAN
998,2024-04-23T21:45:34.000,New Tree Request,40.808476,-73.955124,MANHATTAN


In [13]:
!pip install pyogrio

def download_and_clean_tree_data(tree_datafile):
    # Download the tree data
    url_tree = "https://data.cityofnewyork.us/resource/5rq2-4hqu.geojson"
    headers = {"X-App-Token": NYC_DATA_APP_TOKEN}  
    response = requests.get(url_tree, headers=headers)
    response.raise_for_status()  

    filename_tree = data_dir / "treesdata.geojson"  
    with open(filename_tree, "w") as f:
        f.write(response.text)  

    data_tree = gpd.read_file("GeoJSON:data/treesdata.geojson") 
    data_tree = gpd.GeoDataFrame(data_tree, geometry='geometry')

    keep_columns = ["tree_id", 'created_at', 'block_id', 'tree_dbh', 'status','spc_latin','spc_common','zip_city', "boroname", "latitude", "longitude", 'geometry', 'nta', 'health','zipcode','address','nta_name']
    data_tree = data_tree[keep_columns]  
    data_tree = data_tree.dropna(subset=["tree_id", "latitude", "longitude"])  
    data_tree.columns = data_tree.columns.str.strip().str.lower() 
    data_tree = data_tree.to_crs(epsg=2029) 

    return data_tree

tree_datafile = 'data/treesdata.geojson'
geodf_trees = download_and_clean_tree_data(tree_datafile)
geodf_trees




Unnamed: 0,tree_id,created_at,block_id,tree_dbh,status,spc_latin,spc_common,zip_city,boroname,latitude,longitude,geometry,nta,health,zipcode,address,nta_name
0,180683,08/27/2015,348711,3,Alive,Acer rubrum,red maple,Forest Hills,Queens,40.72309177,-73.84421522,POINT (1104574.224 4532506.823),QN17,Fair,11375,108-005 70 AVENUE,Forest Hills
1,200540,09/03/2015,315986,21,Alive,Quercus palustris,pin oak,Whitestone,Queens,40.79411067,-73.81867946,POINT (1106084.075 4540576.364),QN49,Fair,11357,147-074 7 AVENUE,Whitestone
2,204026,09/05/2015,218365,3,Alive,Gleditsia triacanthos var. inermis,honeylocust,Brooklyn,Brooklyn,40.71758074,-73.9366077,POINT (1096811.875 4531258.710),BK90,Good,11211,390 MORGAN AVENUE,East Williamsburg
3,204337,09/05/2015,217969,10,Alive,Gleditsia triacanthos var. inermis,honeylocust,Brooklyn,Brooklyn,40.71353749,-73.93445616,POINT (1097030.131 4530824.104),BK90,Good,11211,1027 GRAND STREET,East Williamsburg
4,189565,08/30/2015,223043,21,Alive,Tilia americana,American linden,Brooklyn,Brooklyn,40.66677776,-73.97597938,POINT (1093936.292 4525344.846),BK37,Good,11215,603 6 STREET,Park Slope-Gowanus
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,191374,08/31/2015,415497,1,Dead,,,Staten Island,Staten Island,40.56929363,-74.12944305,POINT (1081800.110 4513481.110),SI25,,10306,86 EAST BROADWAY,Oakwood-Oakwood Beach
996,177355,08/26/2015,224301,10,Alive,Acer platanoides,Norway maple,Brooklyn,Brooklyn,40.68505281,-74.00015216,POINT (1091728.251 4527211.708),BK33,Poor,11231,178 DE GRAW STREET,Carroll Gardens-Columbia Street-Red Hook
997,195035,09/01/2015,229940,25,Alive,Acer platanoides,Norway maple,Brooklyn,Brooklyn,40.62479203,-73.95751667,POINT (1095874.093 4520804.715),BK43,Good,11230,1054 EAST 19 STREET,Midwood
998,177062,08/26/2015,409315,18,Alive,Acer platanoides,Norway maple,Staten Island,Staten Island,40.5925856,-74.09660154,POINT (1084379.322 4516288.001),SI36,Good,10304,93 DELAWARE AVENUE,Old Town-Dongan Hills-South Beach


In [15]:
data_dir = Path("data")

def load_and_clean_zillow_data(zillow_datafile):
    zillow_datafile = data_dir / "zillow_rent_data.csv"  
    zillow_df = pd.read_csv(zillow_datafile)
    zillow_df = zillow_df.drop(columns = ['StateName', 'RegionType','Metro', 'City'])
    zillow_df = zillow_df.dropna()  
    zillow_df.columns = zillow_df.columns.str.strip().str.lower()  
    
    return zillow_df

zillow_datafile = 'data/zillow_rent_data.csv'  # Replace with your file path
cleaned_zillow = load_and_clean_zillow_data(zillow_datafile)
df_zillow = cleaned_zillow[cleaned_zillow['state'] == 'NY']
df_zillow

Unnamed: 0,regionid,sizerank,regionname,state,countyname,2015-01-31,2015-02-28,2015-03-31,2015-04-30,2015-05-31,...,2023-04-30,2023-05-31,2023-06-30,2023-07-31,2023-08-31,2023-09-30,2023-10-31,2023-11-30,2023-12-31,2024-01-31
16,62037,19,11226,NY,Kings County,1868.105728,1893.672364,1969.589733,1986.212577,2000.92847,...,2613.65965,2633.525698,2662.290709,2679.586231,2693.602946,2672.819878,2644.929685,2644.410748,2652.155298,2680.780024
25,61639,29,10025,NY,New York County,3098.357195,3106.023721,3104.234893,3142.021449,3164.276824,...,3817.813149,3904.197355,3995.762151,4043.179089,4066.639389,4063.470382,3986.671655,3894.029312,3791.01584,3792.998965
42,62017,47,11206,NY,Kings County,2346.753296,2347.893103,2424.447241,2457.035473,2535.944844,...,3397.065656,3406.346311,3466.232099,3493.401248,3499.914375,3506.426102,3483.233643,3406.711154,3348.781644,3303.408189
43,62032,48,11221,NY,Kings County,2052.397221,2063.723462,2063.476048,2113.173098,2125.333487,...,2971.613068,3007.409687,3045.702391,3091.158895,3104.550904,3115.142613,3095.22723,3057.308159,3013.185441,3020.454439
67,62045,73,11235,NY,Kings County,1703.266127,1747.275109,1773.855593,1828.410059,1833.244388,...,2466.975949,2407.638118,2449.74819,2453.754288,2487.964903,2500.224485,2515.695716,2569.989624,2527.888679,2566.480159
75,62043,81,11233,NY,Kings County,1781.09167,1789.585683,1766.338883,1808.226203,1870.879467,...,2707.950319,2751.723607,2789.296542,2821.627388,2823.059273,2836.253053,2782.965511,2749.438133,2713.581236,2735.835088
111,61643,122,10029,NY,New York County,2284.781709,2284.183246,2289.389182,2307.728976,2327.186327,...,2875.356843,2890.985584,2899.518149,2907.073972,2917.961442,2894.956336,2882.648971,2860.84398,2852.999555,2827.066667
128,61616,139,10002,NY,New York County,2792.959154,2779.077623,2755.580309,2793.35896,2822.757022,...,3558.843588,3608.718986,3669.505689,3681.631589,3692.901772,3685.912748,3653.724909,3589.821246,3512.80189,3507.375801
132,62087,143,11375,NY,Queens County,1776.128861,1757.548582,1767.367753,1768.708606,1774.195182,...,2398.062756,2411.634909,2501.606347,2532.176614,2517.768526,2550.194396,2531.740588,2515.355776,2480.098428,2502.5625
160,62026,172,11215,NY,Kings County,2434.603096,2474.330994,2514.070872,2559.933439,2590.630277,...,3397.712597,3454.225652,3482.462766,3495.590867,3483.123376,3551.134516,3578.595886,3565.727021,3517.447805,3488.081589


In [16]:
def load_all_data():
    df_zipcodes = load_and_clean_zipcodes(zipcode_datafile)
    df_311 = download_and_clean_311_data(_311_datafile)
    geodf_trees = download_and_clean_tree_data(tree_datafile)
    df_zillow = load_and_clean_zillow_data(zillow_datafile)
    return (
        df_zipcodes,
        df_311,
        geodf_trees,
        df_zillow
    )

In [17]:
df_zipcodes, df_311, df_zillow, geodf_trees = load_all_data()

In [18]:
# Show basic info about each dataframe
df_zipcodes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 263 entries, 0 to 262
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   zipcode     263 non-null    object 
 1   po_name     263 non-null    object 
 2   population  263 non-null    float64
 3   bldgzip     263 non-null    int64  
 4   area        263 non-null    float64
 5   county      263 non-null    object 
 6   cty_fips    263 non-null    int64  
dtypes: float64(2), int64(2), object(3)
memory usage: 14.5+ KB


In [19]:
# Show first 5 entries about each dataframe
df_zipcodes.head()

Unnamed: 0,zipcode,po_name,population,bldgzip,area,county,cty_fips
0,11436,Jamaica,18681.0,0,22699300.0,Queens,81
1,11213,Brooklyn,62426.0,0,29631000.0,Kings,47
2,11212,Brooklyn,83866.0,0,41972100.0,Kings,47
3,11225,Brooklyn,56527.0,0,23698630.0,Kings,47
4,11218,Brooklyn,72280.0,0,36868800.0,Kings,47


In [21]:
df_311.info()

<class 'pandas.core.frame.DataFrame'>
Index: 992 entries, 0 to 999
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   created_date    992 non-null    object 
 1   complaint_type  992 non-null    object 
 2   latitude        992 non-null    float64
 3   longitude       992 non-null    float64
 4   borough         992 non-null    object 
dtypes: float64(2), object(3)
memory usage: 46.5+ KB


In [20]:
df_311.head()

Unnamed: 0,created_date,complaint_type,latitude,longitude,borough
0,2024-04-24T01:51:03.000,Noise - Residential,40.809497,-73.9417,MANHATTAN
1,2024-04-24T01:49:23.000,Noise - Residential,40.853718,-73.864825,BRONX
2,2024-04-24T01:48:53.000,Blocked Driveway,40.757165,-73.913862,QUEENS
3,2024-04-24T01:47:55.000,Noise - Residential,40.809497,-73.9417,MANHATTAN
4,2024-04-24T01:47:52.000,Blocked Driveway,40.603881,-73.96062,BROOKLYN


In [22]:
geodf_trees.info()

<class 'pandas.core.frame.DataFrame'>
Index: 579 entries, 0 to 6479
Columns: 114 entries, regionid to 2024-01-31
dtypes: float64(109), int64(3), object(2)
memory usage: 520.2+ KB


In [23]:
geodf_trees.head()

Unnamed: 0,regionid,sizerank,regionname,state,countyname,2015-01-31,2015-02-28,2015-03-31,2015-04-30,2015-05-31,...,2023-04-30,2023-05-31,2023-06-30,2023-07-31,2023-08-31,2023-09-30,2023-10-31,2023-11-30,2023-12-31,2024-01-31
0,91982,1,77494,TX,Fort Bend County,1471.214336,1477.03306,1484.755201,1492.248132,1495.544608,...,1843.953065,1853.54622,1860.80506,1873.335787,1879.08048,1882.092604,1877.636803,1857.636589,1846.701735,1839.65496
2,91940,3,77449,TX,Harris County,1285.448996,1283.387323,1290.731322,1302.78233,1313.46357,...,1799.232097,1803.978538,1815.603187,1824.661645,1837.338997,1837.127291,1822.343233,1809.231267,1813.118556,1830.410884
8,91926,11,77433,TX,Harris County,1350.951013,1346.486674,1353.51516,1362.401443,1363.458758,...,1882.654529,1881.87345,1908.720996,1941.138293,1965.196968,1997.556435,1984.275828,1962.842219,1886.411739,1867.679966
16,62037,19,11226,NY,Kings County,1868.105728,1893.672364,1969.589733,1986.212577,2000.92847,...,2613.65965,2633.525698,2662.290709,2679.586231,2693.602946,2672.819878,2644.929685,2644.410748,2652.155298,2680.780024
18,70829,21,30044,GA,Gwinnett County,1102.686034,1113.393291,1122.270029,1124.84742,1128.329575,...,2058.256657,2050.162966,2032.137633,1993.185884,1991.503042,1935.298976,1946.574119,1920.301442,1987.4121,1995.192319


In [24]:
df_zillow.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 17 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   tree_id     1000 non-null   object  
 1   created_at  1000 non-null   object  
 2   block_id    1000 non-null   object  
 3   tree_dbh    1000 non-null   object  
 4   status      1000 non-null   object  
 5   spc_latin   973 non-null    object  
 6   spc_common  973 non-null    object  
 7   zip_city    1000 non-null   object  
 8   boroname    1000 non-null   object  
 9   latitude    1000 non-null   object  
 10  longitude   1000 non-null   object  
 11  geometry    1000 non-null   geometry
 12  nta         1000 non-null   object  
 13  health      973 non-null    object  
 14  zipcode     1000 non-null   object  
 15  address     1000 non-null   object  
 16  nta_name    1000 non-null   object  
dtypes: geometry(1), object(16)
memory usage: 132.9+ KB


In [25]:
df_zillow.head()

Unnamed: 0,tree_id,created_at,block_id,tree_dbh,status,spc_latin,spc_common,zip_city,boroname,latitude,longitude,geometry,nta,health,zipcode,address,nta_name
0,180683,08/27/2015,348711,3,Alive,Acer rubrum,red maple,Forest Hills,Queens,40.72309177,-73.84421522,POINT (1104574.224 4532506.823),QN17,Fair,11375,108-005 70 AVENUE,Forest Hills
1,200540,09/03/2015,315986,21,Alive,Quercus palustris,pin oak,Whitestone,Queens,40.79411067,-73.81867946,POINT (1106084.075 4540576.364),QN49,Fair,11357,147-074 7 AVENUE,Whitestone
2,204026,09/05/2015,218365,3,Alive,Gleditsia triacanthos var. inermis,honeylocust,Brooklyn,Brooklyn,40.71758074,-73.9366077,POINT (1096811.875 4531258.710),BK90,Good,11211,390 MORGAN AVENUE,East Williamsburg
3,204337,09/05/2015,217969,10,Alive,Gleditsia triacanthos var. inermis,honeylocust,Brooklyn,Brooklyn,40.71353749,-73.93445616,POINT (1097030.131 4530824.104),BK90,Good,11211,1027 GRAND STREET,East Williamsburg
4,189565,08/30/2015,223043,21,Alive,Tilia americana,American linden,Brooklyn,Brooklyn,40.66677776,-73.97597938,POINT (1093936.292 4525344.846),BK37,Good,11215,603 6 STREET,Park Slope-Gowanus


## Part 2: Storing Data

In [9]:
import psycopg2
from psycopg2 import sql

def setup_new_postgis_database(username, db_name):
    #Connect to the default database
    conn = psycopg2.connect(f"dbname=postgres user={username}")
    conn.autocommit = True 
    cursor = conn.cursor()
    
    #Create the new database
    try:
        cursor.execute(sql.SQL("CREATE DATABASE {}").format(sql.Identifier(db_name)))
        print(f"Database {db_name} created successfully.")
    except psycopg2.DatabaseError as e:
        print(f"An error occurred: {e}")
        return
    
    #Connect to the newly created database
    conn.close()
    conn = psycopg2.connect(f"dbname={db_name} user={username}")
    conn.autocommit = True
    cursor = conn.cursor()
    
    #Enable PostGIS extension
    try:
        cursor.execute("CREATE EXTENSION postgis;")
        print("PostGIS extension created successfully.")
    except psycopg2.DatabaseError as e:
        print(f"An error occurred: {e}")
    
    #Close connection
    conn.close()

#Test
setup_new_postgis_database('alexander', 'aendta_db')


An error occurred: database "aendta_db" already exists



In [10]:
setup_new_postgis_database(DB_USER, DB_NAME)

An error occurred: database "aendta_db" already exists



### Creating Tables


These are just a couple of options to creating your tables; you can use one or the other, a different method, or a combination.

In [11]:
engine = db.create_engine(DB_URL)

NoSuchModuleError: Can't load plugin: sqlalchemy.dialects:postgres.psycopg2

#### Option 1: SQL

In [None]:
# If using SQL (as opposed to SQLAlchemy), define the SQL statements to create your 4 tables.
# You may be creating more tables depending on how you're setting up your constraints/relationships
# or if you're completing the extra credit.
ZIPCODE_SCHEMA = """
TODO
"""

ZIPCODE_G

NYC_311_SCHEMA = """
TODO
"""

NYC_TREE_SCHEMA = """
TODO
"""

ZILLOW_SCHEMA = """
TODO
"""

In [None]:
# create that required schema.sql file
with open(DB_SCHEMA_FILE, "w") as f:
    f.write(ZIPCODE_SCHEMA)
    f.write(NYC_311_SCHEMA)
    f.write(NYC_TREE_SCHEMA)
    f.write(ZILLOW_SCHEMA)

In [None]:
# If using SQL (as opposed to SQLAlchemy), execute the schema files to create tables
with engine.connect() as connection:
    pass

#### Option 2: SQLAlchemy

In [None]:
Base = declarative_base()

class Tree(Base):
    __tablename__ = "trees"

    ...


In [None]:
Base.metadata.create_all(engine)

### Add Data to Database

These are just a couple of options to write data to your tables; you can use one or the other, a different method, or a combination.

#### Option 1: SQL

In [None]:
def write_dataframes_to_table(tablename_to_dataframe):
    # write INSERT statements or use pandas/geopandas to write SQL
    raise NotImplemented()

In [None]:
tablename_to_dataframe = {
    "zipcodes": geodf_zipcode_data,
    "complaints": geodf_311_data,
    "trees": geodf_tree_data,
    "rents": df_zillow_data,
}

In [None]:
write_dataframes_to_table(tablename_to_dataframe)

#### Option 2: SQLAlchemy

In [None]:
Session = db.orm.sessionmaker(bind=engine)
session = Session()

In [None]:
for row in geodf_tree_data.iterrows():
    tree = Tree(...)
    session.add(tree)

In [None]:
session.commit()

## Part 3: Understanding the Data

### Query 1

In [None]:
# Helper function to write the queries to file
def write_query_to_file(query, outfile):
    raise NotImplementedError()

In [None]:
QUERY_1_FILENAME = QUERY_DIR / "FILL_ME_IN"

QUERY_1 = """
FILL_ME_IN
"""

In [None]:
with engine.connect() as conn:
    result = conn.execute(db.text(QUERY_1))
    for row in result:
        print(row)

In [None]:
write_query_to_file(QUERY_1, QUERY_1_FILENAME)

## Part 4: Visualizing the Data

### Visualization 1

In [None]:
# use a more descriptive name for your function
def plot_visual_1(dataframe):
    figure, axes = plt.subplots(figsize=(20, 10))
    
    values = "..."  # use the dataframe to pull out values needed to plot
    
    # you may want to use matplotlib to plot your visualizations;
    # there are also many other plot types (other 
    # than axes.plot) you can use
    axes.plot(values, "...")
    # there are other methods to use to label your axes, to style 
    # and set up axes labels, etc
    axes.set_title("Some Descriptive Title")
    
    plt.show()

In [None]:
def get_data_for_visual_1():
    # Query your database for the data needed.
    # You can put the data queried into a pandas/geopandas dataframe, if you wish
    raise NotImplementedError()

In [None]:
some_dataframe = get_data_for_visual_1()
plot_visual_1(some_dataframe)