In [2]:

import awswrangler
import os
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import subprocess as sp
import yaml
from pyathena import connect
import geopandas as gpd
import folium
from pyathena.pandas.util import as_pandas
import folium
from folium.plugins import MarkerCluster
from IPython.core.display import display, HTML

# Set the working directory to manual_flagging so the relative yaml and src
# locations used below resolve the same way wherever the notebook is launched.
# check=True makes a failed git call (e.g. not inside a repository) raise
# CalledProcessError here, instead of git's error text being used as a path.
root = sp.run(
    ["git", "rev-parse", "--show-toplevel"],
    capture_output=True,
    text=True,
    check=True,
).stdout.strip()
os.chdir(os.path.join(root, "manual_flagging"))

# Load yaml/inputs_initial.yaml into `inputs`
with open(os.path.join("yaml", "inputs_initial.yaml"), "r") as stream:
    inputs = yaml.safe_load(stream)

# Connect to athena
# The S3 staging location and region are read from the
# AWS_ATHENA_S3_STAGING_DIR and AWS_REGION environment variables;
# os.getenv returns None for a variable that is not set.
conn = connect(
    s3_staging_dir=os.getenv("AWS_ATHENA_S3_STAGING_DIR"),
    region_name=os.getenv("AWS_REGION"),
)

# Query text sent to Athena.
# - Selects sale price, sale date, document number and the ptax flag from
#   vw_pin_sale, plus pin, class and building square footage from
#   vw_card_res_char, and lat/lon (aliased to lat/long) from vw_pin_universe.
# - vw_card_res_char is inner-joined to vw_pin_sale on pin and year;
#   vw_pin_universe is left-joined on pin and year, so lat/long may be null.
# - Keeps sales dated 2014-02-01 through 2021-01-31 (BETWEEN is inclusive)
#   and excludes rows flagged is_multisale or pin_is_multicard.
SQL_QUERY = """
SELECT
    sale.sale_price AS meta_sale_price,
    sale.sale_date AS meta_sale_date,
    sale.doc_no AS meta_sale_document_num,
    sale.sale_filter_ptax_flag,
    res.pin AS pin,
    res.class AS class,
    res.char_bldg_sf AS char_bldg_sf,
    latlong.lat AS lat,
    latlong.lon AS long
FROM default.vw_card_res_char res
INNER JOIN default.vw_pin_sale sale
    ON sale.pin = res.pin
    AND sale.year = res.year
LEFT JOIN default.vw_pin_universe latlong
    ON res.pin = latlong.pin
    AND res.year = latlong.year
WHERE (sale.sale_date
    BETWEEN DATE '2014-02-01'
    AND DATE '2021-01-31')
AND NOT sale.is_multisale
AND NOT res.pin_is_multicard;
"""

# Execute query and return as pandas df
cursor = conn.cursor()
cursor.execute(SQL_QUERY)
# Keep the cursor's column description alongside the data
metadata = cursor.description
df_ingest = as_pandas(cursor)
# NOTE: `df` is a second name for the same object as `df_ingest`, not a copy,
# so any in-place change made through one name is visible through the other.
df = df_ingest

In [10]:

# Draw a reproducible random subset of `df` (fixed random_state).
sample_size = 10000  # upper bound on the number of rows to sample
# Clamp to the frame length: DataFrame.sample raises ValueError when n exceeds
# the number of rows and sampling is without replacement (the default).
sampled_df = df.sample(n=min(sample_size, len(df)), random_state=42)

In [18]:
def determine_color(dummy_value):
    """Pick a marker colour for a flag value.

    Returns 'blue' when ``dummy_value`` compares equal to 0 (this includes
    ``False`` and ``0.0``) and 'red' for every other value.
    """
    return 'blue' if dummy_value == 0 else 'red'

# Base map centred on [41.8781, -87.6298] at zoom level 10
m = folium.Map(location=[41.8781, -87.6298], zoom_start=10)

# Plot only rows that have BOTH coordinates. Filtering on 'long' alone would
# let a row with a missing 'lat' through as a NaN marker location.
plottable = sampled_df.dropna(subset=['lat', 'long'])

# One circle per sampled sale, coloured by sale_filter_ptax_flag
for lat, lon, dummy in zip(
    plottable['lat'], plottable['long'], plottable['sale_filter_ptax_flag']
):
    # Resolve the colour once; outline and fill use the same value
    marker_color = determine_color(dummy)
    folium.CircleMarker(
        location=[lat, lon],
        radius=3,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.6,
    ).add_to(m)


In [19]:
# Bare last expression: the notebook displays the folium map as the cell output
m