In [1]:
import pandas as pd
import geopandas as gpd
import topojson as tp
import re
import warnings

# Load the sales table; the first CSV column is used as the row index.
sales_df = pd.read_csv('Data/GwinnettSales.csv', index_col=0)

# Build one point per sale from the long/lat columns, then promote the
# table to a GeoDataFrame tagged with EPSG:4269.
sale_points = gpd.points_from_xy(sales_df['long'], sales_df['lat'])
sales_df = gpd.GeoDataFrame(sales_df, geometry=sale_points, crs="EPSG:4269")

# Load the census-tract layer and keep only the tract ID, the sub-geography
# label and the geometry itself.
tract_columns = ['GEOID', 'Sub_geo', 'geometry']
gwinnett_ct = gpd.read_file('Data/gwinnett_CTs.geojson')[tract_columns]

# Simplify the tract geometry with topojson.
# The simplification step emits RuntimeWarnings; they are silenced only
# inside this `with` block so the filter is restored afterwards and does not
# hide unrelated RuntimeWarnings raised later in the kernel session.
toposimplify = 0.001  # value forwarded to tp.Topology(..., toposimplify=...)
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=RuntimeWarning)
    gwinnett_simp = tp.Topology(gwinnett_ct, toposimplify=toposimplify).to_gdf()

# export simplified geometry
gwinnett_simp.to_file('Data/gwinnett_CTs_simp.geojson')

# Spatially join every sale to the simplified tracts. how="left" keeps all
# sale rows, including any that match no tract; the helper column
# 'index_right' added by the join is discarded straight away.
sales_joined = (
    sales_df
    .sjoin(gwinnett_simp, how="left")
    .drop(columns='index_right')
)

# Normalise the one sub-geography label that is missing its full stop.
# NOTE(review): this runs after the simplified GeoJSON was written, so that
# file still carries the un-normalised label -- confirm that is intended.
sub_geo_fixes = {'Snellville/S Gwinnett': 'Snellville/S. Gwinnett'}
sales_joined['Sub_geo'] = sales_joined['Sub_geo'].replace(sub_geo_fixes)

# Weed-out filter: keep only rows whose sale_amt is at least the threshold
# (rows with a missing sale_amt fail the comparison and are dropped too).
min_sale_amt = 10000
meets_min_amt = sales_joined['sale_amt'] >= min_sale_amt
sales_joined = sales_joined.loc[meets_min_amt]

# Write the joined, filtered sales to CSV.
# NOTE(review): index=False omits the row index from the file; the index was
# taken from the first CSV column on load (ATTOM_ID in the displayed output),
# so that identifier is not exported -- confirm this is intended.
sales_joined.to_csv('Data/Gwinnett_20-24.csv', index=False)

# Report how many rows survived.
print(f'dataframe rows: {sales_joined.shape[0]:,}')

# Sanity check: show the ten rows with the smallest home_size.
sales_joined.sort_values('home_size').head(10)

dataframe rows: 75,835


Unnamed: 0_level_0,address,sale_date,year,month,year-month,sale_amt,home_size,price_sf,yr_blt,lat,long,geometry,GEOID,Sub_geo
ATTOM_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
241058472,2109 PINE TREE DR # C4,2021-10-26,2021,10,2021-10,81500.0,299.0,272.575251,1953.0,34.149477,-84.024714,POINT (-84.02471 34.14948),13135050110,Buford/Sugar Hill
32056033,2109 PINE TREE DR # C17,2020-11-13,2020,11,2020-11,125000.0,430.0,290.697674,1953.0,34.149477,-84.024714,POINT (-84.02471 34.14948),13135050110,Buford/Sugar Hill
171483411,1433 BORDER ST,2023-12-15,2023,12,2023-12,122000.0,528.0,231.060606,1945.0,34.105945,-84.025256,POINT (-84.02526 34.10594),13135050105,Buford/Sugar Hill
31174633,2895 JONES MILL RD,2020-05-20,2020,5,2020-5,200000.0,550.0,363.636364,1959.0,33.945668,-84.26812,POINT (-84.26812 33.94567),13135050332,Norcross/Peachtree Corners
31174633,2895 JONES MILL RD,2021-06-17,2021,6,2021-6,220000.0,550.0,400.0,1959.0,33.945668,-84.26812,POINT (-84.26812 33.94567),13135050332,Norcross/Peachtree Corners
171017207,187 HUFF DR,2021-05-04,2021,5,2021-5,16500.0,572.0,28.846154,1950.0,33.923991,-84.063704,POINT (-84.06370 33.92399),13135050567,Lawrenceville
171474303,5290 SYCAMORE RD,2021-06-01,2021,6,2021-6,140000.0,576.0,243.055556,1963.0,34.113758,-84.041317,POINT (-84.04132 34.11376),13135050112,Buford/Sugar Hill
171072606,3955 CARRIAGE GATE DR,2022-12-20,2022,12,2022-12,380000.0,600.0,633.333333,1985.0,34.018468,-84.143337,POINT (-84.14334 34.01847),13135050234,Duluth
171072606,3955 CARRIAGE GATE DR,2021-01-29,2021,1,2021-1,358000.0,600.0,596.666667,1985.0,34.018468,-84.143337,POINT (-84.14334 34.01847),13135050234,Duluth
171463267,3391 MEDLOCK BRIDGE RD,2021-10-12,2021,10,2021-10,150000.0,612.0,245.098039,1956.0,33.967719,-84.208665,POINT (-84.20866 33.96772),13135050336,Norcross/Peachtree Corners
