In [16]:
import pandas as pd
import geopandas as gpd
import topojson as tp
import re
import warnings

# Prepare the Clayton sales extract:
#   1. tag every sale with the `Sub_geo` of its census tract,
#   2. write a simplified copy of the tract geometry for mapping,
#   3. filter the sales and export them as a flat CSV.

# read in sales; GEOID is read as text because it is the join key below
sales_df = pd.read_csv('Data/ClaytonSales.csv', dtype={'GEOID': 'str'})

# read in census tracts and keep only the columns this cell uses
clayton_ct = gpd.read_file('Data/clayton_CTs.gpkg')
clayton_ct = clayton_ct[['GEOID', 'Sub_geo', 'geometry']]

# join the sales to the tracts to get the sub geography of each sale.
# Only the attribute columns are joined: the tract polygons are not needed
# on the sales rows, so they are never copied onto them (previously the
# geometry was merged in and then dropped again before export).
sales_df = pd.merge(
    sales_df,
    clayton_ct[['GEOID', 'Sub_geo']],
    how='left',
    on='GEOID',
)

# simplify the tract geometry; the RuntimeWarnings that come with
# simplifying geographically are silenced for this step only, so the
# filter does not hide unrelated warnings later in the session
toposimplify = 0.001
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=RuntimeWarning)
    clayton_simp = tp.Topology(clayton_ct, toposimplify=toposimplify).to_gdf()

# export simplified geometry (driver named explicitly rather than inferred
# from the file extension)
clayton_simp.to_file('Data/clayton_CTs_simp.geojson', driver='GeoJSON')

# weed-out filters: exclude every sale in tract 13063040422
sales_df = sales_df[sales_df['GEOID'] != '13063040422']

# drop rows with no yr_blt.
# NOTE(review): the earlier comment said "year & yr_blt", but only yr_blt
# has ever been checked here - confirm whether `year` should be added.
sales_df = sales_df.dropna(subset=['yr_blt'])

# export
sales_df.to_csv('Data/Clayton_20-24.csv', index=False)

# print('export complete!')
print(f'dataframe rows: {sales_df.shape[0]:,}')
print('------')
sales_df.head(3)

dataframe rows: 15,799
------


Unnamed: 0,GEOID,year,sale_amt,home_size,yr_blt,price_sf,Sub_geo
0,13063040202,2020,75500.0,1072.0,1980.0,176.30597,NW Clayton
1,13063040202,2022,210000.0,1500.0,2003.0,140.0,NW Clayton
2,13063040202,2023,225000.0,1223.0,1984.0,183.973835,NW Clayton
