In [2]:
import pandas as pd
import numpy as np

In [3]:
# Read the combined county file, keeping every column as text
# (pandas is imported as `pd` in an earlier cell).
countydata = pd.read_csv('countycombined.csv', dtype=str)

# Report how the raw LUC column is typed before any conversion.
print('LUC column dtype:', countydata['LUC'].dtype)

# Add a nullable-integer version of LUC; entries that cannot be parsed
# as numbers become <NA>.
countydata['LUC_numeric'] = (
    countydata['LUC']
    .pipe(pd.to_numeric, errors='coerce')
    .astype('Int64')
)

LUC column dtype: object


In [None]:
# Tally parcels by land-use code (LUC) using the nullable-integer column
# `LUC_numeric` built in the load cell.

# Single-family: LUC is exactly 110. Rows whose LUC is <NA> are not counted.
sf_units = countydata['LUC_numeric'].eq(110).sum()

# Multi-family: the intended range is 114..120. Series.between() includes BOTH
# endpoints by default, so the upper bound must be 120; the previous bound of
# 121 also counted LUC 121 while the printed label claimed 114..120.
mf_units = countydata['LUC_numeric'].between(114, 120).sum()

# Keep the historical variable name bound so any later cell that still
# references it continues to run.
mf_units_114_121_inclusive = mf_units

print(f"Single Family Units (LUC == 110): {sf_units}")
print(f"Multi Family Units (LUC in 114..120): {mf_units}")

Single Family Units (LUC == 110): 420782
Multi Family Units (LUC in 114..120): 7242
Non-numeric/missing LUC examples (up to 10): []


In [7]:
# Keep only the single-family rows (LUC equal to 110) in their own dataframe,
# then write that subset to CSV without the index column.
is_single_family = countydata['LUC_numeric'].eq(110)
sf_df = countydata[is_single_family]
sf_df.to_csv('single_family_units.csv', index=False)

In [8]:
# Build the combined single-family + multi-family dataframe and export it to CSV.
# Multi-family means LUC 114..120; Series.between() is inclusive on both ends,
# so the upper bound is 120 (a bound of 121 would also pull in LUC 121 rows).
mf_df = countydata[countydata['LUC_numeric'].between(114, 120)]

# sf_df (the LUC == 110 subset) is defined in the preceding cell; stack the two
# subsets and write the result without the index column.
sf_mf_df = pd.concat([sf_df, mf_df])
sf_mf_df.to_csv('sf_mf_units.csv', index=False)