# Data Processing For Fulton County Sales and Parcel Data

### Helper Functions
---

In [2]:
import os
import polars as pl
import re

pl.enable_string_cache(True)
pl.Config.set_tbl_cols = 200

In [3]:
def read_file(path: str, dtypes: dict, new_names: list = []) -> pl.DataFrame:
    print("Reading file: ", path)
    df = pl.DataFrame()
    columns = [x for x in dtypes.keys()]
    name_mapping = dict(zip(columns, new_names))

    if path[-4:len(path)] == "xlsx":
        df = pl.read_excel(path,
                           xlsx2csv_options= {"ignore_formats": ["date", "float"], "infer_schema_length": 0},
                           read_csv_options= {"infer_schema_length": 0, "columns": columns, "dtypes": dtypes}
        )
    else:
        df = pl.read_csv(path,
                         separator= '\t',
                         columns= columns,
                         dtypes= dtypes)
    
    if new_names:
        df = df.rename(name_mapping)

    return df

def strip_parid(df: pl.DataFrame) -> pl.DataFrame:
    df = df.with_columns(
        pl.col('parid').str.replace_all(' ', '').alias('parid_strip')
    )
    return df

In [4]:
##############################
# SET PRE-PROCESSING VARIABLES
##############################
parcels_path = "C:/Users/nicho/Documents/research/FCS/data/digest/"
sales_path = "C:/Users/nicho/Documents/research/FCS/data/sales/"
parcel_dtypes = {
    'Taxyr': pl.Categorical, 'Parid': pl.Utf8, 'Nbhd': pl.Categorical,  
    'Situs Adrno': pl.Int64, 'Situs Adrdir': pl.Utf8, 
    'Situs Adrstr': pl.Utf8, 'Situs Adrsuf': pl.Utf8, 'Cityname': pl.Categorical, 
    'Zoning': pl.Categorical, 'Muni': pl.Categorical, 'Class': pl.Categorical, 
    'Luc': pl.Categorical, 'Livunit': pl.Utf8, 'Calcacres': pl.Utf8, 
    'Note1': pl.Utf8, 'Ofcard': pl.Int16, 'Chgrsn': pl.Categorical,
    'Taxdist': pl.Categorical, 'Own1': pl.Utf8, 
    'Own2': pl.Utf8, 'Owner Adrno': pl.Utf8, 
    'Owner Adradd': pl.Utf8, 'Owner Adrdir': pl.Categorical, 
    'Owner Adrstr': pl.Utf8, 'Owner Adrsuf': pl.Categorical, 
    'Owner Adrsuf2': pl.Utf8, 'Statecode': pl.Categorical, 
    'Country': pl.Categorical, 'Unitno': pl.Utf8, 'Zip1': pl.Utf8, 
    'Aprland': pl.Utf8, 'Aprbldg': pl.Utf8, 'Aprtot': pl.Utf8,
    'D Card': pl.Categorical, 'Style': pl.Categorical, 
    'D Yrblt': pl.Categorical, 'D Yrremod': pl.Int32,
    'Rmtot': pl.Int16, 'Rmbed': pl.Int16, 
    'Fixbath': pl.Int16, 'Fixhalf': pl.Int16
}
parcel_new_names = [
    'taxyr', 'parid', 'nbhd', 'site_adrno', 'site_adrdir',
    'site_adrstr', 'site_adrsuf', 'site_cityname',
    'zoning', 'site_muni', 'class', 'luc', 'livunit', 'calcacres',
    'note1', 'of_card', 'change_reason', 'taxdist', 'own1', 'own2',
    'own_adrno', 'own_adradd', 'own_adrdir', 'own_adrstr', 'own_adrsuf',
    'own_adrsuf2', 'own_statecode', 'own_country', 'own_unitno',
    'own_zip', 'aprland', 'aprbldg', 'aprtot', 'card', 'style', 'yrblt',
    'yr_remod', 'rmtot', 'rmbed', 'fixbath', 'fixhalf'
]
sales_dtypes = {
    'Taxyr': pl.Categorical, 'Saledt: Year (YYYY)': pl.Categorical,
    'Saledt: Month (Mon)': pl.Categorical, 'Parid': pl.Utf8,
    'Saledt': pl.Utf8, 'SALES PRICE': pl.Utf8, 
    'FAIR MARKET VALUE': pl.Utf8, 'DEED TYPE': pl.Categorical,
    'Aprland': pl.Utf8, 'Aprbldg': pl.Utf8,
    'Costval': pl.Utf8, 'Saleval': pl.Categorical, 'Who': pl.Utf8,
    'Wen': pl.Utf8, 'GRANTOR': pl.Utf8, 'GRANTEE': pl.Utf8,
    'Adrpre':pl.Categorical, 'Adrno': pl.Utf8,
    'Adrdir': pl.Categorical, 'Adrstr': pl.Utf8,
    'Adrsuf': pl.Categorical, 'Adrsuf2': pl.Categorical,
    'Cityname': pl.Categorical, 'Unitno': pl.Utf8,
    'Livunit': pl.Int16
}
sales_new_names = [
    'taxyr', 'saleyr', 'sale_month', 'parid',
    'sale_date', 'salesprice', 'fmv', 'deed',
    'sale_aprland', 'sale_aprbldg',
    'costval', 'saleval', 'appraiser', 'when', 'grantor',
    'grantee', 'sale_adrpre', 'sale_adrno', 'sale_adrdir',
    'sale_adrstr', 'sale_adrsuf', 'sale_adrsuf2',
    'sale_cityname', 'sale_unitno', 'livunit'
]

### Processing Procedure
---

Read all files to join; verify file groupings by year are as expected.

In [5]:
parcel_files = os.listdir(parcels_path)
sales_files = os.listdir(sales_path)
files_by_year = {}

for yr in range(2011, 2023):
    files_by_year[yr] = [sales_path + file for file in sales_files if str(yr) in file] \
                    + [parcels_path + file for file in parcel_files if str(yr) in file]
    
[files_by_year[x] for x in files_by_year][len(files_by_year)-2:]

[['C:/Users/nicho/Documents/research/FCS/data/sales/Sales2021.txt',
  'C:/Users/nicho/Documents/research/FCS/data/digest/parcel_14_2021.xlsx',
  'C:/Users/nicho/Documents/research/FCS/data/digest/parcel_17_2021.xlsx',
  'C:/Users/nicho/Documents/research/FCS/data/digest/parcel_atl_2021.xlsx',
  'C:/Users/nicho/Documents/research/FCS/data/digest/parcel_nf_2021.xlsx',
  'C:/Users/nicho/Documents/research/FCS/data/digest/parcel_sf_2021.xlsx'],
 ['C:/Users/nicho/Documents/research/FCS/data/sales/Sales2022.txt',
  'C:/Users/nicho/Documents/research/FCS/data/digest/parcel_14_2022.xlsx',
  'C:/Users/nicho/Documents/research/FCS/data/digest/parcel_17_2022.xlsx',
  'C:/Users/nicho/Documents/research/FCS/data/digest/parcel_atl_2022.xlsx',
  'C:/Users/nicho/Documents/research/FCS/data/digest/parcel_nf_2022.xlsx',
  'C:/Users/nicho/Documents/research/FCS/data/digest/parcel_sf_2022.xlsx']]

Procedure

- Merge Sales with Their Associated Parcel Info
- Look if there are duplicates within appended parcel data for each year
- Make an Appended Sales
- Make an Appended Parcel?
- All Parcel?

In [6]:
def merge_year(
    sales: str,
    parcel: list,
    merge_type: bool,
    save_steps: bool = True
) -> pl.DataFrame:
    '''Merges sales and parcel files for a given year
    
    Keyword arguments:
    sales: path to sales file
    parcel: path to parcel file
    merge_type: True if left join on sales, False if left join on parcel
    save_steps: True if steps should be saved to a CSV
    '''
    sale_yr = re.findall(r'\d+', sales)[0]
    sales_df = read_file(sales, sales_dtypes, sales_new_names)
    parcel_dfs = [read_file(file, parcel_dtypes, parcel_new_names) for file in parcel]
    parcel_append = pl.concat(parcel_dfs)

    parcel_init_len = len(parcel_append)

    parcel_append = parcel_append.unique()

    sales_df = strip_parid(sales_df)
    parcel_append = strip_parid(parcel_append)

    if merge_type:
        merged = sales_df.join(parcel_append, how='left', on=['parid_strip', 'taxyr'])
    else:
        merged = parcel_append.join(sales_df, how='left', on=['parid_strip', 'taxyr'])

    if save_steps:
        parcel_append.write_parquet('../output/parcel/parcels_appended_' + sale_yr + '.parquet')
        merged.write_parquet('../output/merged/parcel_sales' + sale_yr + '.parquet')

    parcel_final_len = len(parcel_append)

    print("Original size of sales: ", len(sales_df))
    print("Original size of parcels: ", parcel_init_len)
    print('Additional rows generated (merged - sales): ', len(merged) - len(sales_df))
    print('Duplicate rows in parcel data: ', parcel_init_len - parcel_final_len)

    unmatched = merged.filter(pl.col('parid_right').is_null())['parid'].to_list()
    error_path = r'../output/errors/merged_unmatched_' + sale_yr + '.csv'
    with open(error_path, 'w') as writer:
        for parid in unmatched:
            writer.write("%s\n" % parid)

    print("Wrote unmatched Parcel IDs to: ", error_path)
    print('Done with year ', sale_yr)

    return merged

In [7]:
sales_left_parcel = [merge_year(files[0], files[1:], False) for files in files_by_year.values()]

Reading file:  C:/Users/nicho/Documents/research/FCS/data/sales/Sales2011.txt
Reading file:  C:/Users/nicho/Documents/research/FCS/data/digest/parcel_14_2010-2011.xlsx
Reading file:  C:/Users/nicho/Documents/research/FCS/data/digest/parcel_17_2010-2011.xlsx
Reading file:  C:/Users/nicho/Documents/research/FCS/data/digest/parcel_atl_2011-2012.xlsx
Reading file:  C:/Users/nicho/Documents/research/FCS/data/digest/parcel_nf_2010-2011.xlsx
Reading file:  C:/Users/nicho/Documents/research/FCS/data/digest/parcel_sf_2010-2011.xlsx
Original size of sales:  47806
Original size of parcels:  941997
Additional rows generated (merged - sales):  772858
Duplicate rows in parcel data:  136006
Wrote unmatched Parcel IDs to:  ../output/errors/merged_unmatched_2011.csv
Done with year  2011
Reading file:  C:/Users/nicho/Documents/research/FCS/data/sales/Sales2012.txt
Reading file:  C:/Users/nicho/Documents/research/FCS/data/digest/parcel_14_2012-2013.xlsx
Reading file:  C:/Users/nicho/Documents/research/FC

In [8]:
full_sales_parcel = pl.concat(sales_left_parcel, how='diagonal')

Verify data looks as expected

In [9]:
full_sales_parcel.sample(4)

taxyr,parid,nbhd,site_adrno,site_adrdir,site_adrstr,site_adrsuf,site_cityname,zoning,site_muni,class,luc,livunit,calcacres,note1,of_card,change_reason,taxdist,own1,own2,own_adrno,own_adradd,own_adrdir,own_adrstr,own_adrsuf,own_adrsuf2,own_statecode,own_country,own_unitno,own_zip,aprland,aprbldg,aprtot,card,style,yrblt,yr_remod,rmtot,rmbed,fixbath,fixhalf,parid_strip,saleyr,sale_month,parid_right,sale_date,salesprice,fmv,deed,sale_aprland,sale_aprbldg,costval,saleval,appraiser,when,grantor,grantee,sale_adrpre,sale_adrno,sale_adrdir,sale_adrstr,sale_adrsuf,sale_adrsuf2,sale_cityname,sale_unitno,livunit_right
cat,str,cat,i64,str,str,str,cat,cat,cat,cat,cat,str,str,str,i16,cat,cat,str,str,str,str,cat,str,cat,str,cat,cat,str,str,str,str,str,cat,cat,cat,i32,i16,i16,i16,i16,str,cat,cat,str,str,str,str,cat,str,str,str,cat,str,str,str,str,cat,str,cat,str,cat,cat,cat,str,i16
"""2012""","""14F00040004003…","""4618""",2943,,"""FAIRBURN""","""RD""","""ATLANTA""","""R4""","""5""","""R3""","""101""","""1""","""0.2581""","""AN CHGE TO 461…",1,"""RV""","""5""","""WILLIAMS JANET…",,"""2943""",,,"""FAIRBURN""","""RD""","""SW""","""GA""",,,"""30331""","""10100""","""22400""","""32500""","""1""","""2""","""1968""",,5.0,3.0,1.0,1.0,"""14F00040004003…",,,,,,,,,,,,,,,,,,,,,,,,
"""2016""","""11 06700265016…","""1100""",10990,,"""PARSONS""","""RD""","""FUL""","""R3C""","""57""","""R4""","""101""","""1""","""2.64""",,1,"""MN""","""57""","""FISHOV ILYA E…",,"""10990""",,,"""PARSONS""","""RD""",,"""GA""",,,"""30097""","""51700""","""65600""","""117300""","""1""","""2""","""1969""",,7.0,4.0,3.0,1.0,"""11067002650162…",,,,,,,,,,,,,,,,,,,,,,,,
"""2016""","""12 25420664030…","""1261""",2035,,"""SIX BRANCHES""","""DR""","""ROS""","""R5""","""45""","""R3""","""101""","""1""","""0.4778""","""ID""",1,"""MN""","""45""","""BOEHM CHRISTOP…",,"""2035""",,,"""SIX BRANCHES""","""DR""",,"""GA""",,,"""30076""","""44100""","""182200""","""226300""","""1""","""1""","""1975""",,8.0,3.0,2.0,0.0,"""12254206640300…",,,,,,,,,,,,,,,,,,,,,,,,
"""2014""","""13 0125 LL449…","""1328""",2373,,"""MCGEE LANDING""",,"""FUL""","""AG1""","""55""","""R3""","""100""","""0""","""6.500000000000…","""AN 05 BLUECARD…",1,"""MN""","""55""","""FLOYD J HOLDIN…",,"""2331""",,,"""FLAT SHOALS""","""RD""",,"""GA""",,,"""30296""","""7500""","""0""","""7500""",,,,,,,,,"""130125LL4493""",,,,,,,,,,,,,,,,,,,,,,,,


In [10]:
unmatched = len(full_sales_parcel.filter(pl.col('parid_right').is_null()))
print('Total row count: ', len(full_sales_parcel))
print('Total unmatched parcels: ', unmatched)
print('Percent of total: ', unmatched / len(full_sales_parcel) * 100)

Total row count:  5713830
Total unmatched parcels:  5248580
Percent of total:  91.85747563368179


In [11]:
unique_sales = len(full_sales_parcel.select(['parid', 'taxyr']).unique())
print('Total unique [parid, taxyr] keys: ', unique_sales)
print('Total non-unique [parid, taxyr] keys: ', len(full_sales_parcel) - unique_sales)
print('Percent (non-unique) of total: ', (len(full_sales_parcel) - unique_sales) / len(full_sales_parcel) * 100)

Total unique [parid, taxyr] keys:  4337774
Total non-unique [parid, taxyr] keys:  1376056
Percent (non-unique) of total:  24.082900611323755


Compared to original sales

In [13]:
all_sales = [read_file(sales_path + file, sales_dtypes, sales_new_names) for file in sales_files]
all_sales_append = pl.concat(all_sales, how='diagonal')
all_sales_append.write_csv('../output/sales/sales_all_years.csv')

Reading file:  C:/Users/nicho/Documents/research/FCS/data/sales/Sales2011.txt
Reading file:  C:/Users/nicho/Documents/research/FCS/data/sales/Sales2012.txt
Reading file:  C:/Users/nicho/Documents/research/FCS/data/sales/Sales2013.txt
Reading file:  C:/Users/nicho/Documents/research/FCS/data/sales/Sales2014.txt
Reading file:  C:/Users/nicho/Documents/research/FCS/data/sales/Sales2015.txt
Reading file:  C:/Users/nicho/Documents/research/FCS/data/sales/Sales2016.txt
Reading file:  C:/Users/nicho/Documents/research/FCS/data/sales/Sales2017.txt
Reading file:  C:/Users/nicho/Documents/research/FCS/data/sales/Sales2018.txt
Reading file:  C:/Users/nicho/Documents/research/FCS/data/sales/Sales2019.txt
Reading file:  C:/Users/nicho/Documents/research/FCS/data/sales/Sales2020.txt
Reading file:  C:/Users/nicho/Documents/research/FCS/data/sales/Sales2021.txt
Reading file:  C:/Users/nicho/Documents/research/FCS/data/sales/Sales2022.txt


In [14]:
add_rows = len(full_sales_parcel) - len(all_sales_append)
print('Additional non-unique rows generated from processing: ', add_rows)
print('Percent of original sales: ', add_rows / len(all_sales_append) * 100)

Additional non-unique rows generated from processing:  5236592
Percent of original sales:  1097.270544256744


### Post-Processing
---

Save before post-processing to avoid having to re-run entire process.

In [15]:
full_sales_parcel.write_csv('../output/merged/pre_sales_parcel_left.csv')
full_sales_parcel.write_parquet('../output/merged/pre_sales_parcel_left.parquet')

Convert salesprice, fmv, costval -> float  
Convert own_adrno, own_zip -> int  
AND drop rows with bad entries for all of the above variables except costval

In [14]:
init_size = len(full_sales_parcel)
full_sales_parcel = full_sales_parcel.select(
    pl.col('salesprice').str.replace_all(',','').cast(pl.Float64),
    pl.col('fmv').str.replace_all(',','').cast(pl.Float64),
    pl.col('costval').str.replace_all(',','').cast(pl.Float64),
    pl.col('*').exclude(['salesprice', 'fmv', 'costval'])
)

Number of null own_adrno.

In [15]:
len(full_sales_parcel.filter(
    pl.col('own_adrno').is_null()
))

35173

Number of rows where both own_adrno and own_adrstr are null. We want count of null own_adrno = count of null own_adrstr, since we can assume there is no own_adrno if there is no street information either.

In [16]:
len(full_sales_parcel.filter(
    pl.col('own_adrno').is_null() &
    pl.col('own_adrstr').is_null()
))

17141

In [17]:
full_sales_parcel.filter(
    pl.col('own_adrno').is_null()
)[['own_adrno', 'own_adrstr']].sample(2)

own_adrno,own_adrstr
str,str
,
,


Extract own_adrno from PO boxes to reduce the number of nulls. Most null own_adrno are caused by PO boxes.

In [18]:
full_sales_parcel = full_sales_parcel.with_columns(
    pl.when(pl.col('own_adrno').is_null())
    .then(pl.col('own_adrstr').str.extract(r'(\d+)'))
    .otherwise(pl.col('own_adrno'))
    .alias('own_adrno')
)

Now we can drop nulls for own_adrno. We can also drop nulls for own_zip.

In [19]:
init_len = len(full_sales_parcel)
full_sales_parcel = full_sales_parcel.drop_nulls(subset=['own_adrno', 'own_zip'])
print('Number of rows dropped: ', init_len - len(full_sales_parcel))
print('Percent: ', (init_len - len(full_sales_parcel)) / len(full_sales_parcel) * 100)

Number of rows dropped:  18217
Percent:  3.9368019778015975


Investigate incorrect own_zip.

In [20]:
full_sales_parcel.filter(
    pl.col('own_zip').str.contains(r'[a-zA-Z]')
)['own_zip'].sample(5)

own_zip
str
"""B3S 1"""
"""L7M4H"""
"""B3S 1"""
"""B3S 1"""
"""N4N3V"""


Number of incorrect own_zip.

In [21]:
len(full_sales_parcel.filter(
    pl.col('own_zip').str.contains(r'[a-zA-Z]')
))

165

In [22]:
len(full_sales_parcel)

462736

Drop incorrect own_zip.

In [23]:
init_len = len(full_sales_parcel)

full_sales_parcel = full_sales_parcel.with_columns(
    pl.when(pl.col('own_zip').str.contains(r'[a-zA-Z]'))
    .then(None)
    .otherwise(pl.col('own_zip'))
    .alias('own_zip')
).drop_nulls(['own_zip'])

print('Count dropped: ', init_len - len(full_sales_parcel))

Count dropped:  165


Cast own_adrno and own_zip -> int.

In [24]:
full_sales_parcel = full_sales_parcel.select(
    pl.col('own_adrno').cast(pl.Int32),
    pl.col('own_zip').cast(pl.Int32),
    pl.col('*').exclude(['own_adrno', 'own_zip'])
)

'Aprland': pl.Utf8, 'Aprbldg': pl.Utf8, 'Aprtot': pl.Utf8

In [25]:
full_sales_parcel.sample(5)

own_adrno,own_zip,salesprice,fmv,costval,taxyr,saleyr,sale_month,parid,sale_date,deed,sale_aprland,sale_aprbldg,saleval,appraiser,when,grantor,grantee,sale_adrpre,sale_adrno,sale_adrdir,sale_adrstr,sale_adrsuf,sale_adrsuf2,sale_cityname,sale_unitno,livunit,parid_strip,parid_right,nbhd,site_adrno,site_adrdir,site_adrstr,site_adrsuf,site_cityname,zoning,site_muni,class,luc,livunit_right,calcacres,note1,of_card,change_reason,taxdist,own1,own2,own_adradd,own_adrdir,own_adrstr,own_adrsuf,own_adrsuf2,own_statecode,own_country,own_unitno,aprland,aprbldg,aprtot,card,style,yrblt,yr_remod,rmtot,rmbed,fixbath,fixhalf
i32,i32,f64,f64,f64,cat,cat,cat,str,str,cat,str,str,cat,str,str,str,str,cat,str,cat,str,cat,cat,cat,str,i16,str,str,cat,i64,str,str,str,cat,cat,cat,cat,cat,str,str,str,i16,cat,cat,str,str,str,cat,str,cat,str,cat,cat,str,str,str,str,cat,cat,cat,i32,i16,i16,i16,i16
6910,30328,0.0,301000.0,301700.0,"""2011""","""2010""","""Apr""","""17 01270001011…","""14-APR-2010""","""QC""","""78000""","""223000""","""G""","""TA_LPRICE""","""21-JUN-2010""","""HALL CLAIRE""","""TAYLOR CLAIRE …",,"""6910""",,"""BRANDON MILL""","""RD""","""NW""","""SANDY SPRINGS""",,1,"""17012700010118…","""17 01270001011…","""17831""",6910,,"""BRANDON MILL""","""RD""","""SANDY SPRINGS""","""R3""","""59""","""R3""","""101""","""1""","""0.429200000000…","""AN ADDED AGE 4…",1,"""MN""","""59""","""TAYLOR CLAIRE …",,,,"""BRANDON MILL""","""RD""","""NW""","""GA""",,,"""78000""","""223000""","""301000""","""1""","""2""","""1957""",,7,3,2,0
7125,30328,390000.0,390000.0,387900.0,"""2016""","""2015""","""May""","""17 00740001055…","""29-MAY-2015""","""LW""","""53,700""","""336,300""","""9""","""TA_JBANKS""","""30-SEP-2015""","""TULISALO KARL …","""WALTHALL RYAN …",,"""7125""",,"""DUNCOURTNEY""","""DR""",,"""SANDY SPRINGS""",,1,"""17007400010558…","""17 00740001055…","""1783""",7125,,"""DUNCOURTNEY""","""DR""","""SANDY SPRINGS""","""R3""","""59""","""R3""","""101""","""1""","""0.503""","""AN PKD UP HSE …",1,"""SP""","""59""","""WALTHALL RYAN …",,,,"""DUNCOURTNEY""","""DR""",,"""GA""",,,"""53700""","""336300""","""390000""","""1""","""1""","""1968""",,9,5,2,1
20465,30325,25000.0,96200.0,96200.0,"""2012""","""2011""","""Apr""","""14 01650004114…","""29-APR-2011""","""WD""","""13700""","""82500""","""0""","""TA_NSNEED""","""06-FEB-2012""","""C & B PAYNE FA…","""CMC HOLDINGS &…",,"""1806""",,"""NEELY""","""AVE""",,"""EP""",,2,"""14016500041147…","""14 01650004114…","""14474""",1806,,"""NEELY""","""AVE""","""EAST POINT""","""R1""","""20""","""R3""","""102""","""2""","""0.276000000000…",,1,"""RV""","""20""","""CMC HOLDINGS &…",,,,"""P O BOX 20465""",,,"""GA""",,,"""5210""","""19700""","""24910""","""1""","""6""","""1984""",,10,6,2,2
2651,30318,1.0,104900.0,104900.0,"""2022""","""2021""","""Sep""","""17 02510001009…","""09-SEP-2021""","""QC""","""52,500""","""52,400""","""T""","""TA_WBRITT""","""24-JAN-2022""","""MAPP EDDIE RUT…","""WALKER WANDA""",,"""2651""",,"""BROWNTOWN""","""RD""","""NW""","""ATLANTA""",,1,"""17025100010090…","""17 02510001009…","""1741""",2651,,"""BROWNTOWN""","""RD""","""ATLANTA""","""R4""","""5""","""R3""","""101""","""1""","""0.1148""","""8/16 CONFIRMED…",1,"""RV""","""5""","""WALKER WANDA""",,,,"""BROWNTOWN""","""RD""","""NW""","""GA""",,,"""52500""","""52400""","""104900""","""1""","""2""","""1950""",,4,2,1,0
56,30327,205000.0,173400.0,173400.0,"""2018""","""2017""","""Dec""","""17 01850005056…","""18-DEC-2017""","""LW""","""35,400""","""138,000""","""0""","""TA_VJARVIS""","""17-MAY-2018""","""CREAMER MARIAN…","""CASTRO BENJAMI…",,"""56""",,"""CANTEY""","""PL""","""NW""","""ATLANTA""",,1,"""17018500050568…","""17 01850005056…","""971""",56,,"""CANTEY""","""PL""","""ATLANTA""","""RG2""","""5""","""R3""","""106""","""1""","""2.98E-2""","""AN BLDG 9 U56…",1,"""RV""","""5""","""CASTRO BENJAMI…",,,,"""CANTEY""","""PL""","""NW""","""GA""",,,"""35400""","""138000""","""173400""","""1""","""10""","""1970""",,5,2,2,1


### Final output
---

In [26]:
full_sales_parcel.write_csv('../output/merged/sales_parcel_left.csv')
full_sales_parcel.write_parquet('../output/merged/sales_parcel_left.parquet')

### Data Modification
---

Read in data using Parquet files from previous processing

Transform as desired

Output