SDAT information is maintained from a baseline that is current through Q2 2020, available in a zip file at the link below.  The file of interest is: Dorc2017.dbf  
https://www.dropbox.com/s/oc1l1frorg66vlr/DORC_MPV17.zip
extract it using: $> tar -xf DORC_MPV17.zip Dorc2017/ATDATA/DATABASE/Dorc2017.dbf

Updating this file requires downloading monthly (or quarterly) changes listed on this page:
https://planning.maryland.gov/Pages/OurProducts/DownloadFiles.aspx

All of the updates through 4/30/2021 have been aggregated in a single file written from this notebook.

### NOTE: This notebook adds the assessment fields onto the SDAT before saving it.

In [None]:
# add any packages that aren't available by default
!pip install simpledbf

Collecting simpledbf
  Downloading simpledbf-0.2.6.tar.gz (17 kB)
Building wheels for collected packages: simpledbf
  Building wheel for simpledbf (setup.py) ... [?25l[?25hdone
  Created wheel for simpledbf: filename=simpledbf-0.2.6-py3-none-any.whl size=13801 sha256=c7994a77ffacedf8bb61457f561de291d3290eb270cdd7e178ecbc92ef3eebaa
  Stored in directory: /root/.cache/pip/wheels/24/43/f4/39ad84349e5358346be977fe626160f5625fdd3ea8e017518c
Successfully built simpledbf
Installing collected packages: simpledbf
Successfully installed simpledbf-0.2.6


In [None]:
import pandas as pd
from simpledbf import Dbf5

In [None]:
# Mount Google Drive so the SDAT files stored there can be read from this runtime.
# NOTE(review): later cells use the relative path 'drive/My Drive/...', which only
# resolves to this mount while the working directory is /content — confirm.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


#### Open the baseline file from the state

In [None]:
# Read the state's baseline table and key every row by its account id (acctid).
dbf = Dbf5('drive/My Drive/SDAT/Dorc2017.dbf')
df = dbf.to_dataframe().set_index('acctid')

#### Discover all the update files, and append them in the order they were published

In [None]:
from os import walk

def _sale_file_order(name):
  """Sort key that puts SALEmmyy.dbf update files in publication order.

  The update files are named like ``SALE0420.dbf`` / ``Sale0121.dbf``: a two-digit
  month followed by a two-digit year. A plain (case-sensitive) alphabetical sort
  compares the month before the year, so it is only chronological by accident of
  upper/lower case. Sort on (year, month) instead; names without an mmyy stamp
  sort last, alphabetically.
  """
  import re

  stamp = re.search(r'(\d{2})(\d{2})', name)
  if stamp is None:
    return (1, 0, 0, name.upper())
  month, year = int(stamp.group(1)), int(stamp.group(2))
  return (0, year, month, name.upper())


def update(df):
  """Apply every SALE*.dbf update file in the SDAT folder to the baseline frame.

  Parameters
  ----------
  df : pandas.DataFrame
    Baseline records indexed by ``acctid``. The caller's frame is not modified;
    a new frame is built and returned.

  Returns
  -------
  pandas.DataFrame
    The baseline with each update file applied in publication order. A record
    present in an update file replaces the baseline record with the same acctid;
    records with a new acctid are added.

  Notes
  -----
  * Only the top level of 'drive/My Drive/SDAT/' is scanned.
  * Only rows whose ``jurscode`` is "DORC" are taken from each update file.
  * A replaced record carries only the columns the update file shares with the
    baseline; baseline-only columns are left empty (NaN) for that record.
  * Relies on ``Dbf5``, ``walk`` and ``pd`` being imported earlier in the notebook.
  """
  import os

  sdat_dir = 'drive/My Drive/SDAT/'
  print("rows:",len(df))

  # First entry yielded by walk() is the folder itself; fall back to "no files"
  # when the folder cannot be read so the baseline is returned unchanged.
  dirpath, _subdirs, filenames = next(walk(sdat_dir), (sdat_dir, [], []))
  sale_files = sorted(
      (name for name in filenames
       if 'SALE' in name.upper() and name.lower().endswith('.dbf')),
      key=_sale_file_order)

  for file in sale_files:
    path = os.path.join(dirpath, file)
    print(path)
    new_df = Dbf5(path).to_dataframe()
    new_df.columns = [col.lower() for col in new_df.columns]
    new_df = new_df.query('jurscode == "DORC"').set_index('acctid')
    # Within one file the last row for an account wins.
    new_df = new_df[~new_df.index.duplicated(keep='last')]

    # Remove the records this file supersedes, then add the file's version of them.
    df = df.drop(index=df.index.intersection(new_df.index))
    # A list (in baseline column order) instead of a set: pandas rejects set
    # indexers in newer releases and a set has no defined column order.
    shared_columns = [col for col in df.columns if col in new_df.columns]
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    df = pd.concat([df, new_df[shared_columns]])

  # Safety net in case the baseline itself carried duplicate account ids.
  df = df[~df.index.duplicated(keep='last')]
  print("final:",len(df))
  return df

# Work on a copy so the baseline `df` stays available for the before/after comparison below.
merged_df = update(df.copy())

rows: 23202
drive/My Drive/SDAT/SALE0420.dbf
drive/My Drive/SDAT/SALE0520.dbf
drive/My Drive/SDAT/SALE0620.dbf
drive/My Drive/SDAT/SALE0720.dbf
drive/My Drive/SDAT/SALE0820.dbf
drive/My Drive/SDAT/SALE1020.dbf
drive/My Drive/SDAT/SALE1120.dbf
drive/My Drive/SDAT/SALE1220.dbf
drive/My Drive/SDAT/Sale0121.dbf
drive/My Drive/SDAT/Sale0221.dbf
drive/My Drive/SDAT/Sale0321.dbf
drive/My Drive/SDAT/Sale0421.dbf
drive/My Drive/SDAT/Sale0521.dbf
drive/My Drive/SDAT/Sale0621.dbf
drive/My Drive/SDAT/Sale0721.dbf
final: 23215


#### Check to verify the updates from MD are applied OK.  Look up one record that we know was updated.  The merged_df should be like the new record, and the original df should be different now.

In [None]:
# Load the most recent update file on its own, shaped the same way update() shapes it:
# lower-case column names, Dorchester (DORC) rows only, indexed by acctid.
add_df = Dbf5('drive/My Drive/SDAT/Sale0721.dbf')
test_df = (
    add_df.to_dataframe()
    .rename(columns=str.lower)
    .query('jurscode == "DORC"')
    .set_index('acctid')
)

test_df  #.query('acctid == "10007106653"')

Unnamed: 0_level_0,jurscode,digxcord,digycord,ct2010,bg2010,geogcode,ooi,address,city,zipcode,ownname1,ownname2,namekey,ownadd1,ownadd2,owncity,ownstate,ownerzip,ownzip2,premsnum,premsdir,premsnam,premstyp,premcity,premzip,premzip2,legal1,legal2,legal3,dr1clerk,dr1liber,dr1folio,towncode,desctown,subdivsn,dsubcode,descsubd,plat,section,block,...,depth,pfuw,pfus,pflw,pfsp,pfsu,pfic,pfih,recind,yearblt,sqftstrc,strugrad,descgrad,strucnst,desccnst,strustyl,descstyl,strubldg,descbldg,lastinsp,lastassd,assessor,transno1,grntnam1,gr1clrk1,gr1libr1,gr1folo1,convey1,tradate,considr1,mortgag1,curlndvl,curimpvl,curttlvl,sallndvl,salimpvl,salttlvl,ptype,sdatwebadr,existing
acctid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1001000020,DORC,505757.4,99135.2,24019970100,240199701002,80,D,5430 INDIANTOWN ROAD,RHODESDALE,21659,PLEASANTS THOMAS R,,PLEASANTS THOMAS R,5430 INDIANTOWN RD,,RHODESDALE,MD,21659,,5430,,INDIANTOWN,RD,RHODESDALE,21659,,IMPV58.963 ACRES,W/S INDIANTOWN RD.,S OF BROOKVIEW,,01595,0347,,,0000,010000,,,,,...,0.0,1,2,0,0,0,0,0,0,1976,3028,4,Codes range from lowest to highest quality 1-9,001,CNST Siding,001,STRY 1 Story No Basement,001,DWEL Standard Unit,201709,189912,1014,,PLEASANTS THOMAS L & ARLENE F,PLC,00266,0744,4,20200914,0,0,71800,266300,0,71800,266300,0,2,https://sdat.dat.maryland.gov/RealProperty/Pag...,MDPV2017_18
1001000063,DORC,511793.2,105251.7,24019970100,240199701002,80,N,6002 ALLEN ROAD,RHODESDALE,21659,M & V DONOVAN FARMS LLC,,M & V DONOVAN FARMS LLC,6002 ALLEN RD,RT 3 BOX 270,SEAFORD,DE,19973,6057,6002,,ALLEN,RD,SEAFORD,19973,,IMPV72 ACRES,E/S ALLEN RD.,S OF RELIANCE,,01658,0208,,,0000,010000,,,,,...,0.0,1,2,0,0,0,0,0,0,2004,2491,4,Codes range from lowest to highest quality 1-9,001,CNST Siding,001,STRY 1 Story No Basement,001,DWEL Standard Unit,201709,189912,1014,,DONOVAN MICHAEL & VICKI,PLC,00243,0368,4,20210617,0,0,75600,289800,0,75600,289800,0,2,https://sdat.dat.maryland.gov/RealProperty/Pag...,MDPV2017_18
1001000489,DORC,510623.4,107153.7,24019970100,240199701001,80,N,,RHODESDALE,,ROBERT ALLAN AND BRENDA L WOOD IRR,,ROBERT ALLAN AND BRENDA L,28688 ELLIS MILL RD,,SEAFORD,DE,19973,,,,BETHEL,RD,,,,44.86 ACRES,S OF BETHEL RD.,SW OF RELIANCE,,01654,0324,,,0000,010000,,,,,...,0.0,1,1,0,0,0,0,0,0,,0,,,,,,,,,201708,189912,1014,,WOOD BRENDA D,,01474,0337,4,20210601,0,0,14500,0,0,14500,0,0,2,https://sdat.dat.maryland.gov/RealProperty/Pag...,MDPV2017_18
1001000616,DORC,507323.8,107730.4,24019970100,240199701001,80,N,,FEDERALSBURG,,SMULLEN GLORIA,KILLETTE GWENDOLYN,SMULLEN GLORIA,175 HIGHLAND AVE,,PISCATAWAY,NJ,08854,,,,MD RT 392,,,,,4.44 ACRES,S/S MD RT 392,,,01653,0305,,,0000,010000,,,,,...,0.0,1,1,0,0,0,0,0,0,,0,,,,,,,,,201708,189912,1014,,BOLDEN NETTIE EVANS,,00099,0261,4,20210527,0,0,33600,0,0,33600,0,0,2,https://sdat.dat.maryland.gov/RealProperty/Pag...,MDPV2017_18
1001001078,DORC,506245.4,101882.2,24019970100,240199701002,80,N,,RHODESDALE,,ROBINSON WILLIS J,ROBINSON CAROL L,ROBINSON WILLIS J,P O BOX 214,,SHARPTOWN,MD,21861,,,,,,,,,63.932 ACRES,SW/S ELDORADO-SHARPTOWN,,,01624,0320,,,0000,010000,,,,,...,0.0,1,1,0,0,0,0,0,0,,0,,,,,,,,,201709,189912,1014,,ROBINSON WILLIS J,,00204,0630,4,20210202,0,0,11100,0,0,11100,0,0,2,https://sdat.dat.maryland.gov/RealProperty/Pag...,MDPV2017_18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1018001128,DORC,486110.6,71591.0,24019970900,240199709001,82,N,2211 ELLIOTT ISLAND ROAD,VIENNA,21869,MARTINEK CHESTER DANIEL,MARTINEK MELISSA G,MARTINEK CHESTER DANIEL,2232 ELLIOTT ISLAND RD,,VIENNA,MD,21869,9633,2211,,ELLIOTT ISLAND,RD,,,,IMPV2.06 ACRES,E/S FISHING BAY,,,01597,0100,,,0000,180000,,,,,...,0.0,2,2,1,0,0,0,0,0,1970,938,3,Codes range from lowest to highest quality 1-9,002,CNST Frame,MH1,HOUSING Mobile Home,006,DWEL Mobile Home,201910,189912,1018,,MARTINEK DANIEL D AND ETAL,,01138,0110,4,20200923,0,0,151500,5900,0,151500,5900,0,2,https://sdat.dat.maryland.gov/RealProperty/Pag...,MDPV2017_18
1018001721,DORC,486204.8,71552.5,24019970900,240199709001,82,N,,VIENNA,,MARTINEK CHESTER DANIEL,MARTINEK MELISSA G,MARTINEK CHESTER DANIEL,2232 ELLIOTT ISLAND RD,,VIENNA,MD,21869,9633,,,ELLIOTT ISLAND,RD,,,,1 ACRE,S OF ELLIOTT ISLAND RD.,,,01597,0100,,,0000,180000,,,,,...,0.0,0,0,1,0,1,0,0,0,,0,,,,,,,,,201910,189912,1018,,MARTINEK DANIEL D AND ETAL,,01138,0110,4,20200923,0,0,1000,0,0,1000,0,0,2,https://sdat.dat.maryland.gov/RealProperty/Pag...,MDPV2017_18
1018001764,DORC,487882.7,71842.0,24019970900,240199709001,82,N,2357 ELLIOTT ISLAND ROAD,VIENNA,21869,ZIMMERMAN EARL,ZIMMERMAN BRENDA,ZIMMERMAN EARL,3 BUCH MILL RD,,LITITZ,PA,17543,,2357,,ELLIOTT ISLAND,RD,VIENNA,21869,9603,"IMPV30,492 SQ. FT.",S OF ELLIOTT ISLAND RD.,ELLIOTT ISLAND,,01626,0290,,,0000,180000,,,,,...,0.0,2,2,0,0,1,0,0,0,1984,1512,4,Codes range from lowest to highest quality 1-9,001,CNST Siding,003,STRY 1.5 Story No Basement,001,DWEL Standard Unit,201910,189912,1018,,JEFFERSON JOYCE L,MLB,00903,0368,1,20210210,179000,0,24300,110200,0,24300,110200,0,2,https://sdat.dat.maryland.gov/RealProperty/Pag...,MDPV2017_18
1018001772,DORC,486501.3,71460.5,24019970900,240199709001,82,H,2233 ELLIOTT ISLAND ROAD,VIENNA,21869,MARTINEK HOWARD FRANKLIN,MARTINEK ROCHELLE M,MARTINEK HOWARD FRANKLIN,2233 ELLIOTT ISLAND RD,,VIENNA,MD,21869,9633,2233,,ELLIOTT ISLAND,RD,VIENNA,21869,,IMPV2 ACRES,S OF ELLIOTT ISLAND RD,E/S FISHING BAY,,01597,0105,,,0000,180000,,,,,...,0.0,2,2,1,0,1,0,0,0,1955,2016,4,Codes range from lowest to highest quality 1-9,004,CNST Shingle Asbestos,001,STRY 1 Story No Basement,001,DWEL Standard Unit,201910,189912,1018,,MARTINEK DANIEL D AND ETAL,,01138,0105,4,20200923,0,0,151500,82000,0,151500,82000,0,2,https://sdat.dat.maryland.gov/RealProperty/Pag...,MDPV2017_18


In [None]:
# Baseline version of the spot-check account, before any update file is applied.
df.query('acctid == "1001000616"')

Unnamed: 0_level_0,jurscode,digxcord,digycord,ct2010,bg2010,geogcode,ooi,resityp,address,strtnum,strtdir,strtnam,strttyp,strtsfx,strtunt,addrtyp,city,zipcode,ownname1,ownname2,namekey,ownadd1,ownadd2,owncity,ownstate,ownerzip,ownzip2,premsnum,premsdir,premsnam,premstyp,premcity,premzip,premzip2,legal1,legal2,legal3,dr1clerk,dr1liber,dr1folio,...,assessor,transno1,grntnam1,gr1clrk1,gr1libr1,gr1folo1,convey1,tradate,considr1,mortgag1,nfmlndvl,nfmimpvl,nfmttlvl,crtarcod,fcmacode,agfndarea,agfndluom,entzndat,entznassm,plndevdat,nprctstdat,nprcarea,nprcluom,homqlcod,homqldat,bldg_story,bldg_units,resident,resi2010,resi2000,resi1990,resiuths,aprtment,trailer,special,other,ptype,sdatwebadr,existing,mdpvdate
acctid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1001000616,DORC,507323.8,107730.4,24019970100,240199701001,80,N,SF,,,,,,,,,FEDERALSBURG,21632,BOLDEN NETTIE M,,BOLDEN NETTIE M,C/O GWENOLYN KILLETTE,310 GLEN OAK CIR,HURLOCK,MD,21643,,,,,,,,,4.44 ACRES,S/S MD RT 392,,,99,261,...,1014,,,,,,,,0,0,33600,0,33600,,,0.0,,,0,,,0.0,,,,,0,0,0,0,0,0,0,0,0,0,2,http://sdat.dat.maryland.gov/RealProperty/Page...,MDPV2017_18,2020JUN


In [None]:
# Same account after the updates: it should now carry the values from the update file.
merged_df.query('acctid == "1001000616"')

Unnamed: 0_level_0,jurscode,digxcord,digycord,ct2010,bg2010,geogcode,ooi,resityp,address,strtnum,strtdir,strtnam,strttyp,strtsfx,strtunt,addrtyp,city,zipcode,ownname1,ownname2,namekey,ownadd1,ownadd2,owncity,ownstate,ownerzip,ownzip2,premsnum,premsdir,premsnam,premstyp,premcity,premzip,premzip2,legal1,legal2,legal3,dr1clerk,dr1liber,dr1folio,...,assessor,transno1,grntnam1,gr1clrk1,gr1libr1,gr1folo1,convey1,tradate,considr1,mortgag1,nfmlndvl,nfmimpvl,nfmttlvl,crtarcod,fcmacode,agfndarea,agfndluom,entzndat,entznassm,plndevdat,nprctstdat,nprcarea,nprcluom,homqlcod,homqldat,bldg_story,bldg_units,resident,resi2010,resi2000,resi1990,resiuths,aprtment,trailer,special,other,ptype,sdatwebadr,existing,mdpvdate
acctid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1001000616,DORC,507323.8,107730.4,24019970100,240199701001,80,N,,,,,,,,,,FEDERALSBURG,,SMULLEN GLORIA,KILLETTE GWENDOLYN,SMULLEN GLORIA,175 HIGHLAND AVE,,PISCATAWAY,NJ,8854,,,,MD RT 392,,,,,4.44 ACRES,S/S MD RT 392,,,1653,305,...,1014,,BOLDEN NETTIE EVANS,,99,261,4,20210527,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,https://sdat.dat.maryland.gov/RealProperty/Pag...,MDPV2017_18,


#### Verify the result joins cleanly with some enrichment data we carry from year to year  
This includes things like names of rental operation groups, notes, etc. that we might want to reference with the new data.  It will be added later so we don't have to store it multiple times.

In [None]:
# Outer-join the enrichment table to the merged SDAT data on acctid; the `_merge`
# indicator records whether each account was found on the left, the right, or both.
enrichment = pd.read_csv("drive/My Drive/SDAT/CAN-ref.csv").set_index('acctid')
e_merge = merged_df.merge(enrichment, left_index=True, right_index=True, how='outer', indicator=True)
# Pass the pieces to print() as separate arguments; the previous extra pair of
# parentheses printed a tuple repr instead of a sentence.
print(len(enrichment), "records. Enriched after the join:", len(e_merge.query('_merge == "both"')))
print("These have an issue, but that looks ok because they aren't housing:",len(e_merge.query('_merge == "right_only"')))
# Uncomment to inspect the enrichment rows that found no SDAT record:
#e_merge.query('_merge == "right_only"')

(7924, 'records. Enriched after the join:', 7859)
These have an issue, but that looks ok because they aren't housing: 65


### Add the latest assessment data, grabbing it directly from MD Open Data

In [None]:
# Map each Open Data field we rename to the short column name used downstream.
# A dict keeps every field next to its new name. The previous pair of parallel lists
# had no name for the year-built field, so every later name was applied to the wrong
# column (`address_number` held the year built, `street_type` held the street name, ...).
assessment_columns = {
    'account_id_mdp_field_acctid': 'acctid',
    'real_property_search_link': 'sdat',
    'search_google_maps_for_this_location': 'google_maps',
    'c_a_m_a_system_data_structure_area_sq_ft_mdp_field_sqftstrc_sdat_field_241': 'struct_sqft',
    'current_assessment_year_total_phase_in_value_sdat_field_171': 'assessed_value',
    'c_a_m_a_system_data_year_built_yyyy_mdp_field_yearblt_sdat_field_235': 'year_built',
    'premise_address_number_mdp_field_premsnum_sdat_field_20': 'address_number',
    'premise_address_number_suffix_sdat_field_21': 'address_unit_id',
    'premise_address_direction_mdp_field_premsdir_sdat_field_22': 'street_direction',
    'premise_address_name_mdp_field_premsnam_sdat_field_23': 'street_name',
    'premise_address_type_mdp_field_premstyp_sdat_field_24': 'street_type',
}
# These fields are kept under their Open Data names, as they were before.
assessment_passthrough = [
    'premise_address_city_mdp_field_premcity_sdat_field_25',
    'premise_address_zip_code_mdp_field_premzip_sdat_field_26',
    'mdp_street_address_mdp_field_address',
]
assessment_fields = list(assessment_columns) + assessment_passthrough
assessment_column_names = list(assessment_columns.values())

assessments = (
    pd.read_csv(
        'https://opendata.maryland.gov/resource/ed4q-f8tm.csv?jurisdiction_code_mdp_field_jurscode=DORC&$limit=25000',
        # Account ids are labels, not numbers: read them as text so they match the
        # string acctid index of the SDAT frame.
        dtype={'account_id_mdp_field_acctid': str},
        # Read the file in one pass so each column's dtype is inferred consistently.
        low_memory=False,
    )[assessment_fields]
    .rename(columns=assessment_columns)
    .set_index('acctid')
)

  interactivity=interactivity, compiler=compiler, result=result)


In [None]:
# Spot-check a single account in the assessment extract.
assessments.query('acctid == "1007111649"')

Unnamed: 0,acctid,jurscode,digxcord,digycord,ct2010,bg2010,geogcode,ooi,resityp,address,strtnum,strtdir,strtnam,strttyp,strtsfx,strtunt,addrtyp,city,zipcode,ownname1,ownname2,namekey,ownadd1,ownadd2,owncity,ownstate,ownerzip,ownzip2,premsnum,premsdir,premsnam,premstyp,premcity,premzip,premzip2,legal1,legal2,legal3,dr1clerk,dr1liber,...,fcmacode,agfndarea,agfndluom,entzndat,entznassm,plndevdat,nprctstdat,nprcarea,nprcluom,homqlcod,homqldat,bldg_story,bldg_units,resident,resi2010,resi2000,resi1990,resiuths,aprtment,trailer,special,other,ptype,sdatwebadr,existing,mdpvdate,sdat,google_maps,struct_sqft,assessed_value,address_number,address_unit_id,street_direction,street_name,street_type,premise_address_type_mdp_field_premstyp_sdat_field_24,premise_address_city_mdp_field_premcity_sdat_field_25,premise_address_zip_code_mdp_field_premzip_sdat_field_26,mdp_street_address_mdp_field_address,_merge
5054,1007111649,DORC,479567.2,100422.5,24019970500,240199705003,81,N,SF,705 ROSEMONT AVE,705,,ROSEMONT,AVE,,AVE,P,CAMBRIDGE,21613,RIGBY AVENUE LLC,,RIGBY AVENUE LLC,3550 PATUXENT RIVER RD,,DAVIDSONVILLE,MD,21035,2230,705,,ROSEMONT,AVE,CAMBRIDGE,21613,,"IMPSLOT 50-3,278 SQ.FT.",705-707 ROSEMONT AVENUE,CAMBRIDGE,MLB,404,...,,0.0,,,0.0,,,0.0,,,,,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,http://sdat.dat.maryland.gov/RealProperty/Page...,MDPV2017_18,2020JUN,http://sdat.dat.maryland.gov/RealProperty/Page...,https://maps.google.com/maps?t=h&q=38.56775737...,1056.0,31533.0,1960.0,705.0,,,ROSEMONT,AVE,CAMBRIDGE,21613.0,705 ROSEMONT AVE,both


In [None]:
# Outer-join SDAT with the assessment extract on acctid. `_merge` flags accounts found
# on only one side. (A stray `on=` prefix used to bind the result to a second,
# unused variable named `on`; it has been removed.)
sdat_plus_assessments = (
    merged_df.reset_index()
    .merge(assessments, on='acctid', how='outer', indicator=True)
    .set_index('acctid')
)
print("SDAT has ",len(merged_df), "records.  After the join there are:",len(sdat_plus_assessments.query('_merge == "both"')))

SDAT has  23215 records.  After the join there are: 23212


### Write the combined data set out for use later.

In [None]:
# Save only the accounts present in both sources; the `_merge` bookkeeping column
# is dropped before writing.
(
    sdat_plus_assessments
    .loc[lambda frame: frame['_merge'] == 'both']
    .drop(columns='_merge')
    .to_csv('drive/My Drive/pita 2021/SDAT-CAN-ref-202107.csv')
)

In [None]:
# Show the full outer-join result, including rows matched on only one side (see `_merge`).
sdat_plus_assessments

Unnamed: 0_level_0,jurscode,digxcord,digycord,ct2010,bg2010,geogcode,ooi,resityp,address,strtnum,strtdir,strtnam,strttyp,strtsfx,strtunt,addrtyp,city,zipcode,ownname1,ownname2,namekey,ownadd1,ownadd2,owncity,ownstate,ownerzip,ownzip2,premsnum,premsdir,premsnam,premstyp,premcity,premzip,premzip2,legal1,legal2,legal3,dr1clerk,dr1liber,dr1folio,...,fcmacode,agfndarea,agfndluom,entzndat,entznassm,plndevdat,nprctstdat,nprcarea,nprcluom,homqlcod,homqldat,bldg_story,bldg_units,resident,resi2010,resi2000,resi1990,resiuths,aprtment,trailer,special,other,ptype,sdatwebadr,existing,mdpvdate,sdat,google_maps,struct_sqft,assessed_value,address_number,address_unit_id,street_direction,street_name,street_type,premise_address_type_mdp_field_premstyp_sdat_field_24,premise_address_city_mdp_field_premcity_sdat_field_25,premise_address_zip_code_mdp_field_premzip_sdat_field_26,mdp_street_address_mdp_field_address,_merge
acctid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1001000012,DORC,508948.5,110654.0,24019970100,240199701001,80,N,SF,5727 ADAMS ROAD,5727,,ADAMS,RD,,RD,P,FEDERALSBURG,21632,NAGEL RICHARD LEE & CONNIE JANE,,NAGEL RICHARD LEE & CONNI,5714 ADAMS RD,,FEDERALSBURG,MD,21632,1700,5727,,ADAMS,RD,FEDERALSBURG,21632,,52.94 ACRES,S/S ADAMS RD.,NE OF FINCHVILLE,MLB,00363,0779,...,,0.0,,,0.0,,,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,http://sdat.dat.maryland.gov/RealProperty/Page...,MDPV2017_18,2020JUN,http://sdat.dat.maryland.gov/RealProperty/Page...,https://maps.google.com/maps?t=h&q=38.65678537...,0.0,22100.0,0.0,5727.0,,,ADAMS,RD,FEDERALSBURG,21632.0,5727 ADAMS ROAD,both
1001000039,DORC,511216.1,106713.9,24019970100,240199701002,80,H,SF,6009 COKESBURY ROAD,6009,,COKESBURY,RD,,RD,P,RHODESDALE,19973,GARDINER KEVIN E,GARDINER LORI A,GARDINER KEVIN E,6009 COKESBURY RD,,SEAFORD,DE,19973,,6009,,COKESBURY,RD,SEAFORD,19973,,IMPS4.80 ACRES,E/S COKESBURY RD,SW/RELIANCE,,01493,0455,...,,0.0,,,0.0,,,0.0,,,,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,http://sdat.dat.maryland.gov/RealProperty/Page...,MDPV2017_18,2020JUN,http://sdat.dat.maryland.gov/RealProperty/Page...,https://maps.google.com/maps?t=h&q=38.62101071...,3492.0,287100.0,2004.0,6009.0,,,COKESBURY,RD,SEAFORD,19973.0,6009 COKESBURY ROAD,both
1001000047,DORC,508807.3,110360.1,24019970100,240199701001,80,N,SF,5731 DAVIS MILL POND ROAD,5731,,DAVIS MILL POND,RD,,RD,P,FEDERALSBURG,21632,HARIM MILLSBORO LLC,,HARIM MILLSBORO LLC,PO BOX 1380,MAILSTOP 100484,MILLSBORO,DE,19966,,5731,,DAVIS MILL POND,RD,,,,IMPS20 ACRES,W/S DAVIS MILLPOND RD,NE/FINCHVILLE,,01471,0011,...,,0.0,,,0.0,,,0.0,,,,,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,http://sdat.dat.maryland.gov/RealProperty/Page...,MDPV2017_18,2020JUN,http://sdat.dat.maryland.gov/RealProperty/Page...,https://maps.google.com/maps?t=h&q=38.65415540...,1438.0,90400.0,1920.0,5731.0,,,DAVIS MILL POND,RD,,0.0,5731 DAVIS MILL POND ROAD,both
1001000055,DORC,507295.0,112993.8,24019970100,240199701001,80,N,TR,6940 RELIANCE ROAD,6940,,RELIANCE,RD,,RD,P,FEDERALSBURG,21632,HARIM MILLSBORO LLC,,HARIM MILLSBORO LLC,PO BOX 1380,MAILSTOP 100484,MILLSBORO,DE,19966,,6940,,RELIANCE,RD,,,,IMPS232 AC,S/W ALLENS COR-FDG RD,W/ALLENS COR,,01471,0011,...,,0.0,,,0.0,,,0.0,,,,,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,http://sdat.dat.maryland.gov/RealProperty/Page...,MDPV2017_18,2020JUN,http://sdat.dat.maryland.gov/RealProperty/Page...,https://maps.google.com/maps?t=h&q=38.67806461...,1056.0,181000.0,1989.0,6940.0,,,RELIANCE,RD,,0.0,6940 RELIANCE ROAD,both
1001000071,DORC,512503.8,108229.1,24019970100,240199701002,80,N,SF,6366 GALESTOWN RELIANCE ROAD,6366,,GALESTOWN RELIANCE,RD,,RD,P,RHODESDALE,21659,MAXWELL DAVID FRANKLIN,,MAXWELL DAVID FRANKLIN,PO BOX 902,,SEAFORD,DE,19973,0902,6366,,GALESTOWN RELIANCE,RD,,,,IMPS.58 ACRES,W/S GALESTOWN-RELIANCE,,,,,...,,0.0,,,0.0,,,0.0,,,,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,http://sdat.dat.maryland.gov/RealProperty/Page...,MDPV2017_18,2020JUN,http://sdat.dat.maryland.gov/RealProperty/Page...,https://maps.google.com/maps?t=h&q=38.63449609...,1424.0,117500.0,2009.0,6366.0,,,GALESTOWN RELIANCE,RD,,0.0,6366 GALESTOWN RELIANCE ROAD,both
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1007144938,,,,,,,,,,0,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,http://sdat.dat.maryland.gov/RealProperty/Page...,https://maps.google.com/maps?t=h&q=38.56699601...,0.0,1000.0,0.0,617.0,,,HIGH,ST,CAMBRIDGE,21613.0,617 HIGH ST,right_only
1007212615,,,,,,,,,,0,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,http://sdat.dat.maryland.gov/RealProperty/Page...,https://maps.google.com/maps?t=h&q=38.58162277...,0.0,18600.0,0.0,0.0,,,ALBEMARLE,CT,CAMBRIDGE,21613.0,,right_only
1015005556,,,,,,,,,,0,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,http://sdat.dat.maryland.gov/RealProperty/Page...,https://maps.google.com/maps?t=h&q=38.63387734...,0.0,18800.0,0.0,55.0,,,MIDDLE,ST,HURLOCK,21643.0,55 MIDDLE ST,right_only
1016000833,,,,,,,,,,0,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,http://sdat.dat.maryland.gov/RealProperty/Page...,https://maps.google.com/maps?t=h&q=38.47785831...,0.0,1000.0,0.0,0.0,,,HARRISVILLE,RD,,0.0,,right_only


In [None]:
# Normalise the address parts so they can be concatenated as plain strings.
# acctid stays as the index here (no reset_index).

# House numbers: missing -> 0, then whole-number text ("0" means "no number").
for number_col in ('strtnum', 'premsnum'):
  sdat_plus_assessments[number_col] = sdat_plus_assessments[number_col].fillna(0).astype(int).astype(str)

# Text parts: missing -> "". `strtunt` is filled from its own values; it was
# previously overwritten with a copy of `strttyp`.
for text_col in ('strtdir', 'strtnam', 'strttyp', 'strtunt', 'premsdir', 'premsnam', 'premstyp'):
  sdat_plus_assessments[text_col] = sdat_plus_assessments[text_col].fillna("")

# Fill before casting so missing addresses become "" rather than the text 'nan';
# the replace() also clears any 'nan' text already present in the column.
sdat_plus_assessments['address'] = sdat_plus_assessments['address'].fillna("").astype(str).replace('nan','')

def _address_text(value):
  """Return `value` as stripped text, mapping None / NaN / the literal 'nan' to ''."""
  if value is None or pd.isna(value):
    return ""
  text = str(value).strip()
  return "" if text == "nan" else text


def _address_number(value):
  """Return a house number as text; '' when it is missing or zero.

  Zero is treated as "no number" because the normalisation cell fills missing
  numbers with 0. Whole-number floats (e.g. 705.0) are rendered without the ".0".
  """
  if isinstance(value, float) and value == value and value.is_integer():
    value = int(value)
  text = _address_text(value)
  return "" if text == "0" else text


def get_best_sdat_address(id,debug=False):
  """Pick the most complete street address available for one account.

  Candidates, read from the module-level `sdat_plus_assessments` frame:
    1. the `address` column;
    2. an address assembled from the street parts (strtnum, strtdir, strtnam,
       strttyp, strtunt), or from the premises parts (premsnum, premsdir,
       premsnam, premstyp) when the street parts are all empty;
    3. `legal2`, used only when both of the above are empty.
  The longer of candidates 1 and 2 wins; `address` wins a tie.

  Parameters
  ----------
  id : str or int
    Account id. It is compared as text, so 1007111649 and "1007111649" are
    equivalent. It is looked up in the `acctid` column when the frame has one,
    otherwise in the index.
  debug : bool
    When True, print the id and the raw `address` value.

  Returns
  -------
  str
    The chosen address; '' when every candidate is empty.

  Raises
  ------
  ValueError
    If no record exists for `id`.
  """
  import re  # local import: the notebook's import cell does not load `re`

  key = str(id)
  frame = sdat_plus_assessments
  if 'acctid' in frame.columns:
    matches = frame[frame['acctid'].astype(str) == key]
  elif key in frame.index:
    matches = frame.loc[[key]]  # fast path: index already holds string ids
  else:
    matches = frame[frame.index.astype(str) == key]
  if matches.empty:
    raise ValueError("no SDAT record found for acctid %s" % key)
  # If the join produced several rows for one account, use the first.
  record = matches.iloc[0]

  def join_parts(*parts):
    # Skip empty parts, then collapse any internal runs of spaces.
    return re.sub(r" +", " ", " ".join(part for part in parts if part)).strip()

  address = _address_text(record['address'])
  if debug:
    print(id,address)

  # Prefer the structured street address, which may carry a unit number.
  strttyp = _address_text(record['strttyp'])
  strtunt = _address_text(record['strtunt'])
  if strtunt == strttyp:
    # A unit equal to the street type is a duplicate of the type, not a real unit.
    strtunt = ""
  alt_address = join_parts(
      _address_number(record['strtnum']),
      _address_text(record['strtdir']),
      _address_text(record['strtnam']),
      strttyp,
      strtunt)

  # Otherwise fall back to the premises address parts.
  if not alt_address:
    alt_address = join_parts(
        _address_number(record['premsnum']),
        _address_text(record['premsdir']),
        _address_text(record['premsnam']),
        _address_text(record['premstyp']))

  # Last resort: the second legal-description line of the same record.
  if not address and not alt_address:
    return _address_text(record['legal2'])

  return address if len(address) >= len(alt_address) else alt_address

# Smoke-test on one account. acctid values are stored as strings, so pass the id as a
# string; an integer id never equals a string id, which yielded an empty lookup.
get_best_sdat_address("1007111649",True)

Empty DataFrame
Columns: [acctid, jurscode, digxcord, digycord, ct2010, bg2010, geogcode, ooi, resityp, address, strtnum, strtdir, strtnam, strttyp, strtsfx, strtunt, addrtyp, city, zipcode, ownname1, ownname2, namekey, ownadd1, ownadd2, owncity, ownstate, ownerzip, ownzip2, premsnum, premsdir, premsnam, premstyp, premcity, premzip, premzip2, legal1, legal2, legal3, dr1clerk, dr1liber, dr1folio, towncode, desctown, subdivsn, dsubcode, descsubd, plat, pltliber, pltfolio, section, block, lot, map, grid, parcel, zoning, znchgdat, rzrealdat, ciuse, descciuse, exclass, descexcl, lu, desclu, acres, landarea, luom, width, depth, pfuw, pfus, pflw, pfsp, pfsu, pfic, pfih, recind, permittyp, yearblt, sqftstrc, strugrad, descgrad, strucnst, desccnst, strustyl, descstyl, strubldg, descbldg, lastinsp, lastassd, assessor, transno1, grntnam1, gr1clrk1, gr1libr1, gr1folo1, convey1, tradate, considr1, mortgag1, ...]
Index: []

[0 rows x 144 columns]


ValueError: ignored

In [None]:
# Compute the preferred address for every account. The function is named
# get_best_sdat_address (the previous call used a name that does not exist).
# The account id comes from the `acctid` column when the frame has one,
# otherwise from the row's index label.
sdat_plus_assessments['best_address'] = sdat_plus_assessments.apply(
    lambda row: get_best_sdat_address(row.get('acctid', row.name)), axis=1)

NameError: ignored