In [1]:
!pip install dbfread
!pip install pyproj

Collecting dbfread
  Downloading https://files.pythonhosted.org/packages/4c/94/51349e43503e30ed7b4ecfe68a8809cdb58f722c0feb79d18b1f1e36fe74/dbfread-2.0.7-py2.py3-none-any.whl
Installing collected packages: dbfread
Successfully installed dbfread-2.0.7
Collecting pyproj
[?25l  Downloading https://files.pythonhosted.org/packages/ce/37/705ee471f71130d4ceee41bbcb06f3b52175cb89273cbb5755ed5e6374e0/pyproj-2.6.0-cp36-cp36m-manylinux2010_x86_64.whl (10.4MB)
[K     |████████████████████████████████| 10.4MB 2.7MB/s 
[?25hInstalling collected packages: pyproj
Successfully installed pyproj-2.6.0


In [0]:
import glob
import os
import re
import string
import subprocess

import requests
from dbfread import DBF
from pyproj import CRS
from pyproj import Transformer

In [3]:
# Attach Google Drive at /content/drive; the working directory used by the
# following cells lives under "My Drive" inside this mount point.
# The call is interactive: it prints an authorization URL and waits for the code.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [4]:
# Make the directory if it does not exist
# You can change the directory as you'd like
# !mkdir 'drive/My Drive/research'
# !mkdir 'drive/My Drive/research/data'
%cd 'drive/My Drive/House_research/data'

/content/drive/My Drive/House_research/data


Get the list of data files

In [0]:
# Ask the open-data endpoint for its attachment list (a JSON array of records
# carrying at least FILENAME and FILELINK).
download_page = "https://apps.planning.maryland.gov/Secure/REST/SharepointService1.svc/GetOpenData"

# Without a timeout a stalled connection would block the kernel indefinitely, and
# without the status check an HTTP error page would only surface later as a
# confusing JSON or KeyError.
response = requests.get(download_page, timeout=60)
response.raise_for_status()
attachments = response.json()

# Keep (link, name) pairs for the attachments whose file name ends in "MPV15".
links = [
  (attachment["FILELINK"], attachment["FILENAME"])
  for attachment in attachments
  if attachment["FILENAME"].endswith("MPV15")
]

Download all of the data files

In [0]:
# Download every selected attachment as <FILENAME>.zip into the current directory.
# wget is started with an argument list instead of a shell command string, so
# characters such as "&", "?" or spaces in the remote-supplied link and file name
# are passed through verbatim rather than being interpreted by a shell.
# -nc asks wget not to overwrite a file that is already present.
for link, filename in links:
  print(f"downloading {filename}")
  result = subprocess.run(["wget", "-nc", f"{link}?dl=1", "-O", f"{filename}.zip"])
  if result.returncode != 0:
    # The exit status used to be discarded; report it so a missing archive is
    # noticed here rather than at the unzip step.
    print(f"  wget exited with status {result.returncode} for {filename}")

downloading ALLE_MPV15
downloading ANNE_MPV15
downloading BACO_MPV15
downloading BACI_MPV15
downloading CALV_MPV15
downloading CARO_MPV15
downloading CARR_MPV15
downloading CECI_MPV15
downloading CHAR_MPV15
downloading DORC_MPV15
downloading FRED_MPV15
downloading GARR_MPV15
downloading HARF_MPV15
downloading HOWA_MPV15
downloading KENT_MPV15
downloading MONT_MPV15
downloading PRIN_MPV15
downloading QUEE_MPV15
downloading SOME_MPV15
downloading STMA_MPV15
downloading TALB_MPV15
downloading WASH_MPV15
downloading WICO_MPV15
downloading WORC_MPV15


Unzip data files (this takes a long time)


In [0]:
!unzip \*.zip

In [7]:
# List the working directory to check that every downloaded .zip archive has a
# matching extracted folder next to it.
%ls

[0m[01;34mAlle2015[0m/       [01;34mCaro2015[0m/       [01;34mFred2015[0m/       [01;34mMont2015[0m/       [01;34mTalb2015[0m/
ALLE_MPV15.zip  CARO_MPV15.zip  FRED_MPV15.zip  MONT_MPV15.zip  TALB_MPV15.zip
[01;34mAnne2015[0m/       [01;34mCarr2015[0m/       [01;34mGarr2015[0m/       [01;34mPrin2015[0m/       [01;34mWash2015[0m/
ANNE_MPV15.zip  CARR_MPV15.zip  GARR_MPV15.zip  PRIN_MPV15.zip  WASH_MPV15.zip
[01;34mBaci2015[0m/       [01;34mCeci2015[0m/       [01;34mHarf2015[0m/       [01;34mQuee2015[0m/       [01;34mWico2015[0m/
BACI_MPV15.zip  CECI_MPV15.zip  HARF_MPV15.zip  QUEE_MPV15.zip  WICO_MPV15.zip
[01;34mBaco2015[0m/       [01;34mChar2015[0m/       [01;34mHowa2015[0m/       [01;34mSome2015[0m/       [01;34mWorc2015[0m/
BACO_MPV15.zip  CHAR_MPV15.zip  HOWA_MPV15.zip  SOME_MPV15.zip  WORC_MPV15.zip
[01;34mCalv2015[0m/       [01;34mDorc2015[0m/       [01;34mKent2015[0m/       [01;34mStma2015[0m/
CALV_MPV15.zip  DORC_MPV15.zip  KENT_M

In [0]:
# Collect the *2015.dbf table from each extracted archive folder.
# glob returns matches in arbitrary filesystem order; sorting makes the processing
# order, and with it the running row numbers written to the label CSV,
# reproducible from one run to the next.
data_files = sorted(glob.glob("*/ATDATA/DATABASE/*2015.dbf"))
if not data_files:
  print("No *2015.dbf files found - run this from the directory that holds the unzipped archives")

In [0]:
# Set up the coordinate transformer.
# Source CRS: the PROJ definition published at http://epsg.io/102285
# (Lambert conformal conic on the GRS80 ellipsoid, units in metres).
# The last token is written as "+no_defs" to match the published definition.
crs_102285 = CRS.from_proj4("+proj=lcc +lat_1=38.3 +lat_2=39.45 +lat_0=37.66666666666666 +lon_0=-77 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs")
# Target CRS: EPSG:4326.
crs_4326 = CRS.from_epsg(4326)
# NOTE(review): pyproj follows each CRS's declared axis order unless always_xy=True
# is passed, so with EPSG:4326 as the target transform() is expected to return
# (lat, lon) - confirm against the code that unpacks the result.
transformer = Transformer.from_crs(crs_102285, crs_4326)

In [10]:
# Build labels2.csv: one line per record of every .dbf file, carrying a combined
# building-type / story-count class.
#
# Class encoding:
#   0      story count unknown ("Cannot tell")
#   1..8   residential; the value is the index into `stories`
#   9..16  commercial;  the value minus 8 is the index into `stories`

# Lookup tables, built once instead of on every row.
building_types = ["residential", "commercial"]
stories = ["Cannot tell", "1", "1.5", "2", "2.5", "3", "3.5", "4", "> 4"]
max_story_num = len(stories) - 1  # 8: everything above 4 stories shares "> 4"
commercial_land_uses = ("C", "EC")  # commercial, exempt commercial


def _story_num(descstyl, bldg_story):
  """Return the index into `stories` for one record; 0 means unknown.

  descstyl:   the record's DESCSTYL text. When it contains "STRY" (and not
              "INDUSTRY") the story count is parsed from it: "Split Foyer"
              counts as 2 and a " 1/2" suffix adds half a story.
  bldg_story: the record's BLDG_STORY text, read as a whole number of stories
              when DESCSTYL does not qualify. Zero-padded values such as
              "02" are accepted.

  A count of n stories maps to index 2n - 1, capped at the last entry.
  Counts below one story (for example "0") are reported as unknown instead
  of crashing or indexing `stories` from the end.

  Raises ValueError when the selected field is not numeric.
  """
  if "STRY" in descstyl and "INDUSTRY" not in descstyl:
    # Strip the leading words, keep what precedes " Story", turn "1 1/2" into "1.5".
    text = (
      descstyl.replace("Split Foyer", "2")
      .lstrip(string.ascii_letters + " ")
      .split(" Story")[0]
      .replace(" 1/2", ".5")
    )
    # FIXME: Add support for different types of residential buildings here. E.g. TH, SF, CN, AP, etc.
    story_num = int(float(text) * 2 - 1)
  elif bldg_story != "":
    # int() already ignores leading zeros; stripping them by hand turned "0" into
    # an empty string and made the conversion fail.
    story_num = int(bldg_story) * 2 - 1
  else:
    return 0
  if story_num < 1:
    return 0
  return min(story_num, max_story_num)


def _classify(lu, descstyl, bldg_story):
  """Return (class_num, label_text) for one record.

  lu is the record's LU code; codes in `commercial_land_uses` select the
  commercial group, anything else the residential group. When the story count
  is unknown the building type is dropped as well and the result is
  (0, "Cannot tell").
  """
  story_num = _story_num(descstyl, bldg_story)
  if story_num == 0:
    return 0, stories[0]
  building_type = 1 if lu in commercial_land_uses else 0
  class_num = building_type * max_story_num + story_num
  # The two label parts are joined without a separator (e.g. "residential2"),
  # exactly as before, because consumers of labels2.csv are not visible here.
  return class_num, building_types[building_type] + stories[story_num]


row_num = 1

# The with-block closes the CSV even when a record fails to parse, so the rows
# written so far are not left in an unflushed, open handle.
with open("labels2.csv", "w") as output:
  output.write("#,ACCTID,class,label text\n")

  for data_file in data_files:
    print(f"Processing {data_file}")
    for row in DBF(data_file, encoding='iso-8859-1'):
      acctid = row["ACCTID"]
      class_num, desc = _classify(row["LU"], row["DESCSTYL"], row["BLDG_STORY"])
      output.write(f"{row_num},{acctid},{class_num},{desc}\n")
      row_num += 1
    # Push each finished file's rows to disk while the remaining files are processed.
    output.flush()

Processing Alle2015/ATDATA/DATABASE/Alle2015.dbf
Processing Anne2015/ATDATA/DATABASE/Anne2015.dbf
Processing Baco2015/ATDATA/DATABASE/Baco2015.dbf
Processing Baci2015/ATDATA/DATABASE/Baci2015.dbf
Processing Calv2015/ATDATA/DATABASE/Calv2015.dbf
Processing Caro2015/ATDATA/DATABASE/Caro2015.dbf
Processing Carr2015/ATDATA/DATABASE/Carr2015.dbf
Processing Ceci2015/ATDATA/DATABASE/Ceci2015.dbf
Processing Char2015/ATDATA/DATABASE/Char2015.dbf
Processing Dorc2015/ATDATA/DATABASE/Dorc2015.dbf
Processing Fred2015/ATDATA/DATABASE/Fred2015.dbf
Processing Garr2015/ATDATA/DATABASE/Garr2015.dbf
Processing Harf2015/ATDATA/DATABASE/Harf2015.dbf
Processing Howa2015/ATDATA/DATABASE/Howa2015.dbf
Processing Kent2015/ATDATA/DATABASE/Kent2015.dbf
Processing Mont2015/ATDATA/DATABASE/Mont2015.dbf
Processing Prin2015/ATDATA/DATABASE/Prin2015.dbf
Processing Quee2015/ATDATA/DATABASE/Quee2015.dbf
Processing Some2015/ATDATA/DATABASE/Some2015.dbf
Processing Stma2015/ATDATA/DATABASE/Stma2015.dbf
Processing Talb2015/