All the data transformations that need to happen, from raw to final:
* NHPD Raw -> Change any null values for fields to "Not Available"
* NHPD Carto -> Save as a CSV to reduce storage on Carto server
* NHPD for Report -> Clean the latitude/longitude values and save as a spatial file (GeoJSON)
* DACs -> Join DAC Percentiles with Mapping Display Data. Make sure data types are correct (floats, not strings). Join QCT Data.
  * Carto_DACs -> have as few fields as possible, short names
  * Report_DACs -> have all the fields, named well
* Tribes and Territories -> Get from GIS SQLite. The naming format should be the same as for Counties and States.

In [109]:
import pandas as pd
import geopandas as gpd
import numpy as np
import sqlite3 as sql
import subprocess
import sys
import os
from os import mkdir
from os.path import exists, sep

In [146]:
# Enter the project data directory here; the stage-specific folders below are
# all direct children of WD, so they are derived from it.
WD = "/Users/anushreechaudhuri/pCloud Drive/MIT/MIT Work/DC DOE/app_files/equity-tool/data"
RAW = f"{WD}/raw"                # input files (nhpd.csv is read from here)
SQL_OUTPUT = f"{WD}/sql_output"
CARTO = f"{WD}/carto"            # CSV exports destined for Carto
REPORT = f"{WD}/report"          # spatial exports used by the report

In [185]:
# Switch the process working directory to RAW so the relative filename below
# resolves there, then load the raw NHPD table from nhpd.csv into a DataFrame.
os.chdir(RAW)
nhpd = pd.read_csv("nhpd.csv")

In [190]:
# Columns kept from the raw NHPD table, in output order. Defined once so the
# report frame and the Carto frame cannot drift apart.
NHPD_COLUMNS = [
    "Property Name", "Street Address", "City", "State", "Zip Code",
    "Subsidy Status", "Subsidy Name", "Subsidy Subname",
    "Start Date", "End Date", "Assisted Units",
    "Owner Name", "Owner Type", "Manager Name", "Manager Type",
    "0-1 Bedroom Units", "Two Bedroom Units", "Three+ Bedroom Units",
    "Target Population", "Earliest Construction Date",
    "Latest Construction Date", "Rent to FMR Ratio", "Known Total Units",
    "Renewal Type Name", "Inactive Status Description", "Construction Type",
    "Renewal Name", "Renewal ID", "SOA Code", "Latitude", "Longitude",
]

# .copy() makes each frame an independent object instead of a selection of
# another frame, so adding columns to `nhpd` or renaming `carto_nhpd` in place
# later neither warns (SettingWithCopyWarning) nor affects the other frame.
nhpd = nhpd[NHPD_COLUMNS].copy()
# `nhpd` now holds exactly NHPD_COLUMNS, so a plain copy is the same selection.
carto_nhpd = nhpd.copy()

In [191]:
# Shorten the NHPD column names on the Carto frame (modified in place).
# NOTE(review): 'Two Bedroom Units' -> '2-4' and 'Three+ Bedroom Units' -> '5-+'
# do not obviously match the source column meaning — confirm these short names
# are what the Carto consumer expects before changing them.
carto_nhpd.rename(
    columns={
        "Property Name": "name",
        "Street Address": "add",
        "City": "city",
        "State": "state",
        "Zip Code": "zip",
        "Subsidy Status": "status",
        "Subsidy Name": "subsidy",
        "Subsidy Subname": "subnm",
        "Start Date": "start",
        "End Date": "end",
        "Assisted Units": "units",
        "Owner Name": "owner",
        "Owner Type": "otype",
        "Manager Name": "mgr",
        "Manager Type": "mtype",
        "0-1 Bedroom Units": "0-1",
        "Two Bedroom Units": "2-4",
        "Three+ Bedroom Units": "5-+",
        "Target Population": "pop",
        "Earliest Construction Date": "econ",
        "Latest Construction Date": "lcon",
        "Rent to FMR Ratio": "rtfmr",
        "Known Total Units": "totun",
        "Renewal Type Name": "rtype",
        "Inactive Status Description": "inact",
        "Construction Type": "ctype",
        "Renewal Name": "rname",
        "Renewal ID": "rid",
        "SOA Code": "soa",
        "Latitude": "latitude",
        "Longitude": "longitude",
    },
    inplace=True,
)

In [192]:
def str_to_float(x):
    """Convert *x* to a float, returning NaN when it cannot be converted.

    Args:
        x: Any value accepted by ``float()`` (numbers, numeric strings, ...).

    Returns:
        ``float(x)`` on success, otherwise ``numpy.nan``.
    """
    try:
        return float(x)
    # Only conversion failures map to NaN; a bare ``except`` would also hide
    # KeyboardInterrupt/SystemExit and unrelated programming errors.
    except (TypeError, ValueError, OverflowError):
        return np.nan

In [193]:
# Parse the coordinate columns into floats; values that cannot be parsed become NaN.
nhpd["lat"] = nhpd["Latitude"].apply(str_to_float)
nhpd["lon"] = nhpd["Longitude"].apply(str_to_float)
# Keep only rows where both coordinates parsed, since a point needs both.
nhpd = nhpd.dropna(subset=["lat", "lon"], how="any")
# Build point geometries (x = longitude, y = latitude) and wrap as a GeoDataFrame.
# NOTE(review): no CRS is assigned here — confirm whether one should be set.
nhpd = gpd.GeoDataFrame(
    nhpd,
    geometry=gpd.points_from_xy(x=nhpd["lon"], y=nhpd["lat"]),
)

In [194]:
# Export carto_nhpd to the CARTO directory as carto_nhpd.csv.
# The working directory is switched to CARTO first so the relative filename
# resolves there; index=False leaves the DataFrame index out of the file.
os.chdir(CARTO)
carto_nhpd.to_csv("carto_nhpd.csv", index=False)

In [195]:
# Export the nhpd GeoDataFrame to the REPORT directory as nhpd.geojson.
# The working directory is switched to REPORT first so the relative filename
# resolves there; the GeoJSON driver is requested explicitly.
os.chdir(REPORT)
nhpd.to_file("nhpd.geojson", driver="GeoJSON")

  pd.Int64Index,
