In [79]:
#Pandas display options
pd.options.display.max_rows = 999

In [80]:
#Use gdown to download the NHPD data
#!pip install gdown

In [81]:
#Download the NHPD data into the ./data/ folder
import gdown

url = 'https://drive.google.com/u/0/uc?id=19EMDNi6AT4pecx_iWltZv0GxLYqv4Eof&export=download'
output = './data/active_and_inconclusive_communities.xlsx'
gdown.download(url, output, quiet=False)


Downloading...
From: https://drive.google.com/u/0/uc?id=19EMDNi6AT4pecx_iWltZv0GxLYqv4Eof&export=download
To: /Users/m0u00pf/sep21-housing-insecurity/code/mervesa/data/active_and_inconclusive_communities.xlsx
44.4MB [00:01, 39.3MB/s]


'./data/active_and_inconclusive_communities.xlsx'

In [109]:
#Read the downloaded file
import pandas as pd
nhpd_data=pd.read_excel('./data/active_and_inconclusive_communities.xlsx')

In [110]:
nhpd_data.shape

(82287, 252)

In [111]:
#Eliminate any columns that have more than 97% nulls
null_columns=(nhpd_data.isnull().sum()/len(nhpd_data))[nhpd_data.isnull().sum()/len(nhpd_data)>0.97].index

In [112]:
nhpd_data.drop(null_columns,axis=1,inplace=True)

In [113]:
#Incorporate Anabel and Sam's analysis of full and/or informative columns
sam_colstokeep=['NHPDPropertyID',
'PropertyName',
'PropertyAddress',
'CBSACode',
'CountyCode',
'CensusTract',
'City',
'County',
'State',
'Latitude',
'Longitude',
'ActiveSubsidies',
'TotalInconclusiveSubsidies',
'TotalInactiveSubsidies',
'TotalUnits',
'EarliestEndDate',
'LatestEndDate',
'StudioOneBedroomUnits',
'TwoBedroomUnits',
'ThreePlusBedroomUnits',
'FairMarketRent_2BR',
'EarliestConstructionDate',
'LatestConstructionDate',
'NumberActiveSection8',
'NumberInconclusiveSection8',
'NumberInactiveSection8',
'NumberActiveSection202',
'NumberActiveHUDInsured',
'NumberInconclusiveHUDInsured',
'NumberInactiveHud',
'NumberActiveLihtc',
'NumberInconclusiveLihtc',
'NumberInactiveLihtc',
'NumberActiveSection515',
'NumberInactiveSection515',
'NumberActiveHome',
'NumberInactiveHome',
'NumberActivePublicHousing',
'NumberInactivePublicHousing',
'NumberActiveState',
'NumberInactiveState',
'NumberActivePBV',
'NumberActiveMR',
'OldNHPDPropertyID']
anabel_colstokeep=['ActiveSubsidies',
'TotalInconclusiveSubsidies',
'TotalInactiveSubsidies',
'TotalUnits',
'Owner',
'OwnerType',
'ManagerName',
'ManagerType',
'ReacScore1',
'ReacScore2',
'ReacScore3',
'StudioOneBedroomUnits',
'TwoBedroomUnits',
'ThreePlusBedroomUnits',
'PercentofELIHouseholds',
'TargetTenantType',
'FairMarketRent_2BR',
'OccupancyRate',
'AverageMonthsOfTenancy']
colstokeep=list(set(sam_colstokeep+anabel_colstokeep))
nhpd_data=nhpd_data[colstokeep]

In [114]:
nhpd_data.shape

(82287, 55)

In [115]:
#Read housing loss data
#Hillsborough
url = 'https://drive.google.com/uc?id=1abt4fLPO__KxBLz9SXue5VKeZN3cUcCF&export=download'
output = './data/hills_loss.csv'
gdown.download(url, output, quiet=False)
#Miami-Dade
url = 'https://drive.google.com/uc?id=1gLojTGS6HQ1s60gmIxFCq2xObB1634BU&export=download'
output = './data/miami_loss.csv'
gdown.download(url, output, quiet=False)
#Orange
url = 'https://drive.google.com/uc?id=15ee2QrH8a_yuIfptGwAsVF-tWGTEYXCy&export=download'
output = './data/orange_loss.csv'
gdown.download(url, output, quiet=False)

Downloading...
From: https://drive.google.com/uc?id=1abt4fLPO__KxBLz9SXue5VKeZN3cUcCF&export=download
To: /Users/m0u00pf/sep21-housing-insecurity/code/mervesa/data/hills_loss.csv
100%|██████████| 207k/207k [00:00<00:00, 4.51MB/s]
Downloading...
From: https://drive.google.com/uc?id=1gLojTGS6HQ1s60gmIxFCq2xObB1634BU&export=download
To: /Users/m0u00pf/sep21-housing-insecurity/code/mervesa/data/miami_loss.csv
100%|██████████| 321k/321k [00:00<00:00, 7.35MB/s]
Downloading...
From: https://drive.google.com/uc?id=15ee2QrH8a_yuIfptGwAsVF-tWGTEYXCy&export=download
To: /Users/m0u00pf/sep21-housing-insecurity/code/mervesa/data/orange_loss.csv
100%|██████████| 143k/143k [00:00<00:00, 3.88MB/s]


'./data/orange_loss.csv'

In [116]:
housing_loss=pd.concat([pd.read_csv('./data/hills_loss.csv'),
                       pd.read_csv('./data/miami_loss.csv'),
                       pd.read_csv('./data/orange_loss.csv')])

In [117]:
#Get census tracts, compare with those in NHPD data
housing_loss.rename(columns={'census_tract_GEOID':'CensusTract'},inplace=True)

In [118]:
nhpd_data=nhpd_data.merge(housing_loss,how="inner",on=['CensusTract'])

In [119]:
nhpd_data.columns

Index(['NumberInconclusiveHUDInsured', 'EarliestConstructionDate',
       'TotalUnits', 'NumberInactiveLihtc', 'EarliestEndDate',
       'NumberActiveHUDInsured', 'NumberInactiveHud', 'ManagerName',
       'NumberActiveState', 'CountyCode',
       ...
       'lien-foreclosure-rate-2019', 'avg-eviction-rate',
       'ratio-to-mean-foreclosure-rate', 'ratio-to-mean-eviction-rate',
       'avg-housing-loss-rate', 'evictions-pct-total-housing-loss',
       'housing-loss-index', 'county_GEOID', 'county', 'state'],
      dtype='object', length=127)

In [120]:
# Thanks to Laura Prichard - Homogenizing column formats
data = nhpd_data.copy()
data['PropertyName'] = data['PropertyName'].str.upper()
data['PropertyAddress'] = data['PropertyAddress'].str.title()
data['City'] = data['City'].str.title()
data['County'] = data['County'].str.title()
data['Owner'] = data['Owner'].str.upper()
data['ManagerName'] = data['ManagerName'].str.upper()

# Replace state name for Guam
data['State'].loc[data['State']=='WP'] = 'GU'

# Target Tenant Type cleaning
data['TargetTenantType'] = data['TargetTenantType'].str.replace("Eldery or Disabled", "Elderly or Disabled")
data['TargetTenantType'] = data['TargetTenantType'].str.replace("Mixed;Link", "Mixed")
data['TargetTenantType'] = data['TargetTenantType'].str.replace("Indv. families - not eld/ handicap", "Family")
data['TargetTenantType'] = data['TargetTenantType'].str.replace("Senior", "Elderly")
data['TargetTenantType'] = data['TargetTenantType'].str.replace("Family & Elderly", "Mixed")
data['TargetTenantType'] = data['TargetTenantType'].str.replace("Homeless Veterans", "Veterans")
data['TargetTenantType'] = data['TargetTenantType'].str.replace("Mixed Income", "Mixed")
data['TargetTenantType'] = data['TargetTenantType'].str.replace("OTHER", "Mixed")
data['TargetTenantType'] = data['TargetTenantType'].str.replace("Affordable", "Low Income")
data['TargetTenantType'] = data['TargetTenantType'].str.title()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
  data['TargetTenantType'] = data['TargetTenantType'].str.replace("Indv. families - not eld/ handicap", "Family")


In [122]:
data.to_excel('nhpd_with_housingloss.xlsx')