In [37]:
import numpy as np
import pandas as pd
from scipy import stats
from ydata_profiling import ProfileReport
import prince

In [22]:
# Cleaned 2023 American Housing Survey extract, read from the working directory.
AHS_CSV_PATH = "ahs2023_cleaned.csv"
ahs = pd.read_csv(AHS_CSV_PATH)

In [23]:
# Show the first five records transposed, so each variable is a row and
# the wide frame stays readable.
ahs.head(5).transpose()

Unnamed: 0,0,1,2,3,4
DIVISION,New England,West South Central,West South Central,West South Central,West North Central
TENURE,,Rented,Owned or being bought by someone in your house...,Owned or being bought by someone in your house...,Rented
YRBUILT,1980,1970,1970,1970,1920
UNITSIZE,"2,500 to 2,999 square feet",,"2,000 to 2,499 square feet","1,000 to 1,499 square feet",
HSHLDTYPE,,Married-couple family household,Married-couple family household,"Other family household: male householder, no w...","Nonfamily household: female householder, livin..."
HHRACE,,White only,White only,White only,Black only
HHSEX,,Male,Male,Male,Female
HINCP,,48000.0,292500.0,56000.0,36000.0
TOTHCAMT,,1093.0,810.0,489.0,845.0
MARKETVAL,,,245790.0,158200.0,


In [None]:
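# NOTE(review): left disabled. If run, this would overwrite `ahs` with only its
# object-dtype columns (cast to category), dropping numeric columns such as
# HINCP and MARKETVAL that later cells rely on.
#ahs = ahs.select_dtypes(include=['object']).apply(lambda x: x.astype('category'))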

In [18]:
# Profile every column of the survey frame (minimal mode off) and render the
# report in the notebook through to_notebook_iframe().
report_options = dict(
    title="2023 American Housing Survey",
    html={'style': {'full_width': True}},
    minimal=False,
)
profile = ProfileReport(ahs, **report_options)
profile.to_notebook_iframe()

100%|██████████| 28/28 [00:00<00:00, 162.30it/s]<00:00, 29.96it/s, Describe variable: MOLDBATH]
Summarize dataset: 100%|██████████| 38/38 [00:04<00:00,  8.19it/s, Completed]                  
Generate report structure: 100%|██████████| 1/1 [00:05<00:00,  5.46s/it]
Render HTML: 100%|██████████| 1/1 [00:00<00:00,  1.66it/s]


### Using statistical hypothesis tests to gauge differences between renters and people who own their homes

In [24]:
# Group sizes for the tenure comparison; value_counts() leaves out rows whose
# TENURE is missing.
tenure = ahs['TENURE']
tenure.value_counts()

TENURE
Owned or being bought by someone in your household    28192
Rented                                                19735
Occupied without payment of rent                        600
Name: count, dtype: int64

In [25]:
# Non-missing count and mean of household income (HINCP) within each tenure group.
by_tenure = ahs.groupby('TENURE')
by_tenure.agg({'HINCP': ['count', 'mean']})

Unnamed: 0_level_0,HINCP,HINCP
Unnamed: 0_level_1,count,mean
TENURE,Unnamed: 1_level_2,Unnamed: 2_level_2
Occupied without payment of rent,600,54319.523333
Owned or being bought by someone in your household,28192,134696.380746
Rented,19735,60886.215455


In [26]:
# One-way ANOVA: does mean household income (HINCP) differ across tenure groups?
# The groups are passed to f_oneway in the order listed here.
tenure_levels = (
    'Owned or being bought by someone in your household',
    'Rented',
    'Occupied without payment of rent',
)
income_samples = [
    ahs.query(f"TENURE == '{level}'")['HINCP']
    for level in tenure_levels
]
stats.f_oneway(*income_samples)

F_onewayResult(statistic=np.float64(1557.3251799219104), pvalue=np.float64(0.0))

In [32]:
# Keep only units where both market value and year built are present, then
# show their Pearson correlation matrix.
corr_columns = ['MARKETVAL', 'YRBUILT']
ahs_corr = ahs.loc[:, corr_columns].dropna(axis="index", how="any")
ahs_corr.corr(method="pearson")

Unnamed: 0,MARKETVAL,YRBUILT
MARKETVAL,1.0,0.035188
YRBUILT,0.035188,1.0


In [33]:
# Pearson correlation between market value and year built, with its p-value,
# computed on the complete-case frame.
market_value = ahs_corr['MARKETVAL']
year_built = ahs_corr['YRBUILT']
stats.pearsonr(market_value, year_built)

PearsonRResult(statistic=np.float64(0.03518752110020666), pvalue=np.float64(2.628878555092002e-10))

### Using a measurement model to build an index of how much each house/apartment is in disrepair (a "flophouse index")


In [35]:
# Columns that feed the disrepair index.
disrepair_items = [
    'FUSEBLOW', 'SEWBREAK', 'ROACH', 'RODENT', 'NOWIRE', 'PLUGS',
    'COLD', 'NOTOIL', 'NOWAT', 'FLOORHOLE', 'FNDCRUMB', 'PAINTPEEL',
    'ROOFHOLE', 'ROOFSAG', 'ROOFSHIN', 'WALLCRACK', 'WALLSIDE',
    'WALLSLOPE', 'WINBOARD', 'WINBROKE', 'LEAKI', 'MOLDBATH',
]

# Keep only units that answered every item; any row with a missing value is dropped.
brokenness = ahs.loc[:, disrepair_items].dropna()
brokenness

Unnamed: 0,FUSEBLOW,SEWBREAK,ROACH,RODENT,NOWIRE,PLUGS,COLD,NOTOIL,NOWAT,FLOORHOLE,...,ROOFHOLE,ROOFSAG,ROOFSHIN,WALLCRACK,WALLSIDE,WALLSLOPE,WINBOARD,WINBROKE,LEAKI,MOLDBATH
1,No fuses / breakers blown in the last 3 months,No breakdowns in the last 3 months,Seen a few times in the last 12 months,No signs in the last 12 months,Concealed,Not broken,Broken,Broken,Broken,Broken,...,Broken,Broken,Broken,Broken,Broken,Broken,Broken,Not broken,Broken,Broken
2,No fuses / breakers blown in the last 3 months,No breakdowns in the last 3 months,No signs in the last 12 months,No signs in the last 12 months,Concealed,Not broken,Broken,Broken,Broken,Broken,...,Broken,Broken,Broken,Broken,Broken,Broken,Broken,Broken,Broken,Broken
3,No fuses / breakers blown in the last 3 months,No breakdowns in the last 3 months,No signs in the last 12 months,Seen a few times in the last 12 months,Concealed,Not broken,Broken,Broken,Broken,Broken,...,Broken,Broken,Broken,Broken,Broken,Broken,Broken,Broken,Not broken,Broken
6,No fuses / breakers blown in the last 3 months,No breakdowns in the last 3 months,No signs in the last 12 months,No signs in the last 12 months,Exposed,Not broken,Broken,Broken,Broken,Broken,...,Broken,Broken,Broken,Broken,Broken,Broken,Broken,Broken,Broken,Broken
7,No fuses / breakers blown in the last 3 months,No breakdowns in the last 3 months,No signs in the last 12 months,No signs in the last 12 months,Concealed,Not broken,Broken,Broken,Broken,Broken,...,Broken,Broken,Broken,Broken,Broken,Broken,Broken,Broken,Broken,Broken
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55659,No fuses / breakers blown in the last 3 months,No breakdowns in the last 3 months,No signs in the last 12 months,No signs in the last 12 months,Concealed,Not broken,Broken,Broken,Broken,Broken,...,Broken,Broken,Broken,Broken,Broken,Broken,Broken,Broken,Broken,Broken
55660,No fuses / breakers blown in the last 3 months,No breakdowns in the last 3 months,No signs in the last 12 months,No signs in the last 12 months,Concealed,Not broken,Broken,Broken,Broken,Broken,...,Broken,Broken,Broken,Broken,Broken,Broken,Broken,Broken,Broken,Broken
55661,No fuses / breakers blown in the last 3 months,No breakdowns in the last 3 months,No signs in the last 12 months,No signs in the last 12 months,Concealed,Not broken,Broken,Broken,Broken,Broken,...,Broken,Broken,Broken,Broken,Broken,Broken,Broken,Broken,Broken,Broken
55662,No fuses / breakers blown in the last 3 months,No breakdowns in the last 3 months,No signs in the last 12 months,No signs in the last 12 months,Concealed,Not broken,Broken,Broken,Broken,Broken,...,Broken,Broken,Broken,Broken,Broken,Broken,Broken,Broken,Broken,Broken


In [38]:
# Fit a two-component multiple correspondence analysis on the disrepair items.
# prince's default engine computes the decomposition with a randomized SVD, so
# a fixed seed is set to keep the coordinates reproducible across kernel restarts.
MCA_SEED = 42

# The fitted model is bound to the name `MCA` because the cells that follow
# read it under that name; fit() returns the estimator itself.
MCA = prince.MCA(n_components=2, random_state=MCA_SEED).fit(brokenness)

In [42]:
# Coordinates of each item category on the fitted components, sorted ascending
# by the second component (column label 1).
category_coords = MCA.column_coordinates(brokenness)
category_coords.sort_values(by=1)

Unnamed: 0,0,1
ROOFHOLE__Not broken,4.04205,-2.600202
ROOFSAG__Not broken,3.431558,-2.540395
WALLSLOPE__Not broken,3.965754,-2.313698
ROOFSHIN__Not broken,2.437856,-1.803373
WINBOARD__Not broken,3.506439,-1.75152
WALLSIDE__Not broken,2.759549,-1.172961
WINBROKE__Not broken,2.193618,-0.614489
ROACH__Seen monthly in the last 12 months,1.026822,-0.498963
FNDCRUMB__Not broken,1.745907,-0.494706
RODENT__Seen monthly in the last 12 months,1.372743,-0.10372


In [41]:
# Per-unit coordinates on the two fitted components: one row per housing unit
# retained in `brokenness`.
unit_coords = MCA.row_coordinates(brokenness)
unit_coords

Unnamed: 0,0,1
1,0.184199,-0.118847
2,-0.165798,-0.040247
3,0.070236,0.214008
6,-0.038498,0.092571
7,-0.165798,-0.040247
...,...,...
55659,-0.165798,-0.040247
55660,-0.165798,-0.040247
55661,-0.165798,-0.040247
55662,-0.165798,-0.040247
