In [48]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [49]:
# Load the cleaned public-policy dataset from a CSV in the working directory.
df = pd.read_csv('cleaned_public_policy.csv')

In [50]:
# List the column labels available in the loaded frame.
df.columns

Index(['Year', 'State', 'Housing_Prices_Quarter', 'Community_Spending',
       'Unexpected_Housing_Spending', 'Govt_Direct_Expenditure',
       'Property_Rights', 'Number_Interest_Groups', 'No_Discrimination_Laws',
       'Private_Fair_Housing', 'Public_Fair_Housing', 'Urban_Fair_Housing',
       'Banned_Discrimination_Public_Housing',
       'Banned_Discrimination_Private_Housing', 'Legislation_Public_Housing',
       'Rent_Control', 'State_Aid_Allowed', 'Federal_Aid_Allowed',
       'Prohibit_Rent_Control'],
      dtype='object')

## Primary Goals for EDA:
1) Identify the distribution of housing prices across different states
2) Identify whether the categorical policy variables are correlated with one another. I expect "liberal" states to have more housing protections, so these protections should tend to appear together.
3) Observe whether we can summarize the data across time: i.e., is there a particular point in time at which housing policies were passed?

In [55]:
# Columns to be treated as categorical rather than numeric.
# The order of this list is reused by later cells, so keep it stable.
cat_cols = ['Private_Fair_Housing','No_Discrimination_Laws',
             'Public_Fair_Housing','Urban_Fair_Housing',
             'Banned_Discrimination_Public_Housing',
             'Banned_Discrimination_Private_Housing',
             'Legislation_Public_Housing','Rent_Control',
             'State_Aid_Allowed','Federal_Aid_Allowed','Prohibit_Rent_Control']
df[cat_cols] = df[cat_cols].astype('category')

# Reduce 'Year' to an integer calendar year.
# The guard makes this cell safe to re-run: once 'Year' is already an integer
# column, pd.to_datetime would interpret the values as nanoseconds since the
# Unix epoch and every year would silently become 1970.
if not pd.api.types.is_integer_dtype(df['Year']):
    df['Year'] = pd.to_datetime(df['Year']).dt.year

In [56]:
# Show each column's dtype, then preview the first five rows of the frame.
print(df.dtypes)
df.head()

Year                                        int64
State                                      object
Housing_Prices_Quarter                    float64
Community_Spending                        float64
Unexpected_Housing_Spending               float64
Govt_Direct_Expenditure                   float64
Property_Rights                           float64
Number_Interest_Groups                    float64
No_Discrimination_Laws                   category
Private_Fair_Housing                     category
Public_Fair_Housing                      category
Urban_Fair_Housing                       category
Banned_Discrimination_Public_Housing     category
Banned_Discrimination_Private_Housing    category
Legislation_Public_Housing               category
Rent_Control                             category
State_Aid_Allowed                        category
Federal_Aid_Allowed                      category
Prohibit_Rent_Control                    category
dtype: object


Unnamed: 0,Year,State,Housing_Prices_Quarter,Community_Spending,Unexpected_Housing_Spending,Govt_Direct_Expenditure,Property_Rights,Number_Interest_Groups,No_Discrimination_Laws,Private_Fair_Housing,Public_Fair_Housing,Urban_Fair_Housing,Banned_Discrimination_Public_Housing,Banned_Discrimination_Private_Housing,Legislation_Public_Housing,Rent_Control,State_Aid_Allowed,Federal_Aid_Allowed,Prohibit_Rent_Control
0,1900,AK,0.6586,0.000862,-0.000247,0.589,0.045116,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0
1,1901,AK,0.6586,0.000862,-0.000247,0.589,0.045116,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0
2,1902,AK,0.6586,0.000862,-0.000247,0.589,0.045116,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0
3,1903,AK,0.6586,0.000862,-0.000247,0.589,0.045116,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0
4,1904,AK,0.6586,0.000862,-0.000247,0.589,0.045116,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0


In [58]:
# Hard-coded list of state codes used as the "rent control" group
# (presumably the states with rent control in effect -- TODO confirm source).
rent_control_states = ['CA','MD','NJ','NY']
# Every other state code present in the data forms the comparison group.
non_rent_control_states = set(df['State'].unique()).difference(rent_control_states)

In [59]:
# Pearson correlation of every numeric column with housing prices.
# Numeric columns are selected explicitly: with pandas >= 2.0, DataFrame.corr()
# no longer drops non-numeric columns silently and would raise on the string
# 'State' column and the categorical policy columns.
df.select_dtypes(include='number').corr()['Housing_Prices_Quarter']

Year                           0.732366
Housing_Prices_Quarter         1.000000
Community_Spending             0.527104
Unexpected_Housing_Spending    0.327005
Govt_Direct_Expenditure        0.564714
Property_Rights               -0.130334
Number_Interest_Groups         0.357449
Name: Housing_Prices_Quarter, dtype: float64

In [77]:
# For every (state, policy) pair, find the first row whose value differs from
# the value in the following row and record that row's 'Year'.
# Rows are taken in their existing order within each state (assumed to be
# chronological -- TODO confirm the CSV is sorted by Year within State).
# NOTE(review): the recorded year is the last year holding the *old* value;
# the new value first appears in the next row. Confirm this is the intended
# meaning of "year of change".
change_records = []
ever_changed = []
for st_name, st_df in df.groupby('State'):
    years = st_df['Year'].to_numpy()
    last_pos = len(years) - 1
    for col in cat_cols:
        # shift(-1) lines each row up with the next one; the final row is
        # compared against a missing value and therefore always counts as
        # "different", so there is always at least one flagged position.
        differs_from_next = (st_df[col] != st_df[col].shift(-1)).to_numpy()
        first_pos = np.flatnonzero(differs_from_next)[0]
        change_records.append({'state': st_name, 'policy': col, 'year': years[first_pos]})
        # If only the final row was flagged, the policy never changed.
        ever_changed.append(first_pos != last_pos)

# Build the frame once (DataFrame.append was removed in pandas 2.0 and grows
# quadratically inside a loop), then keep only the pairs that actually changed.
# Filtering after construction preserves the original positional index labels.
years_of_change = pd.DataFrame(change_records, columns=['state','policy','year'])
years_of_change = years_of_change.loc[np.asarray(ever_changed, dtype=bool)]

In [78]:
years_of_change

Unnamed: 0,state,policy,year
4,AK,Banned_Discrimination_Public_Housing,1961
5,AK,Banned_Discrimination_Private_Housing,1961
17,AL,Legislation_Public_Housing,1934
21,AL,Prohibit_Rent_Control,1992
28,AR,Legislation_Public_Housing,1936
...,...,...,...
522,WI,Banned_Discrimination_Private_Housing,1964
523,WI,Legislation_Public_Housing,1934
525,WI,State_Aid_Allowed,1948
527,WI,Prohibit_Rent_Control,1991
