# Analyzing Gentrification with Zillow Economics Data

### Data and library Imports

In [1]:
# Methods/functions we created
import City_Preprocessing as zillowPro
import Gentrification_Control as gentPro
import Time_Series_KMeans_Utils as tsUtils

# Libraries
import pandas as pd
import numpy as np
from tslearn.metrics import dtw, cdist_dtw
from tslearn.preprocessing import TimeSeriesResampler
from tslearn.utils import to_time_series, to_time_series_dataset
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.preprocessing import MinMaxScaler
from tslearn.clustering import TimeSeriesKMeans

In [2]:
# Raw Zillow city time series data with all 81 features
city_raw = pd.read_csv('/Users/briankalinowski/PycharmProjects/CIS600/zecon/City_time_series.csv')

# Rental Mertrics

In [3]:
# Geo and Rental features
rental_features = ['Date', 'RegionName',
                   'PriceToRentRatio_AllHomes',
                   'ZRI_AllHomes',
                   'ZriPerSqft_AllHomes',
                   'Zri_SingleFamilyResidenceRental']

# all our rental measurement features
rental_values = ['PriceToRentRatio_AllHomes',
                 'ZRI_AllHomes',
                 'ZriPerSqft_AllHomes',
                 'Zri_SingleFamilyResidenceRental']

lookup_ix = ['State', 'City']
full_ix = ['State', 'City', 'Date']

city_rental_data = zillowPro.process_city_data(city_raw, rental_features, rental_values, lookup_ix, full_ix)

print('Rental Data Shape:', city_rental_data.shape)
print('Unique State/City Groups:', city_rental_data.groupby(['State', 'City']).ngroups)
city_rental_data.head(8)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  downcast=downcast, **kwargs)


Rental Data Shape: (80820, 7)
Unique State/City Groups: 10132


Unnamed: 0,State,City,Date,AVG_PriceToRentRatio_AllHomes,AVG_ZRI_AllHomes,AVG_ZriPerSqft_AllHomes,AVG_Zri_SingleFamilyResidenceRental
0,NC,Lumberton,2010,7.63,800.0,0.597333,814.666667
1,NC,Lumberton,2011,7.516667,799.083333,0.602833,809.333333
2,NC,Lumberton,2012,6.9225,806.166667,0.602333,816.0
3,NC,Lumberton,2013,6.505833,798.333333,0.603833,806.25
4,NC,Lumberton,2014,6.76,778.083333,0.5925,783.0
5,NC,Lumberton,2015,7.621667,813.0,0.601333,815.083333
6,NC,Lumberton,2016,7.256667,816.666667,0.601667,819.416667
7,NC,Lumberton,2017,8.270833,803.833333,0.599667,807.166667


### Gentrification Control Cities

`('NC', 'Asheville'),
('TN', 'Nashville'),
('CA', 'Oakland'), 
('SC', 'Charleston'), 
('CA', 'Anaheim'),
('CA', 'Berkeley'), 
('WA', 'Seattle'), 
('TX', 'Austin'), 
('CA', 'Los Angeles'), 
('CA', 'San Diego'),
('TX', 'Midland'), 
('DC', 'Washington'), 
('OR', 'Portland'), 
('CA', 'Sacramento'), 
('NY', 'New York'),
('MI', 'Royal Oak'), 
('AR', 'Bentonville'), 
('CA', 'Costa Mesa'), 
('CA', 'San Marcos'), 
('MI', 'Ann Arbor'),
('NJ', 'Jersey City'), 
('MA', 'Somerville'), 
('CO', 'Thornton'), 
('CA', 'Vista'), 
('CA', 'Long Beach'),
('PA', 'Pittsburgh'), 
('MA', 'Quincy'), 
('CA', 'Napa'), 
('OR', 'Hillsboro'), 
('CO', 'Denver'),
('CA', 'Hayward')`

In [4]:
city_rental_data.loc[city_rental_data['State'] == 'GENT_CONTROL'].loc[city_rental_data['City'] == 'GENT_CONTROL']

Unnamed: 0,State,City,Date,AVG_PriceToRentRatio_AllHomes,AVG_ZRI_AllHomes,AVG_ZriPerSqft_AllHomes,AVG_Zri_SingleFamilyResidenceRental
80812,GENT_CONTROL,GENT_CONTROL,2010,14.120115,1687.16092,1.236736,1731.850575
80813,GENT_CONTROL,GENT_CONTROL,2011,13.477414,1717.321839,1.261351,1763.37069
80814,GENT_CONTROL,GENT_CONTROL,2012,13.493534,1750.606322,1.290908,1793.359195
80815,GENT_CONTROL,GENT_CONTROL,2013,14.707213,1820.551724,1.342598,1859.735632
80816,GENT_CONTROL,GENT_CONTROL,2014,15.361609,1930.813218,1.414253,1967.186782
80817,GENT_CONTROL,GENT_CONTROL,2015,15.245862,2090.109195,1.539569,2135.922414
80818,GENT_CONTROL,GENT_CONTROL,2016,15.726494,2203.390805,1.629098,2245.747126
80819,GENT_CONTROL,GENT_CONTROL,2017,16.647184,2266.12931,1.679609,2303.793103


### Min-Max Scaling

In [7]:
features_to_scale = ['AVG_PriceToRentRatio_AllHomes',
                     'AVG_ZRI_AllHomes',
                     'AVG_ZriPerSqft_AllHomes',
                     'AVG_Zri_SingleFamilyResidenceRental']

city_rental_data = zillowPro.min_max_scale(city_rental_data, features_to_scale)
city_rental_data.head(8)

Unnamed: 0,State,City,Date,AVG_PriceToRentRatio_AllHomes,AVG_ZRI_AllHomes,AVG_ZriPerSqft_AllHomes,AVG_Zri_SingleFamilyResidenceRental
0,NC,Lumberton,2010,0.144786,0.012308,0.047074,0.013163
1,NC,Lumberton,2011,0.141373,0.012265,0.04816,0.01291
2,NC,Lumberton,2012,0.123478,0.012597,0.048061,0.013226
3,NC,Lumberton,2013,0.11093,0.01223,0.048357,0.012764
4,NC,Lumberton,2014,0.118585,0.011282,0.046119,0.011661
5,NC,Lumberton,2015,0.144535,0.012917,0.047864,0.013183
6,NC,Lumberton,2016,0.133542,0.013089,0.047929,0.013388
7,NC,Lumberton,2017,0.164086,0.012488,0.047534,0.012807


### Rental Dynamic Time Warping Similarity With Gentrified Cities

In [8]:
city_dtw_rentals = gentPro.set_geo_dtw_data(city_rental_data)

In [10]:
# Cities MOST similar to our gentrification control
city_dtw_rentals.sort_values(by='dtw_score', ascending=True).reset_index(drop=True).head(21)

Unnamed: 0,State,City,dtw_score,dtw_value,dtw_label
0,GENT_CONTROL,GENT_CONTROL,0.0,0.0,HIGH_GENT
1,CA,Santee,0.064712,0.0,HIGH_GENT
2,CA,Rohnert Park,0.06526,0.0,HIGH_GENT
3,CA,Pinole,0.06863,0.0,HIGH_GENT
4,CA,Oceanside,0.070289,0.0,HIGH_GENT
5,MN,Excelsior,0.074732,0.0,HIGH_GENT
6,CA,Valinda,0.080531,0.0,HIGH_GENT
7,CA,Lakeside,0.080739,0.0,HIGH_GENT
8,CA,Cotati,0.081757,0.0,HIGH_GENT
9,CA,Concord,0.083737,0.0,HIGH_GENT


In [11]:
# Cities LEAST similar to our gentrification control
city_dtw_rentals.sort_values(by='dtw_score', ascending=False).reset_index(drop=True).head(21)

Unnamed: 0,State,City,dtw_score,dtw_value,dtw_label
0,FL,Jupiter Island,3.905696,4.0,NO_GENT
1,CA,Atherton,2.849645,4.0,NO_GENT
2,CA,Beverly Hills,2.413206,3.0,LOW_GENT
3,CA,Belvedere,2.314848,3.0,LOW_GENT
4,FL,Palm Beach,2.299241,3.0,LOW_GENT
5,CA,Stinson Beach,2.118444,3.0,LOW_GENT
6,CA,Portola Valley,2.034014,3.0,LOW_GENT
7,CA,Hillsborough,2.028716,3.0,LOW_GENT
8,CA,Los Altos Hills,2.010883,3.0,LOW_GENT
9,FL,Gulf Stream,1.990482,3.0,LOW_GENT


### DTW Time-Series Kmeans Clustering 

https://www.zillow.com/research/data/

https://tslearn.readthedocs.io/en/latest/gen_modules/clustering/tslearn.clustering.TimeSeriesKMeans.html#

In [12]:
zillow_zri_ts = pd.read_csv('/Users/briankalinowski/Desktop/CIS600_DataMining/Zillow_Data_Clean/zillow_portal_zri.csv')
zillow_zri_ts = tsUtils.clean_zillow_ts_data(zillow_zri_ts, lookup_ix)
zillow_zri_ts.head()

Unnamed: 0,State,City,2010-11,2010-12,2011-01,2011-02,2011-03,2011-04,2011-05,2011-06,...,2018-07,2018-08,2018-09,2018-10,2018-11,2018-12,2019-01,2019-02,2019-03,2019-04
0,AK,Anchor Point,0.044949,0.044552,0.043944,0.043563,0.043374,0.041391,0.039998,0.041483,...,0.034593,0.034008,0.033757,0.033519,0.032728,0.030869,0.02906,0.027916,0.02771,0.027707
1,AK,Anchorage,0.071667,0.071033,0.070064,0.069332,0.068906,0.072616,0.074815,0.075777,...,0.069615,0.068437,0.067792,0.067546,0.067895,0.067808,0.067742,0.067236,0.067113,0.067045
2,AK,Fairbanks,0.056009,0.055513,0.054756,0.05423,0.053942,0.051712,0.051085,0.051984,...,0.053679,0.052698,0.052258,0.052176,0.05275,0.052871,0.052901,0.052157,0.051618,0.051064
3,AK,Homer,0.057596,0.057087,0.056308,0.055761,0.05546,0.052859,0.050775,0.049708,...,0.041893,0.041128,0.040759,0.040648,0.039949,0.038124,0.036049,0.034787,0.034602,0.034846
4,AK,Kenai,0.05568,0.055188,0.054435,0.053913,0.053628,0.051139,0.049635,0.048828,...,0.037217,0.036397,0.036122,0.036204,0.036433,0.035848,0.03514,0.034262,0.034175,0.034184


In [13]:
# Time-Series-KMeans
zri_kmeans_clustering = tsUtils.run_time_series_kmeans(zillow_zri_ts, lookup_ix, 9)
zri_kmeans_clustering.head()

0.002 --> 0.002 --> 0.002 --> 0.002 --> 0.002 --> 


Unnamed: 0,State,City,dtw_cluster_prediction
0,AK,Anchor Point,2.0
1,AK,Anchorage,0.0
2,AK,Fairbanks,0.0
3,AK,Homer,0.0
4,AK,Kenai,0.0


# Core Housing Metrics 

In [13]:
# Geo and housing features
housing_features = ['Date', 'RegionName', 'PriceToRentRatio_AllHomes',
                    'ZHVI_AllHomes', 'ZHVIPerSqft_AllHomes', 
                    'ZHVI_BottomTier', 'ZHVI_MiddleTier', 'ZHVI_TopTier']

# all our housing measurement features
housing_values = ['PriceToRentRatio_AllHomes','ZHVI_AllHomes', 
                  'ZHVIPerSqft_AllHomes', 'ZHVI_BottomTier', 'ZHVI_MiddleTier', 'ZHVI_TopTier']

lookup_ix = ['State', 'City']
full_ix = ['State', 'City', 'Date']

city_housing_data = zillowPro.process_city_data(city_raw, housing_features, housing_values, lookup_ix, full_ix)

print('Housing Data Shape:', city_housing_data.shape)
print('Unique State/City Groups:', city_housing_data.groupby(['State', 'City']).ngroups)
city_housing_data.head(23)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  downcast=downcast, **kwargs)


Housing Data Shape: (208176, 9)
Unique State/City Groups: 9771


Unnamed: 0,State,City,Date,AVG_PriceToRentRatio_AllHomes,AVG_ZHVI_AllHomes,AVG_ZHVIPerSqft_AllHomes,AVG_ZHVI_BottomTier,AVG_ZHVI_MiddleTier,AVG_ZHVI_TopTier
0,OR,Rockcreek,1996,16.44,170555.555556,92.666667,138744.444444,170555.555556,228122.222222
1,OR,Rockcreek,1997,16.44,179091.666667,96.75,145366.666667,179091.666667,235116.666667
2,OR,Rockcreek,1998,16.44,186091.666667,100.916667,150791.666667,186091.666667,243700.0
3,OR,Rockcreek,1999,16.44,187900.0,102.166667,153933.333333,187900.0,244291.666667
4,OR,Rockcreek,2000,16.44,188200.0,104.25,159075.0,188200.0,248033.333333
5,OR,Rockcreek,2001,16.44,198316.666667,109.666667,164350.0,198316.666667,261991.666667
6,OR,Rockcreek,2002,16.44,210950.0,114.833333,172875.0,210950.0,278241.666667
7,OR,Rockcreek,2003,16.44,221300.0,120.5,179516.666667,221300.0,293708.333333
8,OR,Rockcreek,2004,16.44,237008.333333,129.583333,192733.333333,237008.333333,314133.333333
9,OR,Rockcreek,2005,16.44,278091.666667,147.833333,219600.0,278091.666667,362808.333333


### Min-Max Scaling

In [14]:
features_to_scale = ['AVG_PriceToRentRatio_AllHomes','AVG_ZHVI_AllHomes', 
                     'AVG_ZHVIPerSqft_AllHomes', 'AVG_ZHVI_BottomTier', 
                     'AVG_ZHVI_MiddleTier', 'AVG_ZHVI_TopTier']

city_housing_data = zillowPro.min_max_scale(city_housing_data, features_to_scale)
city_housing_data.head()

Unnamed: 0,State,City,Date,AVG_PriceToRentRatio_AllHomes,AVG_ZHVI_AllHomes,AVG_ZHVIPerSqft_AllHomes,AVG_ZHVI_BottomTier,AVG_ZHVI_MiddleTier,AVG_ZHVI_TopTier
0,OR,Rockcreek,1996,0.386003,0.023425,0.043268,0.032742,0.023425,0.017271
1,OR,Rockcreek,1997,0.386003,0.024736,0.045572,0.034515,0.024736,0.017878
2,OR,Rockcreek,1998,0.386003,0.025811,0.047924,0.035967,0.025811,0.018623
3,OR,Rockcreek,1999,0.386003,0.026089,0.048629,0.036808,0.026089,0.018674
4,OR,Rockcreek,2000,0.386003,0.026135,0.049805,0.038184,0.026135,0.018998


### Housing Dynamic Time Warping Similarity With Gentrified Cities

In [15]:
city_dtw_housing = gentPro.set_geo_dtw_data(city_housing_data)

In [16]:
# Cities MOST similar to our gentrification control
city_dtw_housing.sort_values(by='dtw_score', ascending=True).reset_index(drop=True).head(21)

Unnamed: 0,State,City,dtw_score,dtw_value,dtw_label
0,GENT_CONTROL,GENT_CONTROL,0.0,0.0,HIGH_GENT
1,MA,Medford,0.088412,0.0,HIGH_GENT
2,MA,Stoneham,0.088817,0.0,HIGH_GENT
3,CA,Oceanside,0.092052,0.0,HIGH_GENT
4,CA,Santee,0.097922,0.0,HIGH_GENT
5,VA,Centreville,0.10039,0.0,HIGH_GENT
6,VA,Lorton,0.102779,0.0,HIGH_GENT
7,CA,El Cajon,0.103188,0.0,HIGH_GENT
8,MA,Woburn,0.104216,0.0,HIGH_GENT
9,CA,Rohnert Park,0.108622,0.0,HIGH_GENT


In [17]:
# Cities LEAST similar to our gentrification control
city_dtw_housing.sort_values(by='dtw_score', ascending=False).reset_index(drop=True).head(21)

Unnamed: 0,State,City,dtw_score,dtw_value,dtw_label
0,CA,Atherton,5.424433,4.0,NO_GENT
1,CA,Los Altos Hills,4.585154,4.0,NO_GENT
2,CA,Hillsborough,4.393282,4.0,NO_GENT
3,CA,Portola Valley,3.905214,4.0,NO_GENT
4,NY,Water Mill,3.758229,4.0,NO_GENT
5,CA,Belvedere,3.557124,4.0,NO_GENT
6,CA,Woodside,3.312129,4.0,NO_GENT
7,CA,Beverly Hills,3.307819,4.0,NO_GENT
8,CA,Hidden Hills,3.251623,4.0,NO_GENT
9,CA,Monte Sereno,3.16606,4.0,NO_GENT


### DTW Time-Series Kmeans Clustering 

In [18]:
zillow_zhvi_ts = pd.read_csv('/Users/briankalinowski/Desktop/CIS600_DataMining/Zillow_Data_Clean/zillow_portal_zhvi.csv')
zillow_zhvi_ts = tsUtils.clean_zillow_ts_data(zillow_zhvi_ts, lookup_ix)
zillow_zhvi_ts.head()

Unnamed: 0,State,City,1996-04,1996-05,1996-06,1996-07,1996-08,1996-09,1996-10,1996-11,...,2018-07,2018-08,2018-09,2018-10,2018-11,2018-12,2019-01,2019-02,2019-03,2019-04
0,AK,Anchorage,0.053775,0.0542,0.054672,0.055144,0.055569,0.055944,0.056181,0.056372,...,0.04786,0.047604,0.047395,0.047462,0.04795,0.048004,0.047606,0.047491,0.048415,0.049733
1,AK,Fairbanks,0.042466,0.04275,0.043033,0.043176,0.043225,0.043319,0.043274,0.043276,...,0.034535,0.034426,0.034383,0.034324,0.034475,0.034668,0.03466,0.034623,0.034899,0.035352
2,AK,Homer,0.04913,0.04946,0.049838,0.050216,0.050641,0.05111,0.051582,0.052054,...,0.038798,0.038661,0.038666,0.038852,0.039062,0.039043,0.038869,0.038877,0.03942,0.040203
3,AK,Juneau,0.072685,0.07222,0.07166,0.071006,0.070352,0.069742,0.069089,0.068482,...,0.054806,0.054549,0.054676,0.0549,0.055085,0.055155,0.055283,0.055617,0.056268,0.057041
4,AK,Kenai,0.047956,0.048099,0.048196,0.048339,0.048482,0.048763,0.049,0.049284,...,0.031986,0.031939,0.031902,0.031919,0.031942,0.031907,0.031813,0.031818,0.03223,0.032863


In [19]:
# Time-Series-KMeans
zhvi_kmeans_clustering = tsUtils.run_time_series_kmeans(zillow_zhvi_ts, lookup_ix, 23)
zhvi_kmeans_clustering.head()

0.006 --> 0.004 --> 0.004 --> 0.004 --> 0.003 --> 


Unnamed: 0,State,City,dtw_cluster_prediction
0,AK,Anchorage,0.0
1,AK,Fairbanks,0.0
2,AK,Homer,0.0
3,AK,Juneau,0.0
4,AK,Kenai,0.0


### Housing DTW Gentrificaion Control 

In [20]:
city_housing_data.loc[city_housing_data['State'] == 'GENT_CONTROL'].loc[city_housing_data['City'] == 'GENT_CONTROL']

Unnamed: 0,State,City,Date,AVG_PriceToRentRatio_AllHomes,AVG_ZHVI_AllHomes,AVG_ZHVIPerSqft_AllHomes,AVG_ZHVI_BottomTier,AVG_ZHVI_MiddleTier,AVG_ZHVI_TopTier
208154,GENT_CONTROL,GENT_CONTROL,1996,0.324749,0.018345,0.045109,0.023311,0.018345,0.015757
208155,GENT_CONTROL,GENT_CONTROL,1997,0.323858,0.019248,0.047561,0.024302,0.019248,0.016845
208156,GENT_CONTROL,GENT_CONTROL,1998,0.323245,0.021166,0.052545,0.026253,0.021166,0.018897
208157,GENT_CONTROL,GENT_CONTROL,1999,0.322978,0.023425,0.058632,0.028792,0.023425,0.021025
208158,GENT_CONTROL,GENT_CONTROL,2000,0.322794,0.027139,0.068768,0.033169,0.027139,0.024412
208159,GENT_CONTROL,GENT_CONTROL,2001,0.322794,0.030807,0.078889,0.037782,0.030807,0.027582
208160,GENT_CONTROL,GENT_CONTROL,2002,0.322794,0.034491,0.088502,0.042628,0.034491,0.030199
208161,GENT_CONTROL,GENT_CONTROL,2003,0.322794,0.039324,0.100754,0.048708,0.039324,0.033571
208162,GENT_CONTROL,GENT_CONTROL,2004,0.324682,0.046726,0.119862,0.057994,0.046726,0.039495
208163,GENT_CONTROL,GENT_CONTROL,2005,0.327231,0.053868,0.138456,0.066889,0.053868,0.045065


# Sales Housing Metrics 

Results for this section were inconcusive do to the nature of NaN values in the raw data

In [33]:
# Geo and housing sales features
housing_sales_features = ['Date', 'RegionName', 
                          'Sale_Counts_Seas_Adj',
                          'InventorySeasonallyAdjusted_AllHomes',
                          'PctOfHomesDecreasingInValues_AllHomes',
                          'PctOfHomesIncreasingInValues_AllHomes']

# all our housing sales measurement features
housing_sales_values = ['Sale_Counts_Seas_Adj',
                        'InventorySeasonallyAdjusted_AllHomes',
                        'PctOfHomesDecreasingInValues_AllHomes',
                        'PctOfHomesIncreasingInValues_AllHomes']

lookup_ix = ['State', 'City']
full_ix = ['State', 'City', 'Date']

city_housing_sales_data = zillowPro.process_city_data(city_raw, housing_sales_features, housing_sales_values, 
                                                      lookup_ix, full_ix)

print('Housing Data Shape:', city_housing_sales_data.shape)
print('Unique State/City Groups:', city_housing_sales_data.groupby(['State', 'City']).ngroups)
city_housing_data.head(21)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  downcast=downcast, **kwargs)


Housing Data Shape: (27485, 7)
Unique State/City Groups: 1344


Unnamed: 0,State,City,Date,AVG_PriceToRentRatio_AllHomes,AVG_ZHVI_AllHomes,AVG_ZHVIPerSqft_AllHomes,AVG_ZHVI_BottomTier,AVG_ZHVI_MiddleTier,AVG_ZHVI_TopTier
0,OR,Rockcreek,1996,0.386003,0.023425,0.043268,0.032742,0.023425,0.017271
1,OR,Rockcreek,1997,0.386003,0.024736,0.045572,0.034515,0.024736,0.017878
2,OR,Rockcreek,1998,0.386003,0.025811,0.047924,0.035967,0.025811,0.018623
3,OR,Rockcreek,1999,0.386003,0.026089,0.048629,0.036808,0.026089,0.018674
4,OR,Rockcreek,2000,0.386003,0.026135,0.049805,0.038184,0.026135,0.018998
5,OR,Rockcreek,2001,0.386003,0.027688,0.052862,0.039596,0.027688,0.020209
6,OR,Rockcreek,2002,0.386003,0.029628,0.055778,0.041877,0.029628,0.021619
7,OR,Rockcreek,2003,0.386003,0.031218,0.058976,0.043655,0.031218,0.02296
8,OR,Rockcreek,2004,0.386003,0.03363,0.064102,0.047192,0.03363,0.024732
9,OR,Rockcreek,2005,0.386003,0.039939,0.074402,0.054383,0.039939,0.028954


### Issues with the Housing Sales Metrics

Unlike our Rental and Core Housing metrics these sales metrics appear to have pockets of NAN values, These need to be dropped to maintain the overall trends of the time-serires data. Unfortually our DTW clustering method will not work with different sized data per city, so we will just backfill for now

In [34]:
len(city_housing_sales_data[city_housing_sales_data.isna().any(axis=1)])

29

In [36]:
city_housing_sales_data.fillna(method='bfill', inplace=True)

len(city_housing_sales_data[city_housing_sales_data.isna().any(axis=1)])

0

In [37]:
features_to_scale = ['AVG_Sale_Counts_Seas_Adj',
                    'AVG_InventorySeasonallyAdjusted_AllHomes',
                    'AVG_PctOfHomesDecreasingInValues_AllHomes',
                    'AVG_PctOfHomesIncreasingInValues_AllHomes']

city_housing_sales_data = zillowPro.min_max_scale(city_housing_sales_data, features_to_scale)
city_housing_sales_data.head()

Unnamed: 0,State,City,Date,AVG_Sale_Counts_Seas_Adj,AVG_InventorySeasonallyAdjusted_AllHomes,AVG_PctOfHomesDecreasingInValues_AllHomes,AVG_PctOfHomesIncreasingInValues_AllHomes
0,IL,West Dundee,1998,0.001263,0.002241,0.256931,0.629739
1,IL,West Dundee,1999,0.001263,0.002241,0.206547,0.679037
2,IL,West Dundee,2000,0.001263,0.002241,0.415411,0.409348
3,IL,West Dundee,2001,0.001263,0.002241,0.095015,0.839402
4,IL,West Dundee,2002,0.001263,0.002241,0.077865,0.862817


In [38]:
city_dtw_housing_sales = gentPro.set_geo_dtw_data(city_housing_sales_data)

In [39]:
city_dtw_housing_sales.head(20)

Unnamed: 0,State,City,dtw_score,dtw_value,dtw_label
0,AL,Chelsea,1.54078,2.0,NORMAL_GENT
1,AL,Meadowbrook,1.451029,2.0,NORMAL_GENT
2,AL,Wilsonville,1.734288,3.0,LOW_GENT
3,AR,Austin,1.883936,3.0,LOW_GENT
4,AR,Bryant,1.513555,2.0,NORMAL_GENT
5,AR,Little Rock,1.348355,1.0,MID_GENT
6,AR,Lonoke,1.775468,3.0,LOW_GENT
7,AR,Maumelle,1.77208,3.0,LOW_GENT
8,AR,North Little Rock,1.697641,3.0,LOW_GENT
9,AR,Sherwood,1.84084,3.0,LOW_GENT
