In [2]:
import pandas as pd
import numpy as np
import datetime as dt
from pathlib import Path
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

In [26]:
csvpath = Path("../Resources/Jack/Texas_Pricing_Data.csv")

In [27]:
pricing_data_df = pd.read_csv(csvpath)

In [28]:
pricing_data_df.set_index(pd.to_datetime(pricing_data_df['month_date_yyyymm'], format='%Y%m', infer_datetime_format=True), inplace=True)

In [29]:
pricing_data_df.index.rename('date', inplace=True)

In [30]:
pricing_data_df.sort_values(by=['postal_code','date'], inplace=True)

In [31]:
pricing_data_df['pct_price_change'] = 0

In [32]:
pricing_data_df['pct_price_sqft_change'] = 0

In [33]:
pricing_data_df['pct_price_sqft_change'] = pricing_data_df['median_listing_price_per_square_feet'].pct_change()

In [34]:
pricing_data_df.index.rename('date', inplace=True)

In [35]:
median_list_sqft_df = pd.pivot_table(pricing_data_df, index='date', columns = 'postal_code', values = 'median_listing_price_per_square_feet')

In [36]:
median_list_sqft_df_pct_change = median_list_sqft_df.pct_change()

In [37]:
median_list_sqft_df_pct_change.head()

postal_code,77002,77003,77004,77005,77006,77007,77008,77009,77011,77012,...,77962,77963,77964,77968,77975,77979,77982,77983,77984,77995
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-07-01,,,,,,,,,,,...,,,,,,,,,,
2016-08-01,-0.010487,0.000632,-0.020456,-0.024619,0.020962,-0.000356,0.005425,0.020145,0.038462,,...,,-0.237935,-0.198275,,,0.15063,0.062014,0.0,,-0.076036
2016-09-01,-0.009577,-0.008834,-0.016567,0.019856,-0.073172,-0.000154,-0.005395,0.002526,0.02552,,...,,-0.167035,-0.01162,,,0.033614,-0.043138,0.0,,0.082293
2016-10-01,-0.025969,-0.026389,-0.001245,0.008626,0.007592,0.00634,-0.006488,-0.01517,-0.071668,,...,,-0.047624,0.021581,,,0.013753,-0.027548,0.010161,,-0.062003
2016-11-01,0.013614,0.051141,0.009703,-0.007625,0.013765,0.004716,0.013427,-0.023271,-0.048484,,...,,0.069498,0.096479,-0.043229,,-0.011684,0.0,0.0,,-0.007656


In [15]:
active_listings_df = pd.pivot_table(pricing_data_df, index='date', columns = 'postal_code', values = 'active_listing_count')
active_listings_df.head()

postal_code,77002,77003,77004,77005,77006,77007,77008,77009,77011,77012,...,77962,77963,77964,77968,77975,77979,77982,77983,77984,77995
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-07-01,52.0,98.0,264.0,162.0,184.0,573.0,500.0,266.0,17.0,,...,,13.0,27.0,,,97.0,67.0,32.0,,28.0
2016-08-01,54.0,99.0,255.0,155.0,190.0,571.0,476.0,257.0,13.0,,...,,16.0,17.0,,,90.0,63.0,33.0,,30.0
2016-09-01,57.0,98.0,253.0,148.0,193.0,544.0,450.0,253.0,17.0,,...,,15.0,20.0,,,91.0,65.0,37.0,,30.0
2016-10-01,55.0,98.0,259.0,139.0,199.0,544.0,450.0,259.0,20.0,,...,,17.0,24.0,13.0,,90.0,80.0,39.0,,32.0
2016-11-01,53.0,116.0,265.0,134.0,201.0,532.0,419.0,244.0,21.0,,...,,16.0,20.0,16.0,,98.0,80.0,40.0,,34.0


In [16]:
new_listing_df = pd.pivot_table(pricing_data_df, index='date', columns = 'postal_code', values = 'new_listing_count')
new_listing_df.head()

postal_code,77002,77003,77004,77005,77006,77007,77008,77009,77011,77012,...,77962,77963,77964,77968,77975,77979,77982,77983,77984,77995
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-07-01,12.0,40.0,68.0,56.0,60.0,200.0,128.0,80.0,8.0,,...,,0.0,4.0,,,12.0,4.0,8.0,,4.0
2016-08-01,12.0,40.0,72.0,40.0,48.0,156.0,128.0,68.0,0.0,,...,,4.0,0.0,,,20.0,12.0,4.0,,4.0
2016-09-01,12.0,40.0,64.0,52.0,68.0,144.0,120.0,72.0,4.0,,...,,0.0,8.0,,,24.0,4.0,4.0,,4.0
2016-10-01,8.0,32.0,56.0,48.0,60.0,156.0,156.0,80.0,4.0,,...,,0.0,8.0,8.0,,12.0,4.0,0.0,,4.0
2016-11-01,16.0,48.0,64.0,24.0,68.0,156.0,96.0,68.0,4.0,,...,,0.0,0.0,0.0,,16.0,8.0,0.0,,8.0


In [17]:
average_listing_price_df = pd.pivot_table(pricing_data_df, index='date', columns = 'postal_code', values = 'average_listing_price')
average_listing_price_df.head()

postal_code,77002,77003,77004,77005,77006,77007,77008,77009,77011,77012,...,77962,77963,77964,77968,77975,77979,77982,77983,77984,77995
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-07-01,499723.0,341451.0,441233.0,1510365.0,774774.0,580182.0,575998.0,472284.0,185419.0,,...,,393036.0,394186.0,,,353403.0,381037.0,134068.0,,241024.0
2016-08-01,505122.0,334126.0,431404.0,1503900.0,749081.0,572637.0,572436.0,478573.0,172442.0,,...,,309691.0,188174.0,,,320148.0,406126.0,136147.0,,217864.0
2016-09-01,476139.0,340273.0,425008.0,1472984.0,767804.0,573682.0,563035.0,475897.0,179725.0,,...,,284766.0,174453.0,,,309580.0,379249.0,140606.0,,232794.0
2016-10-01,439559.0,342834.0,417510.0,1481270.0,781719.0,568499.0,550352.0,464904.0,177378.0,,...,,253020.0,261211.0,337062.0,,310148.0,353490.0,145223.0,,231415.0
2016-11-01,425851.0,359987.0,425358.0,1504154.0,776021.0,571030.0,553777.0,460867.0,209495.0,,...,,240307.0,283003.0,362755.0,,309276.0,352519.0,145269.0,,301838.0


In [18]:
pending_ratio_df = pd.pivot_table(pricing_data_df, index='date', columns = 'postal_code', values = 'pending_ratio')
pending_ratio_df.head()

postal_code,77002,77003,77004,77005,77006,77007,77008,77009,77011,77012,...,77962,77963,77964,77968,77975,77979,77982,77983,77984,77995
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-07-01,0.2885,0.5102,0.3712,0.3827,0.3478,0.3176,0.346,0.3308,0.2941,,...,,0.4615,0.1111,,,0.0825,0.0,0.0,,0.1429
2016-08-01,0.3704,0.4646,0.4118,0.3419,0.3368,0.324,0.3718,0.3191,0.7692,,...,,0.125,0.0588,,,0.1111,0.0159,0.0303,,0.1
2016-09-01,0.2105,0.449,0.3755,0.3514,0.2953,0.318,0.36,0.3083,0.2353,,...,,0.2,0.05,,,0.0989,0.0462,0.027,,0.0667
2016-10-01,0.1455,0.4082,0.332,0.3885,0.2663,0.2776,0.3422,0.3282,0.15,,...,,0.1176,0.125,0.0,,0.1,0.0125,0.0256,,0.0625
2016-11-01,0.1509,0.431,0.3509,0.3731,0.2289,0.2669,0.4248,0.3402,0.1429,,...,,0.0625,0.25,0.0,,0.0612,0.0125,0.025,,0.0882


In [121]:
median_list_sqft_df_pct_change.columns

Int64Index([77002, 77003, 77004, 77005, 77006, 77007, 77008, 77009, 77011,
            77012,
            ...
            77962, 77963, 77964, 77968, 77975, 77979, 77982, 77983, 77984,
            77995],
           dtype='int64', name='postal_code', length=292)

In [122]:
dict_of_frames = {}
for i in median_list_sqft_df_pct_change.columns:    
    dict_of_frames[i] = pd.DataFrame({
    'PCT Change per SQFT' : median_list_sqft_df_pct_change[i],
    'Active Listings' : active_listings_df[i], 
    'New Listings' : new_listing_df[i], 
    'Average Listing Price' : average_listing_price_df[i], 
    'Pending Ratio' : pending_ratio_df[i],
    }).corr()
    np.fill_diagonal(dict_of_frames[i].values, -2)

In [123]:
dict_of_frames[77006]

Unnamed: 0,PCT Change per SQFT,Active Listings,New Listings,Average Listing Price,Pending Ratio
PCT Change per SQFT,-2.0,-0.050696,-0.121113,0.054265,0.039511
Active Listings,-0.050696,-2.0,0.278191,-0.384,-0.420117
New Listings,-0.121113,0.278191,-2.0,-0.126055,0.159069
Average Listing Price,0.054265,-0.384,-0.126055,-2.0,0.115903
Pending Ratio,0.039511,-0.420117,0.159069,0.115903,-2.0


In [120]:
dict_of_frames[77995].agg({'PCT Change per SQFT' : ['max']})

Unnamed: 0,PCT Change per SQFT
max,0.216718


In [114]:
dict_of_frames[77002].index[dict_of_frames[77002]['PCT Change per SQFT'] == dict_of_frames[77002].agg({'PCT Change per SQFT' : ['max']}).index[0]].tolist()

  return op(a, b)


[]

In [57]:
pd.DataFrame({
    'PCT Change per SQFT' : median_list_sqft_df_pct_change[77002],
    'Active Listings' : active_listings_df[77002], 
    'New Listings' : new_listing_df[77002], 
    'Average Listing Price' : average_listing_price_df[77002], 
    'Pending Ratio' : pending_ratio_df[77002],
}).corr()

Unnamed: 0,PCT Change per SQFT,Active Listings,New Listings,Average Listing Price,Pending Ratio
PCT Change per SQFT,1.0,0.046096,0.034071,-0.019587,-0.04229
Active Listings,0.046096,1.0,0.136614,-0.364347,0.259542
New Listings,0.034071,0.136614,1.0,-0.291538,0.146742
Average Listing Price,-0.019587,-0.364347,-0.291538,1.0,0.137889
Pending Ratio,-0.04229,0.259542,0.146742,0.137889,1.0


In [74]:
pd.DataFrame({
    'x' : median_list_sqft_df_pct_change['median_listing_price'], 'y' : median_list_sqft_df_pct_change['active_listing_count'], 
}).corr()

KeyError: 'median_listing_price'

In [12]:
mlr_zip = LinearRegression()

In [13]:
mlr_zip.fit(df_zip[['median_listing_price','new_listing_count']], df_zip['pct_price_change'])

NameError: name 'df_zip' is not defined

In [14]:
print(mlr_zip.intercept_)

AttributeError: 'LinearRegression' object has no attribute 'intercept_'

In [15]:
print(mlr_zip.coef_)

AttributeError: 'LinearRegression' object has no attribute 'coef_'