In [1]:
import pandas as pd
import numpy as np
import json
import gmaps
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
import requests
from census import Census
from config import gkey
from IPython.display import display
import ipywidgets as widgets
import gmaps.geojson_geometries

gmaps.configure(api_key=gkey)


In [2]:
file = "../Resources/Austin Data Set CSV.csv"
base_df = pd.read_csv(file)

In [3]:
mask = base_df.lotSizeSqFt != 1508482800.0
new = base_df[mask]
new.numOfBathrooms

0        3.0
1        2.0
2        2.0
3        2.0
4        3.0
        ... 
15166    2.0
15167    4.0
15168    3.0
15169    2.0
15170    1.0
Name: numOfBathrooms, Length: 15170, dtype: float64

In [4]:
mask = new.numOfBathrooms != 27.0
new1 = new[mask]
new1.numOfBathrooms

0        3.0
1        2.0
2        2.0
3        2.0
4        3.0
        ... 
15166    2.0
15167    4.0
15168    3.0
15169    2.0
15170    1.0
Name: numOfBathrooms, Length: 15169, dtype: float64

In [5]:
mask = new1.homeType == "Single Family"
cleaned_data = new1[mask]
cleaned_data.homeType.value_counts()

Single Family    14239
Name: homeType, dtype: int64

In [6]:
cleaned_data.columns

Index(['zpid', 'city', 'streetAddress', 'zipcode', 'description', 'latitude',
       'longitude', 'propertyTaxRate', 'garageSpaces', 'hasAssociation',
       'hasCooling', 'hasGarage', 'hasHeating', 'hasSpa', 'hasView',
       'homeType', 'parkingSpaces', 'yearBuilt', 'latestPrice',
       'numPriceChanges', 'latest_saledate', 'latest_salemonth',
       'latest_saleyear', 'latestPriceSource', 'numOfPhotos',
       'numOfAccessibilityFeatures', 'numOfAppliances', 'numOfParkingFeatures',
       'numOfPatioAndPorchFeatures', 'numOfSecurityFeatures',
       'numOfWaterfrontFeatures', 'numOfWindowFeatures',
       'numOfCommunityFeatures', 'lotSizeSqFt', 'livingAreaSqFt',
       'numOfPrimarySchools', 'numOfElementarySchools', 'numOfMiddleSchools',
       'numOfHighSchools', 'avgSchoolDistance', 'avgSchoolRating',
       'avgSchoolSize', 'MedianStudentsPerTeacher', 'numOfBathrooms',
       'numOfBedrooms', 'numOfStories', 'homeImage'],
      dtype='object')

In [7]:
city_latslong = cleaned_data[["latitude", "longitude", "latestPrice"]]
city_latslong

Unnamed: 0,latitude,longitude,latestPrice
0,30.430632,-97.663078,305000
1,30.432672,-97.661697,295000
2,30.409748,-97.639771,256125
3,30.432112,-97.661659,240000
4,30.437368,-97.656860,239900
...,...,...,...
15165,30.185915,-97.828415,280000
15166,30.161997,-97.816345,330000
15167,30.240223,-97.772835,550000
15168,30.244835,-97.761124,875000


In [8]:
price = cleaned_data['latestPrice']
price

0        305000
1        295000
2        256125
3        240000
4        239900
          ...  
15165    280000
15166    330000
15167    550000
15168    875000
15170    374900
Name: latestPrice, Length: 14239, dtype: int64

In [9]:
sale_year = cleaned_data[["latest_saleyear", "latitude", 'longitude', "latestPrice"]]
sale_year

Unnamed: 0,latest_saleyear,latitude,longitude,latestPrice
0,2019,30.430632,-97.663078,305000
1,2020,30.432672,-97.661697,295000
2,2019,30.409748,-97.639771,256125
3,2018,30.432112,-97.661659,240000
4,2018,30.437368,-97.656860,239900
...,...,...,...,...
15165,2020,30.185915,-97.828415,280000
15166,2020,30.161997,-97.816345,330000
15167,2020,30.240223,-97.772835,550000
15168,2021,30.244835,-97.761124,875000


In [10]:
sale_year.rename(columns= {'latest_saleyear': 'year'}, inplace=True)
sale_year

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


Unnamed: 0,year,latitude,longitude,latestPrice
0,2019,30.430632,-97.663078,305000
1,2020,30.432672,-97.661697,295000
2,2019,30.409748,-97.639771,256125
3,2018,30.432112,-97.661659,240000
4,2018,30.437368,-97.656860,239900
...,...,...,...,...
15165,2020,30.185915,-97.828415,280000
15166,2020,30.161997,-97.816345,330000
15167,2020,30.240223,-97.772835,550000
15168,2021,30.244835,-97.761124,875000


In [49]:
base_df.lotSizeSqFt.value_counts()
# swap out sqft for something more manageable.  like year, house type, etc.  

11325.6     300
12196.8     285
11761.2     281
6969.0      258
7840.0      233
           ... 
10004.0       1
138085.2      1
7474.0        1
9722.0        1
5867.0        1
Name: lotSizeSqFt, Length: 1426, dtype: int64

In [12]:
locations = city_latslong[["latitude", "longitude"]]
fig = gmaps.figure(map_type='TERRAIN')
heatmap_layer = gmaps.heatmap_layer(locations, weights=cleaned_data['latestPrice']) 

fig.add_layer(heatmap_layer)
# fig.add_layer(gmaps.traffic_layer)
fig

Figure(layout=FigureLayout(height='420px'))

In [13]:
# class Interactive(object):
  

#     def __init__(self, count_of_schools):
#         self._df = count_of_schools
#         self._heatmap = None
#         self._slider = None
#         min_count = min(self._df['numOfPrimarySchools'])

#         map_figure = self._render_map(min_count)
#         controls = self._render_controls(min_count)
#         self._container = widgets.VBox([title_widget, controls, map_figure])
        
#         title_widget = widgets.HTML('<h3>Home Price by Square Footage</h3>''<h4>Data from The Austin Housing Project</a></h4>')

#     def render(self):
#         display(self._container)

#     def _on_year_change(self, change):
#         numOfPrimarySchools = self._slider.value
#         self._heatmap.locations = self._locations_for_year(numOfPrimarySchools)
#         self._total_box.value = self._total_count_of_schools_text(numOfPrimarySchools)
#         return self._container

#     def _render_map(self, min_count):
#         fig = gmaps.figure(map_type='TERRAIN')
#         self._heatmap = gmaps.heatmap_layer(self._locations_for_year(min_count),
#             max_intensity=100,
#             point_radius=8
#         )
        
#         fig.add_layer(self._heatmap)
#         return fig

#     def _render_controls(self, min_sqft):
#         self._slider = widgets.IntSlider(
#             value=min_count,
#             min=min(self._df['numOfPrimarySchools']),
#             max=max(self._df['numOfPrimarySchools']),
#             description='Count of schools',
#             continuous_update=False
#         )
#         self._total_box = widgets.Label(
#             value=self._total_count_of_schools_text(min_count)
#         )
#         self._slider.observe(self._on_year_change, names='value')
#         controls = widgets.HBox(
#             [self._slider, self._total_box],
#             layout={'justify_content': 'space-between'}
#         )
#         return controls

#     def _locations_for_year(self, numOfPrimarySchools):
#         return self._df[self._df['numOfPrimarySchools'] == numOfPrimarySchools][['latitude', 'longitude']]

#     def _total_count_of_schools(self, numOfPrimarySchools):
#         return int(self._df[self._df['numOfPrimarySchools'] == numOfPrimarySchools]['numOfPrimarySchools'].count())

#     def _total_count_of_schools_text(self, numOfPrimarySchools):
#         return '{} Count of schools'.format(self._total_count_of_schools(numOfPrimarySchools))
    


# Interactive(sqft_price).render()

In [27]:
class Interactive(object): 
    
    def __init__(self, sale_year):
        self._df = sale_year
        self._heatmap = None
        self._slider = None
        initial_year = min(self._df['year'])

        title_widget = widgets.HTML(
            '<h3>Home sales by year</h3>''<h4>Data from The Austin Housing Project</a></h4>'
            )


        map_figure = self._render_map(initial_year)
        controls = self._render_controls(initial_year)
        self._container = widgets.VBox([title_widget, controls, map_figure])

    def render(self):
        display(self._container)

    def _on_year_change(self, change):
        year = self._slider.value
        self._heatmap.locations = self._locations_for_year(year)
        self._total_box.value = self._total_sales_text_for_year(year)
        return self._container

    def _render_map(self, initial_year):
        fig = gmaps.figure(map_type='HYBRID')
        self._heatmap = gmaps.heatmap_layer(
            self._locations_for_year(initial_year),
            max_intensity=25,
            point_radius=20
        )
        fig.add_layer(self._heatmap)
        return fig

    def _render_controls(self, initial_year):
        self._slider = widgets.IntSlider(
            value=initial_year,
            min=min(self._df['year']),
            max=max(self._df['year']),
            description='Year',
            continuous_update=False
        )
        self._total_box = widgets.Label(
            value=self._total_sales_text_for_year(initial_year)
        )
        self._slider.observe(self._on_year_change, names='value')
        controls = widgets.HBox(
            [self._slider, self._total_box],
            layout={'justify_content': 'space-between'}
        )
        return controls

    def _locations_for_year(self, year):
        return self._df[self._df['year'] == year][['latitude', 'longitude']]

    def _total_sales_for_year(self, year):
        return int(self._df[self._df['year'] == year]['year'].count())

    def _total_sales_text_for_year(self, year):
        return '{} Total Home Sales'.format(self._total_sales_for_year(year))


Interactive(sale_year).render()

VBox(children=(HTML(value='<h3>Home sales by year</h3><h4>Data from The Austin Housing Project</a></h4>'), HBo…

In [18]:
heatmap_layer.max_intensity = 10000
heatmap_layer.point_radius = 5

In [28]:
sale_month = cleaned_data[["latest_salemonth", "latitude", 'longitude', "latestPrice"]]
sale_month

Unnamed: 0,latest_salemonth,latitude,longitude,latestPrice
0,9,30.430632,-97.663078,305000
1,10,30.432672,-97.661697,295000
2,7,30.409748,-97.639771,256125
3,8,30.432112,-97.661659,240000
4,10,30.437368,-97.656860,239900
...,...,...,...,...
15165,11,30.185915,-97.828415,280000
15166,12,30.161997,-97.816345,330000
15167,12,30.240223,-97.772835,550000
15168,1,30.244835,-97.761124,875000


In [None]:
# sale_month = cleaned_data[["latest_salemonth", "latitude", 'longitude', "latestPrice"]]
# sale_month.rename(columns= {'latest_salemonth': 'Month'}, inplace=True)
# sale_month

In [29]:
sale_month.rename(columns= {'latest_salemonth': 'Month'}, inplace=True)
sale_month

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


Unnamed: 0,Month,latitude,longitude,latestPrice
0,9,30.430632,-97.663078,305000
1,10,30.432672,-97.661697,295000
2,7,30.409748,-97.639771,256125
3,8,30.432112,-97.661659,240000
4,10,30.437368,-97.656860,239900
...,...,...,...,...
15165,11,30.185915,-97.828415,280000
15166,12,30.161997,-97.816345,330000
15167,12,30.240223,-97.772835,550000
15168,1,30.244835,-97.761124,875000


In [33]:
class Interactive(object): 
    
    def __init__(self, sale_month):
        self._df = sale_month
        self._heatmap = None
        self._slider = None
        initial_month = min(self._df['Month'])

        title_widget = widgets.HTML(
            '<h3>Home sales by Month</h3>''<h4>Data from The Austin Housing Project</a></h4>'
            )


        map_figure = self._render_map(initial_month)
        controls = self._render_controls(initial_month)
        self._container = widgets.VBox([title_widget, controls, map_figure])

    def render(self):
        display(self._container)

    def _on_month_change(self, change):
        Month = self._slider.value
        self._heatmap.locations = self._locations_for_month(Month)
        self._total_box.value = self._total_sales_text_for_month(Month)
        return self._container

    def _render_map(self, initial_month):
        fig = gmaps.figure(map_type='HYBRID')
        self._heatmap = gmaps.heatmap_layer(
            self._locations_for_month(initial_month),
            max_intensity=25,
            point_radius=25
        )
        fig.add_layer(self._heatmap)
        return fig

    def _render_controls(self, initial_month):
        self._slider = widgets.IntSlider(
            value=initial_month,
            min=min(self._df['Month']),
            max=max(self._df['Month']),
            description='Month',
            continuous_update=False
        )
        self._total_box = widgets.Label(
            value=self._total_sales_text_for_month(initial_month)
        )
        self._slider.observe(self._on_month_change, names='value')
        controls = widgets.HBox(
            [self._slider, self._total_box],
            layout={'justify_content': 'space-between'}
        )
        return controls

    def _locations_for_month(self, Month):
        return self._df[self._df['Month'] == Month][['latitude', 'longitude']]

    def _total_sales_for_month(self, Month):
        return int(self._df[self._df['Month'] == Month]['Month'].count())

    def _total_sales_text_for_month(self, Month):
        return '{} historical Home Sales by Month'.format(self._total_sales_for_month(Month))


Interactive(sale_month).render()

VBox(children=(HTML(value='<h3>Home sales by Month</h3><h4>Data from The Austin Housing Project</a></h4>'), HB…

In [None]:
# sqft = cleaned_data[["latest_salemonth", "latitude", 'longitude', "livingAreaSqFt"]]
# sale_month.rename(columns= {'latest_salemonth': 'Month'}, inplace=True)
# sale_month

In [34]:
locations = city_latslong[["latitude", "longitude"]]
fig = gmaps.figure(map_type='TERRAIN')
heatmap_layer = gmaps.heatmap_layer(locations, weights=cleaned_data['livingAreaSqFt']) 

fig.add_layer(heatmap_layer)
# fig.add_layer(gmaps.traffic_layer)
fig

Figure(layout=FigureLayout(height='420px'))

In [35]:
cleaned_data.columns

Index(['zpid', 'city', 'streetAddress', 'zipcode', 'description', 'latitude',
       'longitude', 'propertyTaxRate', 'garageSpaces', 'hasAssociation',
       'hasCooling', 'hasGarage', 'hasHeating', 'hasSpa', 'hasView',
       'homeType', 'parkingSpaces', 'yearBuilt', 'latestPrice',
       'numPriceChanges', 'latest_saledate', 'latest_salemonth',
       'latest_saleyear', 'latestPriceSource', 'numOfPhotos',
       'numOfAccessibilityFeatures', 'numOfAppliances', 'numOfParkingFeatures',
       'numOfPatioAndPorchFeatures', 'numOfSecurityFeatures',
       'numOfWaterfrontFeatures', 'numOfWindowFeatures',
       'numOfCommunityFeatures', 'lotSizeSqFt', 'livingAreaSqFt',
       'numOfPrimarySchools', 'numOfElementarySchools', 'numOfMiddleSchools',
       'numOfHighSchools', 'avgSchoolDistance', 'avgSchoolRating',
       'avgSchoolSize', 'MedianStudentsPerTeacher', 'numOfBathrooms',
       'numOfBedrooms', 'numOfStories', 'homeImage'],
      dtype='object')

In [53]:
# locations = city_latslong[["latitude", "longitude"]]
# fig = gmaps.figure(map_type='TERRAIN')
# heatmap_layer = gmaps.heatmap_layer(locations, weights=cleaned_data['lotSizeSqFt']) 

# fig.add_layer(heatmap_layer)
# # fig.add_layer(gmaps.traffic_layer)
# fig

Figure(layout=FigureLayout(height='420px'))

In [None]:
cleaned_data

In [58]:
total_sales = cleaned_data.latestPrice.sum()
total_sales

7351484865

In [59]:
count_ofhomes = len(cleaned_data)
count_ofhomes

14239

In [62]:
average_price = total_sales/count_ofhomes
average_price

516292.216096636