In [None]:
# Import GraphLab

In [None]:
import graphlab

# Load some house price data

In [None]:
# Load the house-sales SFrame from the 'home_data.gl/' directory
# (a relative path, so it is resolved against the notebook's working directory).
sales = graphlab.SFrame('home_data.gl/')

In [None]:
# Bare expression: the notebook renders the SFrame as this cell's output.
sales

# Exploring the data for housing sales

In [None]:
# Direct GraphLab Canvas output to the 'ipynb' target before drawing anything.
graphlab.canvas.set_target('ipynb')

# Scatter plot of living area (x) against sale price (y).
sales.show(
    view='Scatter Plot',
    x='sqft_living',
    y='price',
)

# Create a simple regression model of sqft_living to price

In [None]:
# Split the sales rows into a training part and a test part.
# 0.8 is the fraction handed to random_split; the fixed seed keeps the split repeatable.
train_data, test_data = sales.random_split(0.8, seed=0)

# Build the regression model

In [None]:
# Fit a linear regression that predicts 'price' from 'sqft_living' alone.
# validation_set=None disables GraphLab's automatic validation hold-out, which
# would otherwise be sampled at random from train_data; the model is then fit on
# all of train_data and is repeatable from run to run. This matches how
# my_advanced_features_model is created later in the notebook.
sqft_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=['sqft_living'],
    validation_set=None,
)

# Evaluate the simple model

In [None]:
# Mean sale price over the test split.
print(test_data['price'].mean())

In [None]:
# Evaluate the single-feature model on the test split (rows it was not trained on);
# the value returned by evaluate() is shown as the cell output.
sqft_model.evaluate(test_data)

# Let's show what predictions look like

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Compare actual test prices (dots) with the single-feature model's
# predictions (line) as a function of living area.
fig, ax = plt.subplots()
ax.plot(test_data['sqft_living'], test_data['price'], '.',
        label='actual price')
ax.plot(test_data['sqft_living'], sqft_model.predict(test_data), '-',
        label='predicted price')

# Label the figure so it can be read on its own when skimming the notebook.
ax.set_xlabel('sqft_living')
ax.set_ylabel('price')
ax.set_title('Price vs. living area (test data)')
ax.legend();  # trailing ';' keeps the Legend repr out of the cell output

In [None]:
# Look up the fitted model's 'coefficients' field; shown as the cell output.
sqft_model.get('coefficients')

# Exploring other features of the data

In [None]:
# Columns used as inputs by the multi-feature regression model.
my_features = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'zipcode',
]

In [None]:
# Call show() on just the columns listed in my_features.
sales[my_features].show()

In [None]:
# Box-and-whisker view with zipcode on the x axis and price on the y axis.
sales.show(view="BoxWhisker Plot", x='zipcode', y='price')

# Build a regression model with more features

In [None]:
# Fit a linear regression that predicts 'price' from the columns in my_features.
# validation_set=None disables GraphLab's automatic validation hold-out, which
# would otherwise be sampled at random from train_data. The model is then fit on
# all of train_data, is repeatable, and is trained on the same rows as
# my_advanced_features_model, so their test-set comparison is like for like.
my_features_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=my_features,
    validation_set=None,
)

In [None]:
# Echo the feature list the multi-feature model was trained on.
print(my_features)

In [None]:
# Test-set evaluation of the single-feature model, then of the multi-feature model.
print(sqft_model.evaluate(test_data))
print(my_features_model.evaluate(test_data))

# Apply the learned models to predict prices of 3 houses

In [None]:
# Select the sale whose id is '5309101200' (ids are compared as strings).
house1 = sales[sales['id'] == '5309101200']

In [None]:
# Bare expression: display the selected row(s) as the cell output.
house1

In [None]:
# Recorded sale price of house1.
print(house1['price'])

In [None]:
# Price predicted for house1 by the single-feature (sqft_living) model.
print(sqft_model.predict(house1))

In [None]:
# Price predicted for house1 by the multi-feature model.
print(my_features_model.predict(house1))

In [None]:
# Select the sale whose id is '1925069082'.
house2 = sales[sales['id'] == '1925069082']

In [None]:
# Bare expression: display the selected row(s) as the cell output.
house2

In [None]:
# Price predicted for house2 by the single-feature (sqft_living) model.
print(sqft_model.predict(house2))

In [None]:
# Price predicted for house2 by the multi-feature model.
print(my_features_model.predict(house2))

# Last house, super fancy

In [None]:
# Hand-written description of a single house, one single-element list per column,
# so the dict can be turned into a one-row SFrame.
bill_gates = {
    # Columns that also appear in my_features.
    'bedrooms': [8],
    'bathrooms': [25],
    'sqft_living': [50000],
    'sqft_lot': [225000],
    'floors': [4],
    'zipcode': ['98039'],  # a string, like the zipcode values compared elsewhere in this notebook
    # Remaining descriptive columns.
    'condition': [10],
    'grade': [10],
    'waterfront': [1],
    'view': [4],
    'sqft_above': [37500],
    'sqft_basement': [12500],
    'yr_built': [1994],
    'yr_renovated': [2010],
    'lat': [47.627606],
    'long': [-122.242054],
    'sqft_living15': [5000],
    'sqft_lot15': [40000],
}

In [None]:
# Wrap the dict in an SFrame and print the multi-feature model's prediction for it.
print(my_features_model.predict(graphlab.SFrame(bill_gates)))

# Select Zipcode with higher price

In [None]:
# Keep only the sales whose zipcode is "98039".
# NOTE(review): this rebinds `house2`, which an earlier cell set to the single
# sale with id '1925069082'. Re-running those earlier prediction cells after
# this one would silently use the zipcode subset instead; consider a distinct name.
house2 = sales[sales['zipcode'] == "98039"]

In [None]:
# Mean sale price across the rows currently held in `house2`
# (the final expression is what the cell displays).
house2['price'].mean()

In [None]:
# Number of rows currently held in `house2`.
print(len(house2))

# Sales in zipcode "98039" whose living area is strictly between 2000 and 4000 sqft.
# SArray comparisons are element-wise, so the mask is built without calling a
# Python lambda once per row.
house3 = sales[(sales['zipcode'] == "98039") &
               (sales['sqft_living'] > 2000) &
               (sales['sqft_living'] < 4000)]

# Mean sale price of that subset (the final expression is what the cell displays).
house3['price'].mean()

In [None]:
# Number of rows in the filtered subset `house3`.
print(len(house3))

In [None]:
# Input columns for the larger regression model: the six basic columns
# followed by additional descriptive columns.
advanced_features = [
    'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors', 'zipcode',
    'condition',      # condition of house
    'grade',          # measure of quality of construction
    'waterfront',     # waterfront property
    'view',           # type of view
    'sqft_above',     # square feet above ground
    'sqft_basement',  # square feet in basement
    'yr_built',       # the year built
    'yr_renovated',   # the year renovated
    'lat', 'long',    # the lat-long of the parcel
    'sqft_living15',  # average sq.ft. of 15 nearest neighbors
    'sqft_lot15',     # average lot size of 15 nearest neighbors
]

In [None]:
# Call show() on just the columns listed in advanced_features.
sales[advanced_features].show()

In [None]:
# Fit a linear regression that predicts 'price' from the columns in
# advanced_features; validation_set=None is passed so no validation set is used.
my_advanced_features_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=advanced_features,
    validation_set=None,
)

In [None]:
# Evaluate each model once on the test split and keep the results.
# NOTE(review): evaluate() presumably returns a metrics dict rather than a bare
# RMSE number, so the `rmse_` prefix is narrower than the stored value — confirm.
rmse_my_features_model = my_features_model.evaluate(test_data)
rmse_my_advanced_features_model = my_advanced_features_model.evaluate(test_data)

# Print the stored results rather than evaluating both models a second time.
print(rmse_my_features_model)
print(rmse_my_advanced_features_model)