# Launch Turi Create

In [36]:
import turicreate

# Load house sales data

In [37]:
sales = turicreate.SFrame('~/venv/Apple ML foundations/home_data.sframe/')

In [38]:
sales

id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront
7129300520,2014-10-13 00:00:00+00:00,221900,3,1.0,1180,5650,1,0
6414100192,2014-12-09 00:00:00+00:00,538000,3,2.25,2570,7242,2,0
5631500400,2015-02-25 00:00:00+00:00,180000,2,1.0,770,10000,1,0
2487200875,2014-12-09 00:00:00+00:00,604000,4,3.0,1960,5000,1,0
1954400510,2015-02-18 00:00:00+00:00,510000,3,2.0,1680,8080,1,0
7237550310,2014-05-12 00:00:00+00:00,1225000,4,4.5,5420,101930,1,0
1321400060,2014-06-27 00:00:00+00:00,257500,3,2.25,1715,6819,2,0
2008000270,2015-01-15 00:00:00+00:00,291850,3,1.5,1060,9711,1,0
2414600126,2015-04-15 00:00:00+00:00,229500,3,1.0,1780,7470,1,0
3793500160,2015-03-12 00:00:00+00:00,323000,3,2.5,1890,6560,2,0

view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat
0,3,7,1180,0,1955,0,98178,47.51123398
0,3,7,2170,400,1951,1991,98125,47.72102274
0,3,6,770,0,1933,0,98028,47.73792661
0,5,7,1050,910,1965,0,98136,47.52082
0,3,8,1680,0,1987,0,98074,47.61681228
0,3,11,3890,1530,2001,0,98053,47.65611835
0,3,7,1715,0,1995,0,98003,47.30972002
0,3,7,1060,0,1963,0,98198,47.40949984
0,3,7,1050,730,1960,0,98146,47.51229381
0,3,7,1890,0,2003,0,98038,47.36840673

long,sqft_living15,sqft_lot15
-122.25677536,1340.0,5650.0
-122.3188624,1690.0,7639.0
-122.23319601,2720.0,8062.0
-122.39318505,1360.0,5000.0
-122.04490059,1800.0,7503.0
-122.00528655,4760.0,101930.0
-122.32704857,2238.0,6819.0
-122.31457273,1650.0,9711.0
-122.33659507,1780.0,8113.0
-122.0308176,2390.0,7570.0


# Explore

In [39]:
sales.show()

In [42]:
turicreate.show(sales[1:5000]['sqft_living'],sales[1:5000]['price'])

# Simple regression model that predicts price from square feet

In [43]:
training_set, test_set = sales.random_split(.8,seed=0)

## train simple regression model

In [44]:
sqft_model = turicreate.linear_regression.create(training_set,target='price',features=['sqft_living'])

PROGRESS: Creating a validation set from 5 percent of training data. This may take a while.
          You can set ``validation_set=None`` to disable validation tracking.



# Evaluate the quality of our model

In [45]:
print (test_set['price'].mean())

543054.0425632538


In [46]:
turicreate.show(sales[1:5000]['price'],sales[1:5000]['zipcode'])

In [47]:
print (sqft_model.evaluate(test_set))

{'max_error': 4142787.893996685, 'rmse': 255195.3012642054}


# Explore model a little further

In [34]:
sqft_model.coefficients

name,index,value,stderr
(intercept),,-48876.577176038176,5054.303997830652
sqft_living,,282.878389360466,2.22276623832183


In [91]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.plot(test_set['sqft_living'],test_set['price'],'.',
        test_set['sqft_living'],sqft_model.predict(test_set),'-')

ModuleNotFoundError: No module named 'matplotlib'

# Explore other features of the data

In [29]:
my_features = ['bedrooms','bathrooms','sqft_living','sqft_lot','floors','zipcode']

In [30]:
sales[my_features].show()

In [31]:
turicreate.show(sales['zipcode'],sales['price'])

# Build a model with these additional features

In [32]:
my_features_model = turicreate.linear_regression.create(training_set,target='price',features=my_features)

PROGRESS: Creating a validation set from 5 percent of training data. This may take a while.
          You can set ``validation_set=None`` to disable validation tracking.



# Compare simple model with more complex one

In [33]:
print (my_features)

['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors', 'zipcode']


In [34]:
print (sqft_model.evaluate(test_set))
print (my_features_model.evaluate(test_set))

{'max_error': 4129133.671698495, 'rmse': 255231.58624169265}
{'max_error': 3502319.7971841535, 'rmse': 179702.46589848842}


# Apply learned models to make predictions

In [35]:
house1 = sales[sales['id']=='5309101200']

In [36]:
house1

id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront
5309101200,2014-06-05 00:00:00+00:00,620000,4,2.25,2400,5350,1.5,0

view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat
0,4,7,1460,940,1929,0,98117,47.67632376

long,sqft_living15,sqft_lot15
-122.37010126,1250.0,4880.0


<img src="http://blue.kingcounty.com/Assessor/eRealProperty/MediaHandler.aspx?Media=2916871">

In [37]:
print (house1['price'])

[620000, ... ]


In [38]:
print (sqft_model.predict(house1))

[630522.0426558993]


In [39]:
print (my_features_model.predict(house1))

[722319.1432776065]


## Prediction for a second house, a fancier one

In [40]:
house2 = sales[sales['id']=='1925069082']

In [41]:
house2

id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront
1925069082,2015-05-11 00:00:00+00:00,2200000,5,4.25,4640,22703,2,1

view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat
4,5,8,2860,1780,1952,0,98052,47.63925783

long,sqft_living15,sqft_lot15
-122.09722322,3140.0,14200.0


<img src="https://ssl.cdn-redfin.com/photo/1/bigphoto/302/734302_0.jpg">

In [42]:
print (sqft_model.predict(house2))

[1266139.0252788842]


In [43]:
print (my_features_model.predict(house2))

[1433032.4458761045]


## Prediction for a super fancy home

In [44]:
bill_gates = {'bedrooms':[8], 
              'bathrooms':[25], 
              'sqft_living':[50000], 
              'sqft_lot':[225000],
              'floors':[4], 
              'zipcode':['98039'], 
              'condition':[10], 
              'grade':[10],
              'waterfront':[1],
              'view':[4],
              'sqft_above':[37500],
              'sqft_basement':[12500],
              'yr_built':[1994],
              'yr_renovated':[2010],
              'lat':[47.627606],
              'long':[-122.242054],
              'sqft_living15':[5000],
              'sqft_lot15':[40000]}

<img src="https://upload.wikimedia.org/wikipedia/commons/2/26/Residence_of_Bill_Gates.jpg">

In [45]:
print (my_features_model.predict(turicreate.SFrame(bill_gates)))

[13727168.407053864]


# Homework

# Task 1

In [137]:
n=0;
for i in sales['zipcode']: 
    if i == '98039':
            n=n+1
print (n) 
        

50


In [138]:
zip98178= sales[sales['zipcode']=='98039']
zip98178['price'].mean()

2160606.5999999996

# Task 2

In [126]:
sales['sqft_living'] > 2000 

dtype: int
Rows: 21613
[0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, ... ]

In [143]:
result1 = sales['sqft_living'] > 2000 
result2 = sales['sqft_living'] < 4000 
re=result1 & result2
n=0;
for i in re: 
    n=n+i
print(n)    
print(n/21613)    

9111
0.4215518437977143


# Task 3

In [132]:
my_features = ['bedrooms','bathrooms','sqft_living','sqft_lot','floors','zipcode']

In [130]:
advanced_features = [ 'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors', 'zipcode','condition', 'grade', 'waterfront', 'view','sqft_above','sqft_basement','yr_built', 'yr_renovated', 'lat', 'long', 'sqft_living15', 'sqft_lot15']

In [135]:
my_features_model = turicreate.linear_regression.create(training_set,target='price',validation_set = None,features=my_features)

In [136]:
my_advance_features_model = turicreate.linear_regression.create(training_set,target='price',validation_set = None,features=advanced_features)

In [141]:
print (my_features_model.evaluate(test_set))
print (my_advance_features_model.evaluate(test_set))

{'max_error': 3486584.5093819317, 'rmse': 179542.43331269105}
{'max_error': 3556849.4138480965, 'rmse': 156831.1168019102}


In [144]:
179542.43331269105-156831.1168019102

22711.31651078086