# Simple Example Notebook

This is just a simple example to get familiar with the environment and see if everything works.

It loads the Boston Housing Data Set and makes house price predictions based on the number of rooms per house.

This notebook should display a scatter plot of all 506 data points and a line for our linear model.

Find more information about the data set at https://archive.ics.uci.edu/ml/datasets/Housing

In [None]:
import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn.cross_validation import cross_val_predict
from sklearn import linear_model
import matplotlib.pyplot as plt
%matplotlib inline

# Load dataset
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this line requires scikit-learn < 1.2 -- confirm the pinned version.
boston = datasets.load_boston()

# Use 'number of rooms' (column 5 of the data matrix) as our only feature.
# The shape is passed positionally: the `newshape=` keyword is deprecated in
# recent NumPy releases, and (-1, 1) yields the same single-column 2-D array
# with one row per sample.
X = np.reshape(boston.data[:, 5], (-1, 1))
y = np.reshape(boston.target, (-1, 1))

# Fit a linear regression model to our data
lr = linear_model.LinearRegression()
lr.fit(X, y)

# Predict prices for 4 and 9 room houses; these two points are enough to
# draw the fitted straight line across the plotted range
test_X = np.reshape([4, 9], (2, 1))
test_y_predicted = lr.predict(test_X)

# Plot data
# Font settings applied globally through matplotlib's rc mechanism
font = dict(family='serif', weight='normal', size=15)
plt.rc('font', **font)

# Explicit figure/axes pair instead of the implicit pyplot state machine
fig, ax = plt.subplots(figsize=(20, 10))

# Raw observations as semi-transparent red markers
ax.scatter(x=X, y=y, label="Data", facecolor='r', s=50, lw=0.1, alpha=0.5)

# Fitted model as a solid black line through the two predicted points
ax.plot(test_X, test_y_predicted, 'k-', lw=3, label="Model")

ax.set_xlabel('Average number of rooms per dwelling')
ax.set_ylabel('Median value of owner-occupied homes in $1000s')
ax.legend()