In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn import metrics

In [2]:
# Results table: the modelling cells below each append one row
# (feature description, MSE, RMSE) so the models can be compared.
evaluation = pd.DataFrame({'Features':[],'MSE':[],'RMSE':[]})

# Load the data set and show only a preview instead of printing the whole frame.
df = pd.read_csv('kc_house_data.csv')
print(df.head())

# Keep 85% of the rows for training; random_state pins the split so reruns match.
train_data,test_data = train_test_split(df,train_size = 0.85,random_state=3)

features = ['sqft_living']

# Selecting with a *list* of columns yields a 2-D frame, so the design matrices
# already have shape (n_samples, 1) and need no reshape. `.to_numpy()` keeps the
# columns' native numeric dtype; the previous `dtype=pd.Series` argument is not
# a NumPy dtype and only produced object arrays.
X_train = train_data[features].to_numpy()
y_train = train_data['price'].to_numpy()

X_test = test_data[features].to_numpy()
y_test = test_data['price'].to_numpy()

# Baseline: ordinary least squares on the single selected feature.
lr = linear_model.LinearRegression()
lr.fit(X_train,y_train)


In [3]:
# Score the single-feature model on the held-out rows.
results = lr.predict(X_test)
print(results, y_test,"\n\n")

# Use the same scikit-learn metric as the multi-feature cells so every row of
# `evaluation` is computed the same way.
mse = metrics.mean_squared_error(y_test, results)
rmse = np.sqrt(mse)

# Append at the next free integer label. MSE is stored as a number (like RMSE)
# rather than a pre-formatted string, so the column stays sortable/comparable;
# control display precision with pandas display options if needed.
r = evaluation.shape[0]
evaluation.loc[r] = ['sqft_living', mse, rmse]
print(evaluation)

# Held-out observations versus the fitted line of the single-feature model.
plt.figure(figsize=(6.5,5))
plt.scatter(X_test,y_test,color='darkgreen',label="Data", alpha=1)
# Predict from `lr` directly so the line always reflects the single-feature
# model, even if `results` has since been rebound by another cell.
plt.plot(X_test,lr.predict(X_test),color="red",label="Predicted Regression Line")
plt.xlabel('sqft_living')
plt.ylabel('price')
# The label= arguments above are only rendered once a legend is requested.
plt.legend()
plt.show()

In [4]:
# Multi-feature linear regression, fitted on the training split and scored on
# the held-out split.
features = ['bedrooms','bathrooms','sqft_living','sqft_lot','floors','zipcode']

complex_model = linear_model.LinearRegression()
complex_model.fit(train_data[features],train_data['price'])

results = complex_model.predict(test_data[features])

mse = metrics.mean_squared_error(test_data['price'],results)
rmse = np.sqrt(mse)

# The row label is derived from `features` so it cannot drift from the columns
# actually used; MSE is stored as a number (like RMSE), not a formatted string.
r = evaluation.shape[0]
evaluation.loc[r] = [','.join(features), mse, rmse]
print(evaluation)

In [5]:
# Linear regression on size, lot, zipcode and latitude/longitude columns.
features = ['sqft_living','sqft_lot','zipcode','lat','long']

# Create the estimator here so this cell does not depend on an earlier cell
# having defined `complex_model`.
complex_model = linear_model.LinearRegression()
complex_model.fit(train_data[features],train_data['price'])

results = complex_model.predict(test_data[features])

mse = metrics.mean_squared_error(test_data['price'],results)
rmse = np.sqrt(mse)

# Label derived from `features`; MSE kept numeric (like RMSE) so the column
# can be sorted and compared.
r = evaluation.shape[0]
evaluation.loc[r] = [','.join(features), mse, rmse]
print(evaluation)

In [6]:
# Linear regression on size, lot, view, condition, grade and zipcode columns.
features = ['sqft_living','sqft_lot','view','condition','grade','zipcode']

# Create the estimator here so this cell does not depend on an earlier cell
# having defined `complex_model`.
complex_model = linear_model.LinearRegression()
complex_model.fit(train_data[features],train_data['price'])

results = complex_model.predict(test_data[features])

mse = metrics.mean_squared_error(test_data['price'],results)
rmse = np.sqrt(mse)

# Label derived from `features`; MSE kept numeric (like RMSE) so the column
# can be sorted and compared.
r = evaluation.shape[0]
evaluation.loc[r] = [','.join(features), mse, rmse]
print(evaluation)

In [7]:
# Linear regression on the combined room, size, quality, zipcode and floors columns.
features = ['bedrooms','bathrooms','sqft_living','sqft_lot','view','condition','grade','zipcode','floors']

# Create the estimator here so this cell does not depend on an earlier cell
# having defined `complex_model`.
complex_model = linear_model.LinearRegression()
complex_model.fit(train_data[features],train_data['price'])

results = complex_model.predict(test_data[features])

mse = metrics.mean_squared_error(test_data['price'],results)
rmse = np.sqrt(mse)

# Label derived from `features`; MSE kept numeric (like RMSE) so the column
# can be sorted and compared.
r = evaluation.shape[0]
evaluation.loc[r] = [','.join(features), mse, rmse]
print(evaluation)

In [8]:
# Linear regression on the widest feature list used in this notebook.
features = ['bedrooms','bathrooms','sqft_living','sqft_lot','view','condition','grade','zipcode','floors','waterfront','sqft_above','sqft_basement','yr_built','yr_renovated','lat','long','sqft_living15','sqft_lot15']

# Create the estimator here so this cell does not depend on an earlier cell
# having defined `complex_model`.
complex_model = linear_model.LinearRegression()
complex_model.fit(train_data[features],train_data['price'])

results = complex_model.predict(test_data[features])

mse = metrics.mean_squared_error(test_data['price'],results)
rmse = np.sqrt(mse)

# MSE kept numeric (like RMSE) so the column can be sorted and compared.
# NOTE(review): the 'all features' label refers to the list above; confirm it
# really covers every usable column of the CSV.
r = evaluation.shape[0]
evaluation.loc[r] = ['all features', mse, rmse]
print(evaluation)