In [1]:
import pandas as pd
from sklearn import linear_model

In [3]:
# Load the home prices data from the CSV file in the working directory into a DataFrame
home_dataset = pd.read_csv(filepath_or_buffer="home-prices-dataset.csv")

In [4]:
# Display the loaded home-prices table to inspect its columns (town, area, price)
home_dataset

Unnamed: 0,town,area,price
0,monroe township,2600,550000
1,monroe township,3000,565000
2,monroe township,3200,610000
3,monroe township,3600,680000
4,monroe township,4000,725000
5,west windsor,2600,585000
6,west windsor,2800,615000
7,west windsor,3300,650000
8,west windsor,3600,710000
9,robinsville,2600,575000


In [7]:
# One-hot encode the town column: one indicator column per distinct town.
# dtype=int pins the indicators to 0/1 integers; pandas >= 2.0 would otherwise
# produce True/False columns, which changes the displayed tables and the
# exported results.csv.
dummies = pd.get_dummies(home_dataset["town"], dtype=int)
dummies

Unnamed: 0,monroe township,robinsville,west windsor
0,1,0,0
1,1,0,0
2,1,0,0
3,1,0,0
4,1,0,0
5,0,0,1
6,0,0,1
7,0,0,1
8,0,0,1
9,0,1,0


In [9]:
# Append the one-hot town columns to the right of the original table (rows matched by index)
merged_dataset = pd.concat(objs=[home_dataset, dummies], axis=1)
merged_dataset

Unnamed: 0,town,area,price,monroe township,robinsville,west windsor
0,monroe township,2600,550000,1,0,0
1,monroe township,3000,565000,1,0,0
2,monroe township,3200,610000,1,0,0
3,monroe township,3600,680000,1,0,0
4,monroe township,4000,725000,1,0,0
5,west windsor,2600,585000,0,0,1
6,west windsor,2800,615000,0,0,1
7,west windsor,3300,650000,0,0,1
8,west windsor,3600,710000,0,0,1
9,robinsville,2600,575000,0,1,0


In [15]:
# Remove the raw "town" text column and the "west windsor" indicator.
# With its indicator gone, west windsor is the baseline town: its rows carry 0 in
# both remaining town columns.
final_dataset = merged_dataset.drop(columns=["town", "west windsor"])
final_dataset

Unnamed: 0,area,price,monroe township,robinsville
0,2600,550000,1,0
1,3000,565000,1,0
2,3200,610000,1,0
3,3600,680000,1,0
4,4000,725000,1,0
5,2600,585000,0,0
6,2800,615000,0,0
7,3300,650000,0,0
8,3600,710000,0,0
9,2600,575000,0,1


In [17]:
# Creating Linear Regression Object
# Instantiate an unfitted scikit-learn LinearRegression estimator with default settings;
# it is trained later via hpp_model.fit(X, y).
hpp_model = linear_model.LinearRegression()

In [22]:
# Feature matrix: every column except the target, i.e. area plus the two town indicators
X = final_dataset.drop(columns="price")
X

Unnamed: 0,area,monroe township,robinsville
0,2600,1,0
1,3000,1,0
2,3200,1,0
3,3600,1,0
4,4000,1,0
5,2600,0,0
6,2800,0,0
7,3300,0,0
8,3600,0,0
9,2600,0,1


In [23]:
# Target vector: the price column the model learns to predict
y = final_dataset.loc[:, "price"]
y

0     550000
1     565000
2     610000
3     680000
4     725000
5     585000
6     615000
7     650000
8     710000
9     575000
10    600000
11    620000
12    695000
Name: price, dtype: int64

In [24]:
# Fit the regression on the features and prices; fit() returns the estimator,
# which is what the cell displays
hpp_model.fit(X=X, y=y)

LinearRegression()

In [27]:
# Report the fitted slope for each feature, rounded to 4 decimals.
# coef_ follows the column order of X: area, monroe township, robinsville.
area_coef = hpp_model.coef_[0]
monroe_coef = hpp_model.coef_[1]
robinsville_coef = hpp_model.coef_[2]

print("Area Coefficient : {:.4f}".format(area_coef))
print("Monroe Township Coefficient : {:.4f}".format(monroe_coef))
print("Robinsville Coefficient : {:.4f}".format(robinsville_coef))

Area Coefficient : 126.8974
Monroe Township Coefficient : -40013.9755
Robinsville Coefficient : -14327.5640


In [28]:
# Model Intercept
# Print the fitted constant term, rounded to 4 decimals. It is the predicted price when
# area and both town indicators are 0, i.e. the west windsor baseline at zero area.
print("Model Intercept : {:.4f}".format(hpp_model.intercept_))

Model Intercept : 249790.3677


In [31]:
# Predict prices for the same rows the model was trained on (in-sample predictions,
# not a held-out test set).
# The frame reuses X's index so that a later column-wise concat with X and y lines up
# row by row even if X does not carry the default 0..n-1 index.
prices_predicted = pd.DataFrame(
    {"Predicted Price": hpp_model.predict(X)},
    index=X.index,
)
prices_predicted

Unnamed: 0,Predicted Price
0,539709.739841
1,590468.716405
2,615848.204687
3,666607.181251
4,717366.157816
5,579723.71533
6,605103.203612
7,668551.924317
8,706621.15674
9,565396.151365


In [33]:
# Side-by-side table of the features, the actual price and the predicted price per row
results = pd.concat(objs=[X, y, prices_predicted], axis=1)
results

Unnamed: 0,area,monroe township,robinsville,price,Predicted Price
0,2600,1,0,550000,539709.739841
1,3000,1,0,565000,590468.716405
2,3200,1,0,610000,615848.204687
3,3600,1,0,680000,666607.181251
4,4000,1,0,725000,717366.157816
5,2600,0,0,585000,579723.71533
6,2800,0,0,615000,605103.203612
7,3300,0,0,650000,668551.924317
8,3600,0,0,710000,706621.15674
9,2600,0,1,575000,565396.151365


In [36]:
# Model Score
# score(X, y) is evaluated on the same X and y the model was fitted on, so this is a
# training-set score (R^2 for scikit-learn regressors), not a held-out estimate.
print("Model Score : {:.4f}".format(hpp_model.score(X,y)))

Model Score : 0.9574


In [34]:
# Exporting Results to csv file
# Writes the features / actual price / predicted price table to results.csv in the
# working directory; with default to_csv settings the row index is written as the
# first column.
results.to_csv("results.csv")

In [35]:
# Persist the fitted model to the file "hpp-model-ohe"; dump() returns the list of
# file names it wrote, which is shown as the cell output
import joblib
joblib.dump(value=hpp_model, filename="hpp-model-ohe")

['hpp-model-ohe']