# **HousePrice using LinearRegression**

In [None]:
from sklearn.linear_model import LinearRegression
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

**Read the training dataset**

In [None]:
# Load the training CSV into a DataFrame and preview its first rows.
train_csv_path = '../input/house-prices-advanced-regression-techniques/train.csv'
train_df = pd.read_csv(train_csv_path)
train_df.head()

**Show heatmap**

In [None]:
# Number of columns shown in the heatmap: SalePrice itself plus the (k - 1)
# numeric features with the largest correlation to it.
k = 10

# Restrict to numeric columns before computing correlations. Since pandas 2.0
# DataFrame.corr() defaults to numeric_only=False and raises on string columns;
# select_dtypes keeps this working on both older and newer pandas versions.
corrmat = train_df.select_dtypes(include=[np.number]).corr()
cols = corrmat.nlargest(k, 'SalePrice')['SalePrice'].index

# Reuse the matrix already computed instead of recomputing it with np.corrcoef:
# pandas excludes missing values pairwise, whereas np.corrcoef would turn any
# NaN in a column into a whole row/column of NaN.
cm = corrmat.loc[cols, cols].to_numpy()

sns.set(font_scale=1.25)
hm = sns.heatmap(cm,
                 cbar=True,
                 annot=True,
                 square=True,
                 fmt='.2f',
                 annot_kws={'size': 10},
                 yticklabels=cols.values,
                 xticklabels=cols.values)
hm.set_title('Top {} correlations with SalePrice'.format(k))
plt.show()

**The heatmap shows that SalePrice is strongly correlated with OverallQual, GrLivArea, GarageCars, GarageArea, TotalBsmtSF, 1stFlrSF, FullBath, TotRmsAbvGrd and YearBuilt.**

**Select only the feature columns you need (training data)**

In [None]:
# Predictor columns used to build the training feature matrix.
selected_features = [
    'OverallQual',
    'GrLivArea',
    'GarageCars',
    'GarageArea',
    'TotalBsmtSF',
    '1stFlrSF',
    'FullBath',
    'TotRmsAbvGrd',
    'YearBuilt',
]
x_train = train_df[selected_features]
x_train.head()

In [None]:
# Count missing values per training feature column (isna is the same check as isnull).
x_train.isna().sum()

**Train a LinearRegression model**

In [None]:
# Regression target as a plain NumPy array.
y_train = train_df['SalePrice'].to_numpy()

# fit() returns the estimator itself, so construction and fitting can be chained.
slr = LinearRegression().fit(x_train, y_train)

# NOTE: the model has one coefficient per column of x_train; only the first
# one (coef_[0], the first column of x_train) is reported as the "slope" here.
first_coefficient = slr.coef_[0]
print('slope ： {:0.2f}'.format(first_coefficient))
print('intercept : {:0.2f}'.format(slr.intercept_))

**Read the test dataset**

In [None]:
# Load the test CSV into a DataFrame and preview its first rows.
test_csv_path = '../input/house-prices-advanced-regression-techniques/test.csv'
test_df = pd.read_csv(test_csv_path)
test_df.head()

**Select only the feature columns you need (test data)**

In [None]:
# Same predictor columns, in the same order, as the training feature matrix.
test_feature_names = [
    'OverallQual',
    'GrLivArea',
    'GarageCars',
    'GarageArea',
    'TotalBsmtSF',
    '1stFlrSF',
    'FullBath',
    'TotRmsAbvGrd',
    'YearBuilt',
]

# Take an explicit copy: x_test has its missing values filled in a later cell,
# and modifying a slice of test_df triggers pandas' SettingWithCopyWarning
# (hidden in this notebook because warnings are globally ignored).
x_test = test_df[test_feature_names].copy()
x_test.head()

**Fill in missing values**

In [None]:
# Count missing values per test feature column before imputation.
x_test.isna().sum()

In [None]:
# Replacement value for each column that has missing entries in the test set.
# Each column is filled from its OWN statistic: previously TotalBsmtSF was
# filled with the mean of GarageArea (copy-paste slip).
fill_values = {
    'GarageCars': test_df['GarageCars'].median(),
    'GarageArea': test_df['GarageArea'].mean(),
    'TotalBsmtSF': test_df['TotalBsmtSF'].mean(),
}

# Assign the filled frame back instead of calling
# x_test[col].fillna(..., inplace=True): the chained in-place form acts on a
# temporary Series and does not update x_test under pandas Copy-on-Write.
# Re-running this cell is harmless because already-filled values are untouched.
x_test = x_test.fillna(value=fill_values)

In [None]:
# Re-count missing values per test feature column after imputation.
x_test.isna().sum()

**Predict SalePrice for the test set**

In [None]:
# Apply the fitted linear model to the test feature matrix.
y_test = slr.predict(X=x_test)

In [None]:
# Pair every test Id with its predicted SalePrice and preview the result.
submission_columns = {
    'Id': test_df['Id'].to_numpy(),
    'SalePrice': y_test,
}
sub = pd.DataFrame(submission_columns)
sub.head()

In [None]:
# Write the submission table to disk without the DataFrame index column.
sub.to_csv(path_or_buf='./submission.csv', index=False)