In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
%matplotlib inline
pd.set_option('display.max_columns',None)
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Read the competition's labelled (train) and unlabelled (test) CSV files.
train, test = (
    pd.read_csv('../input/house-prices-advanced-regression-techniques/train.csv'),
    pd.read_csv('../input/house-prices-advanced-regression-techniques/test.csv'),
)

In [None]:
# Peek at the first five training rows.
train.head(5)

In [None]:
# Peek at the first five test rows.
test.head(5)

In [None]:
# Summary statistics with one row per described column, decorated for skimming:
# a bar for the mean, and colour gradients for the std and the median (50%).
summary = train.describe().T
(
    summary.style
    .bar(subset=['mean'], color='#205ff2')
    .background_gradient(subset=['std'], cmap='Reds')
    .background_gradient(subset=['50%'], cmap='coolwarm')
)

In [None]:
# Number of missing values in each training column.
train.isna().sum()

In [None]:
# Pairwise correlations, rounded to two decimals.
# Only numeric columns are correlated: `train` also holds text columns, and
# DataFrame.corr() raises on those in pandas >= 2.0 instead of skipping them.
train.select_dtypes(include='number').corr().round(2)

In [None]:
# Annotated heatmap of the correlation matrix.
# Text columns are excluded first; DataFrame.corr() raises on them in
# pandas >= 2.0 instead of silently ignoring them.
numeric_corr = train.select_dtypes(include='number').corr().round(2)
plt.figure(figsize=(30,25))
sns.heatmap(numeric_corr,annot=True,cmap='Blues')

In [None]:
# (rows, columns) of the training frame, then of the test frame.
(train.shape, test.shape)

In [None]:
# Heatmap of the boolean "is this cell missing?" mask of the raw training data.
missing_mask = train.isnull()
plt.figure(figsize=(35, 25))
sns.heatmap(missing_mask)

In [None]:
# Handling train data
# LotFrontage is filled with its column mean.
train['LotFrontage'] = train['LotFrontage'].fillna(train['LotFrontage'].mean())

# Every column below is filled with its most frequent value (mode).
mode_filled_cols = [
    'MasVnrType', 'MasVnrArea',
    'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2',
    'Electrical', 'FireplaceQu',
    'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond',
]
for col in mode_filled_cols:
    most_common = train[col].mode()[0]
    train[col] = train[col].fillna(most_common)

# These columns are dropped because they contain a lot of null values.
train = train.drop(['Alley', 'GarageYrBlt', 'PoolQC', 'Fence', 'MiscFeature'], axis=1)

In [None]:
# Discard any training row that still contains a missing value, then report the size.
train = train.dropna()
train.shape

In [None]:
# Handling test data
# Every fill value is taken from `train`, not from `test`, so both frames are
# imputed with the same constants and no test-set statistic leaks into the
# preprocessing.
test['LotFrontage'] = test['LotFrontage'].fillna(train['LotFrontage'].mean())

# Columns filled with the most frequent training value (mode).
mode_filled_cols = [
    'MasVnrType', 'MasVnrArea',
    'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2',
    'Electrical', 'FireplaceQu',
    'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond',
]
for col in mode_filled_cols:
    test[col] = test[col].fillna(train[col].mode()[0])

# These columns are dropped because they contain a lot of null values
# (the same columns are removed from `train`).
test = test.drop(['Alley', 'GarageYrBlt', 'PoolQC', 'Fence', 'MiscFeature'], axis=1)

In [None]:
# Test rows must not be dropped: the submission needs a prediction for every
# Id listed in sample_submission.csv. Fill whatever is still missing instead,
# using training-set values (median for numeric columns, mode for the rest).
for col in test.columns[test.isnull().any()]:
    if pd.api.types.is_numeric_dtype(test[col]):
        fill_value = train[col].median()
    else:
        fill_value = train[col].mode()[0]
    test[col] = test[col].fillna(fill_value)
test.shape

In [None]:
# Same missing-value heatmap as before, now on the cleaned training data.
remaining_nulls = train.isnull()
plt.figure(figsize=(35, 25))
sns.heatmap(remaining_nulls)

In [None]:
# info() prints its report and returns None, so each call gets its own
# statement; wrapping both in a tuple only adds a stray "(None, None)" output.
train.info()
test.info()

In [None]:
# Stack the training rows on top of the test rows (row-wise concatenation).
frames = [train, test]
final_data = pd.concat(frames, axis=0)

In [None]:
# Names of all columns currently stored with object dtype.
cat_feature = [name for name, dtype in final_data.dtypes.items() if dtype == "O"]

In [None]:
# Swap each zoning label for an integer code; labels missing from the table become NaN.
zoning_codes = {'RL':1, 'RM':2, 'C (all)':3, 'FV':4, 'RH':5}
final_data['MSZoning'] = final_data['MSZoning'].astype(str).map(zoning_codes)

In [None]:
# Integer codes for the two sale-related label columns; labels missing from a table become NaN.
sale_type_codes = {'WD':1,'New':2,'COD':3,'ConLD':4,'ConLI':5,'CWD':6,'ConLw':7,'Con':8,'Oth':9}
sale_condition_codes = {'Normal':1, 'Abnorml':2, 'Partial':3, 'AdjLand':4, 'Alloca':5 ,'Family':6}
final_data['SaleType'] = final_data['SaleType'].astype(str).map(sale_type_codes)
final_data['SaleCondition'] = final_data['SaleCondition'].astype(str).map(sale_condition_codes)

In [None]:
# cat_feature was collected before MSZoning, SaleType and SaleCondition were
# mapped to integer codes, so dropping the whole list would also throw away
# those encoded columns. Drop only the columns that still hold text.
still_text = [
    col for col in cat_feature
    if col in final_data.columns and final_data[col].dtype == "O"
]
final_data.drop(still_text,axis=1,inplace=True)

In [None]:
# Missing SalePrice values are replaced by the mean of the known prices.
# NOTE(review): rows that came from `test` have no real price, so after this
# step they carry a placeholder target, not an observation - confirm they are
# kept out of model fitting and scoring.
mean_price = final_data['SalePrice'].mean()
final_data['SalePrice'] = final_data['SalePrice'].fillna(mean_price)

In [None]:
# Missing values left per column in the combined frame.
final_data.isna().sum()

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# final_data is `train` followed by `test` (concat order), and only the first
# len(train) rows have a real SalePrice; the test rows hold a mean-filled
# placeholder. Model fitting and scoring must use the labelled rows only.
labelled = final_data.iloc[:len(train)]
X = labelled.drop('SalePrice',axis=1)
y = labelled['SalePrice']

In [None]:
# Reproducible split (seed 101): 60% of the rows are held out, 40% are used for fitting.
# NOTE(review): holding out more than is trained on is unusual - confirm 0.60 is intended.
split_parts = train_test_split(X, y, test_size=0.60, random_state=101)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Shapes of the four pieces produced by the split.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_log_error,r2_score
from sklearn import metrics

In [None]:
# Baseline: ordinary least squares on the training split.
lm = LinearRegression()
lm.fit(X_train,y_train)
raw_pred = lm.predict(X_test)

# MSLE is undefined for negative predictions. Clip them to zero instead of
# taking abs(): abs() would turn a prediction of -50000 into a plausible
# 50000 and hide how wrong the model was.
pred = np.clip(raw_pred, 0, None)
print("MSLE: ",mean_squared_log_error(y_test, pred))

# R2 is scored on the untouched predictions so it reflects the model itself.
print('R2 Score: ',r2_score(y_test,raw_pred))

In [None]:
# Random forest on the same split. The seed is fixed (same value as the
# train/test split) so the metrics and the submission are reproducible
# across kernel restarts.
rfr = RandomForestRegressor(random_state=101)
rfr.fit(X_train,y_train)
pred = rfr.predict(X_test)
print("MSLE: ",mean_squared_log_error(y_test, pred))
print('R2 Score: ',r2_score(y_test,pred))

In [None]:
pred = rfr.predict(X_test)
pred = pred.round(decimals=2)

In [None]:
submission = pd.read_csv('../input/house-prices-advanced-regression-techniques/sample_submission.csv',usecols=['Id'])

In [None]:
submission['SalePrice'] = pd.DataFrame(pred)

In [None]:
submission.to_csv('my_submission.csv',index=False)