# Import Libraries

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRFRegressor, XGBRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score

import os
# List every file available under the Kaggle input directory, one full path per line.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))
        
import warnings
# Install a global "ignore" filter: every warning raised after this point is
# suppressed, which keeps cell output clean but also hides deprecation notices.
warnings.filterwarnings("ignore")

# Load data

In [None]:
# Read the CSV into a DataFrame and show the resulting object's type.
DATA_PATH = "/kaggle/input/house-price-dataset-of-india/House Price India.csv"
df = pd.read_csv(DATA_PATH)
type(df)

# Data Information & Visualization

In [None]:
# Preview the first ten rows of the loaded data.
df.head(10)

In [None]:
# Count missing values per column (isna is the canonical name for isnull).
df.isna().sum()

In [None]:
# (rows, columns) of the raw data.
df.shape

In [None]:
# Print column dtypes, non-null counts and memory usage.
df.info()

In [None]:
# Summary statistics, transposed so each column of the data becomes a row.
df.describe().transpose()

In [None]:
# Remove the 'id' and 'Date' columns, then display the remaining frame.
df = df.drop(['id', 'Date'], axis=1)
df

In [None]:
def rename_columns(columns):
    """Return a column label in lower case with spaces turned into underscores.

    Despite the plural parameter name, this receives a single label string;
    it is passed as the ``columns=`` mapper to ``DataFrame.rename``.

    Args:
        columns: One column label.

    Returns:
        The lower-cased label with every space replaced by ``_``.
    """
    lowered = columns.lower()
    # Splitting on a literal space and re-joining keeps runs of spaces as runs
    # of underscores, exactly like a character-for-character replacement.
    return "_".join(lowered.split(" "))

In [None]:
# Normalise every column label through rename_columns and rebind the result.
df = df.rename(columns=rename_columns)

In [None]:
# Inspect the column labels after renaming.
df.columns

In [None]:
# Compute the correlation matrix once and reuse it for both the mask and the plot.
corr = df.corr()
# np.triu keeps the upper triangle (diagonal included) and zeroes the rest;
# seaborn hides the cells where the mask is truthy, leaving the lower triangle.
matrix = np.triu(corr)
sns.heatmap(corr, mask=matrix, cmap='vlag',
            linewidths=0.5, linecolor='white', annot=True, annot_kws={'size': 5})

In [None]:
# Histogram of the price column with a kernel-density overlay.
sns.displot(df['price'], kde=True, color='lime')
plt.title('Prices distribution')

In [None]:
# Pairwise plots for every column of df (one panel per column pair, so this
# grows quadratically with the number of columns).
sns.pairplot(df)

# Splitting the Data into Train & Test Sets

In [None]:
# Hold out 20% of the rows for testing. The seed is fixed so the split, and
# every metric computed from it, is reproducible from run to run.
train, test = train_test_split(df, test_size=0.2, random_state=42)

In [None]:
# Use every column except the target as a feature. Selecting by name rather
# than by a hard-coded position keeps 'price' out of the inputs and keeps all
# feature columns in, even if the column count or order changes.
x_train = train.drop(columns='price').values
x_test = test.drop(columns='price').values

In [None]:
# Target arrays: the 'price' column of each split as a NumPy array.
y_train, y_test = train['price'].to_numpy(), test['price'].to_numpy()

In [None]:
def perform(y_pred, y_true=None):
    """Print regression error metrics for a set of predictions.

    Reports MSE, RMSE, MAE, MAPE and the R2 score, each rounded to three
    decimal places.

    Args:
        y_pred: Predicted target values.
        y_true: Ground-truth values aligned with ``y_pred``. When omitted, the
            notebook-level ``y_test`` is used, so existing ``perform(y_pred)``
            calls behave exactly as before.
    """
    if y_true is None:
        y_true = y_test
    # Compute MSE once; RMSE is derived from it rather than recomputed.
    mse = mean_squared_error(y_true, y_pred)
    print("MSE",round(mse, 3))
    print("RMSE",round(np.sqrt(mse), 3))
    print("MAE",round(mean_absolute_error(y_true, y_pred), 3))
    print("MAPE",round(mean_absolute_percentage_error(y_true, y_pred), 3))
    print("R2 Score : ", round(r2_score(y_true, y_pred), 3))

# Linear Regression

In [None]:
# Fit a linear regression with default settings on the training split.
model_lr = LinearRegression()
model_lr.fit(x_train, y_train)

In [None]:
# Predict on the held-out test features.
y_pred_lr = model_lr.predict(x_test)

In [None]:
# Blue: predictions (x) against actual values (y). Lime: actual against actual,
# i.e. where perfect predictions would fall. Drawn in this order on purpose.
for x_values, shade in ((y_pred_lr, 'blue'), (y_test, 'lime')):
    plt.scatter(x_values, y_test, color=shade, marker='o')
# Red y = x reference line.
plt.plot(y_test, y_test, color="red", lw=1.5)
plt.title("LinearRegressor")

In [None]:
# Print the error metrics for the linear regression predictions.
perform(y_pred_lr)

# Decision Tree Regressor

In [None]:
# Fit a decision tree on the training split. The seed is fixed because the
# tree permutes features randomly at each split, so an unseeded fit can give
# different trees (and metrics) from run to run.
model_dt = DecisionTreeRegressor(random_state=42)
model_dt.fit(x_train, y_train)

In [None]:
# Predict on the held-out test features.
y_pred_dt = model_dt.predict(x_test)

In [None]:
# Blue: predictions (x) against actual values (y). Lime: actual against actual,
# i.e. where perfect predictions would fall. Drawn in this order on purpose.
for x_values, shade in ((y_pred_dt, 'blue'), (y_test, 'lime')):
    plt.scatter(x_values, y_test, color=shade, marker='o')
# Red y = x reference line.
plt.plot(y_test, y_test, color="red", lw=1.5)
plt.title("DecisionTreeRegressor")

In [None]:
# Print the error metrics for the decision tree predictions.
perform(y_pred_dt)

# XGBRF Regressor

In [None]:
# Fit XGBoost's XGBRFRegressor with default hyperparameters on the training split.
model_xgbrf = XGBRFRegressor()
model_xgbrf.fit(x_train, y_train)

In [None]:
# Predict on the held-out test features.
y_pred_xgbrf = model_xgbrf.predict(x_test)

In [None]:
# Blue: predictions (x) against actual values (y). Lime: actual against actual,
# i.e. where perfect predictions would fall. Drawn in this order on purpose.
for x_values, shade in ((y_pred_xgbrf, 'blue'), (y_test, 'lime')):
    plt.scatter(x_values, y_test, color=shade, marker='o')
# Red y = x reference line.
plt.plot(y_test, y_test, color="red", lw=1.5)
plt.title("XGBRFRegressor")

In [None]:
# Print the error metrics for the XGBRFRegressor predictions.
perform(y_pred_xgbrf)

# XGB Regressor

In [None]:
# Fit XGBoost's XGBRegressor with default hyperparameters on the training split.
model_xgb = XGBRegressor()
model_xgb.fit(x_train, y_train)

In [None]:
# Predict on the held-out test features.
y_pred_xgb = model_xgb.predict(x_test)

In [None]:
# Blue: predictions (x) against actual values (y). Lime: actual against actual,
# i.e. where perfect predictions would fall. Drawn in this order on purpose.
for x_values, shade in ((y_pred_xgb, 'blue'), (y_test, 'lime')):
    plt.scatter(x_values, y_test, color=shade, marker='o')
# Red y = x reference line.
plt.plot(y_test, y_test, color="red", lw=1.5)
plt.title("XGBRegressor")

In [None]:
# Print the error metrics for the XGBRegressor predictions.
perform(y_pred_xgb)