<div class = "alert alert-info">
    <h1>Import Dependencies</h1>
</div>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings 
# Silence every warning for the rest of the session: the "ignore" action is
# installed with no category or module restriction, so nothing is exempt.
warnings.filterwarnings("ignore")

<div class= "alert alert-warning">
    <h2>Getting Data</h2>
</div>

In [None]:
# Path of the training CSV under the /kaggle/input directory.
train_csv_path = "/kaggle/input/rental-price-of-indias-it-capital-pune-mh-ind/train.csv"

# Load the raw listings and preview the first record.
df = pd.read_csv(train_csv_path)
df.head(n=1)

In [None]:
# Dimensions of the freshly loaded frame as (rows, columns).
df.shape

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Print the per-column dtype and non-null count summary.
df.info()

In [None]:
# Count the rows that are exact repeats of an earlier row.
df.duplicated().sum()

In [None]:
# Remove repeated rows and renumber the index from zero in a single chained
# expression, then show the resulting (rows, columns).
df = df.drop_duplicates().reset_index(drop=True)
df.shape

In [None]:
# Summary statistics of the de-duplicated frame.
df.describe()

In [None]:
# Pairwise correlation of the numeric columns only.
# The columns that get label-encoded further down are presumably still text at
# this point; pandas >= 2.0 raises on non-numeric columns unless
# numeric_only=True is passed.
df.corr(numeric_only=True)

In [None]:
# Annotated heatmap of the correlation matrix.
# numeric_only=True keeps the not-yet-encoded (presumably text) columns out of
# the correlation, which pandas >= 2.0 would otherwise reject with an error.
plt.figure(figsize = (15,10))
sns.heatmap(df.corr(numeric_only=True), annot = True, cmap="plasma")

<div class = "alert alert-danger">
    <h2>Data Preprocessing</h2>
</div>

In [None]:
from sklearn.preprocessing import LabelEncoder
# Columns that are replaced by integer codes.
cols = ['furnishing','avalable_for','facing','floor_type','address','gate_community','maintenance_amt','corner_pro','wheelchairadption','petfacility','propertyage']

# Fit one encoder per column and keep it, so the category -> code mapping can
# be reused later (inverse_transform, or encoding new inputs for the exported
# model). Refitting a single throwaway encoder would discard every mapping.
encoders = {}
for col in cols:
    encoders[col] = LabelEncoder()
    df[col] = encoders[col].fit_transform(df[col])

df.head()

In [None]:
from scipy import stats
# Absolute column-wise z-scores for the whole frame.
# NOTE(review): the .head() call below relies on stats.zscore handing back a
# DataFrame for DataFrame input — confirm for the installed SciPy version.
zscore = np.absolute(stats.zscore(df))
zscore.head(n=5)

In [None]:
# Positions of every cell whose absolute z-score is above 3.
outlier_positions = np.where(zscore > 3)
print(outlier_positions)

In [None]:
# Keep only the rows in which every column's absolute z-score is below 3.
within_limits = (zscore < 3).all(axis=1)
df = df[within_limits]

In [None]:
# Predictor columns fed to the models; `rent` is the regression target.
feature_columns = [
    "bedroom",
    "bathrooms",
    "area",
    "furnishing",
    "avalable_for",
    "floor_number",
    "parking",
    "brok_amt",
]
X = df[feature_columns]
y = df['rent']

In [None]:
# First five feature rows, transposed so that each feature is shown as a row.
X.head().T

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Shapes of the four pieces produced by the train/test split.
x_train.shape,x_test.shape,y_train.shape,y_test.shape

<div class = "alert alert-success">
    <h2>Feature Scaling</h2>
</div>

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Fit the scaler on the training features only, then apply that same fitted
# scaling to both splits so no test-set statistics enter the fit.
ms = MinMaxScaler().fit(x_train)
x_train = ms.transform(x_train)
x_test = ms.transform(x_test)

<div class = "alert alert-info">
    <h2>Model Training</h2>
</div>

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRFRegressor, XGBRegressor
from sklearn.metrics import r2_score

In [None]:
# K-nearest-neighbours regressor with default hyperparameters.
k_reg = KNeighborsRegressor().fit(x_train, y_train)

# Held-out predictions and their R² (kept for the comparison table).
k_pred = k_reg.predict(x_test)
k_acc = r2_score(y_test, k_pred)

# Report the fit on both splits.
knn_train_score = k_reg.score(x_train, y_train)
knn_test_score = k_reg.score(x_test, y_test)
print("Train accuracy:", knn_train_score)
print("Test accuracy:", knn_test_score)

In [None]:
# Ordinary least-squares baseline.
l_reg = LinearRegression().fit(x_train, y_train)

# Held-out predictions and their R² (kept for the comparison table).
l_pred = l_reg.predict(x_test)
l_acc = r2_score(y_test, l_pred)

# Report the fit on both splits.
linear_train_score = l_reg.score(x_train, y_train)
linear_test_score = l_reg.score(x_test, y_test)
print("Train accuracy:", linear_train_score)
print("Test accuracy:", linear_test_score)

In [None]:
# Single decision tree. The seed is fixed (same value as the train/test split)
# so the fitted tree, and therefore the scores below, are reproducible across
# kernel restarts.
d_reg = DecisionTreeRegressor(random_state=42)
d_reg.fit(x_train,y_train)
d_pred  = d_reg.predict(x_test)

# R² on the held-out split, kept for the comparison table.
d_acc = r2_score(y_test,d_pred)

print("Train accuracy:", (d_reg.score(x_train,y_train)))
print("Test accuracy:", (d_reg.score(x_test,y_test)))

In [None]:
# Random forest. Bootstrapping and feature sampling are random, so the seed is
# fixed (same value as the train/test split) to make the scores below
# reproducible across kernel restarts.
r_reg = RandomForestRegressor(random_state=42)
r_reg.fit(x_train,y_train)

# Held-out predictions and their R², kept for the comparison table.
r_pred = r_reg.predict(x_test)
r_acc = r2_score(y_test,r_pred)

print("Training accuracy:",(r_reg.score(x_train,y_train)))
print("Test accuracy:",(r_reg.score(x_test,y_test)))

In [None]:
# NOTE(review): `xgb` holds an XGBRFRegressor (the random-forest flavour);
# XGBRegressor is imported next to it but never used — confirm this is intended.
xgb = XGBRFRegressor()
xgb.fit(x_train, y_train)

# Held-out predictions and their R² (kept for the comparison table).
xgb_pred = xgb.predict(x_test)
x_acc = r2_score(y_test, xgb_pred)

# Report the fit on both splits.
xgb_train_score = xgb.score(x_train, y_train)
xgb_test_score = xgb.score(x_test, y_test)
print("Training accuracy:", xgb_train_score)
print("Test accuracy:", xgb_test_score)

# Display the held-out R² as the cell output.
x_acc

In [None]:
# Held-out R² of each fitted model, keyed by its display label.
score_by_model = {
    'Knn': k_acc,
    'Linear': l_acc,
    'DecisionTree': d_acc,
    'RandomForest': r_acc,
    'XGB': x_acc,
}

# Two-column comparison table, displayed best model first.
models = pd.DataFrame({
    'Models': list(score_by_model.keys()),
    'Scores': list(score_by_model.values()),
})
models.sort_values(by='Scores', ascending=False)

<div class = "alert alert-warning">
    <h2>Exporting</h2>
</div>

In [None]:
import pickle

# Serialise the fitted model. The context manager guarantees the file handle
# is flushed and closed even if pickling fails.
# NOTE(review): the model was trained on MinMax-scaled features (`ms`) and
# label-encoded categories; neither transform is saved here, so a consumer of
# model.pkl cannot reproduce the preprocessing from this file alone.
with open("model.pkl", "wb") as model_file:
    pickle.dump(xgb, model_file)


In [None]:
# Destination file for the exported records.
json_filename = "rent.json"

# Serialise the processed frame as a JSON array with one object per row.
rent_dataset = df.to_json(orient="records")

# Write the JSON text to disk.
with open(json_filename, "w") as out_handle:
    out_handle.write(rent_dataset)