In [293]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re
from sklearn.model_selection import KFold, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler , OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA

In [294]:
pd.set_option('display.max_rows' , None)
pd.set_option('display.max_columns' , None)
pd.set_option('display.max_colWidth' , None)

In [295]:
df = pd.read_csv('gurgaon_properties_post_feature_selection_v2 (1).csv')

In [296]:
df.columns

Index(['property_type', 'sector', 'price', 'bedRoom', 'bathroom', 'balcony',
       'agePossession', 'built_up_area', 'servant room', 'store room',
       'furnishing_type', 'luxury_category', 'floor_category'],
      dtype='object')

In [297]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3554 entries, 0 to 3553
Data columns (total 13 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   property_type    3554 non-null   object 
 1   sector           3554 non-null   object 
 2   price            3554 non-null   float64
 3   bedRoom          3554 non-null   float64
 4   bathroom         3554 non-null   float64
 5   balcony          3554 non-null   object 
 6   agePossession    3554 non-null   object 
 7   built_up_area    3554 non-null   float64
 8   servant room     3554 non-null   float64
 9   store room       3554 non-null   float64
 10  furnishing_type  3554 non-null   float64
 11  luxury_category  3554 non-null   object 
 12  floor_category   3554 non-null   object 
dtypes: float64(7), object(6)
memory usage: 361.1+ KB


In [298]:
df.head(2)

Unnamed: 0,property_type,sector,price,bedRoom,bathroom,balcony,agePossession,built_up_area,servant room,store room,furnishing_type,luxury_category,floor_category
0,flat,sector 36,0.82,3.0,2.0,2,New Property,850.0,0.0,0.0,0.0,Low,Low Floor
1,flat,sector 89,0.95,2.0,2.0,2,New Property,1226.0,1.0,0.0,0.0,Low,Mid Floor


In [299]:
df.shape

(3554, 13)

In [300]:
df.isnull().sum()

Unnamed: 0,0
property_type,0
sector,0
price,0
bedRoom,0
bathroom,0
balcony,0
agePossession,0
built_up_area,0
servant room,0
store room,0


In [301]:
df['furnishing_type'].value_counts()

Unnamed: 0_level_0,count
furnishing_type,Unnamed: 1_level_1
0.0,2349
1.0,1018
2.0,187


In [302]:
df['furnishing_type'] = df['furnishing_type'].replace({0.0:'unfurnished' , 1.0:'semi-furnished' , 2.0:'furnished'})

In [303]:
df['furnishing_type'].value_counts()

Unnamed: 0_level_0,count
furnishing_type,Unnamed: 1_level_1
unfurnished,2349
semi-furnished,1018
furnished,187


In [304]:
df.head()

Unnamed: 0,property_type,sector,price,bedRoom,bathroom,balcony,agePossession,built_up_area,servant room,store room,furnishing_type,luxury_category,floor_category
0,flat,sector 36,0.82,3.0,2.0,2,New Property,850.0,0.0,0.0,unfurnished,Low,Low Floor
1,flat,sector 89,0.95,2.0,2.0,2,New Property,1226.0,1.0,0.0,unfurnished,Low,Mid Floor
2,flat,sohna road,0.32,2.0,2.0,1,New Property,1000.0,0.0,0.0,unfurnished,Low,High Floor
3,flat,sector 92,1.6,3.0,4.0,3+,Relatively New,1615.0,1.0,0.0,semi-furnished,High,Mid Floor
4,flat,sector 102,0.48,2.0,2.0,1,Relatively New,582.0,0.0,1.0,unfurnished,High,Mid Floor


In [305]:
X = df.drop(columns=['price'])
y = df['price']

In [306]:
y.isnull().sum()

np.int64(0)

In [307]:
# Applying the log1p transformation to the target variable
y_transformed = np.log1p(y)

In [308]:
y_transformed.sample(10)

Unnamed: 0,price
473,0.500775
721,0.71784
1907,1.386294
3313,1.360977
635,1.909543
530,0.576613
2348,1.321756
2058,0.81093
2708,0.883768
671,1.163151


In [309]:
# Here we apply 3 types of encoading
# 1) -> Ordinal Encoading
# 2) -> One hot Encoading
# 3) -> Target Encoading

# **Ordinal Encoading**

In [310]:
columns_to_encode = ['property_type','sector', 'balcony', 'agePossession', 'furnishing_type', 'luxury_category', 'floor_category']

In [311]:
# Creating a column transformer for preprocessing
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
        ('cat', OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1), columns_to_encode)
    ],
    remainder='passthrough'
)

In [312]:
# Creating a pipeline
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

In [313]:
# K-fold cross-validation
kfold = KFold(n_splits=10, shuffle=True, random_state=42)
scores = cross_val_score(pipeline, X, y_transformed, cv=kfold, scoring='r2')

In [314]:
scores.mean()

np.float64(0.7363096633436828)

In [315]:
scores.std()

np.float64(0.03238005754429932)

In [316]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y_transformed,test_size=0.2,random_state=42)

In [317]:
pipeline.fit(X_train,y_train)

In [318]:
y_pred = pipeline.predict(X_test)

In [319]:
y_pred = np.expm1(y_pred)

In [320]:
from sklearn.metrics import mean_absolute_error
mean_absolute_error(np.expm1(y_test),y_pred)

0.946382216008936

In [321]:
def scorer(model_name, model):

    output = []

    output.append(model_name)

    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('regressor', model)
    ])

    # K-fold cross-validation
    kfold = KFold(n_splits=10, shuffle=True, random_state=42)
    scores = cross_val_score(pipeline, X, y_transformed, cv=kfold, scoring='r2')

    output.append(scores.mean())

    X_train, X_test, y_train, y_test = train_test_split(X,y_transformed,test_size=0.2,random_state=42)

    pipeline.fit(X_train,y_train)

    y_pred = pipeline.predict(X_test)

    y_pred = np.expm1(y_pred)

    output.append(mean_absolute_error(np.expm1(y_test),y_pred))

    return output

In [322]:
from sklearn.linear_model import Ridge, Lasso, LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor,AdaBoostRegressor,ExtraTreesRegressor
from sklearn.neural_network import MLPRegressor
from xgboost import XGBRegressor

model_dict = {
    'linear_reg':LinearRegression(),
    'svr':SVR(),
    'ridge':Ridge(),
    'LASSO':Lasso(),
    'decision tree': DecisionTreeRegressor(),
    'random forest':RandomForestRegressor(),
    'extra trees': ExtraTreesRegressor(),
    'gradient boosting': GradientBoostingRegressor(),
    'adaboost': AdaBoostRegressor(),
    'mlp': MLPRegressor(),
    'xgboost':XGBRegressor()
}

In [323]:
model_output = []
for model_name,model in model_dict.items():
    model_output.append(scorer(model_name, model))

In [324]:
model_output

[['linear_reg', np.float64(0.7363096633436828), 0.946382216008936],
 ['svr', np.float64(0.7642021216646014), 0.8472636473483917],
 ['ridge', np.float64(0.7363125343993554), 0.9463387741853388],
 ['LASSO', np.float64(0.05943378064493573), 1.528905986892753],
 ['decision tree', np.float64(0.7734845123987093), 0.7518664628678968],
 ['random forest', np.float64(0.8801939105175908), 0.5251769207426612],
 ['extra trees', np.float64(0.8680943845307171), 0.5528814162652566],
 ['gradient boosting', np.float64(0.8725075923408557), 0.5768323401247935],
 ['adaboost', np.float64(0.751762933601522), 0.8337056779083977],
 ['mlp', np.float64(0.8081284885533782), 0.7060723090026295],
 ['xgboost', np.float64(0.8894876835260124), 0.5040475127230885]]

In [325]:
model_df = pd.DataFrame(model_output, columns=['name','r2','mae'])

In [326]:
model_df.sort_values(['mae'])

Unnamed: 0,name,r2,mae
10,xgboost,0.889488,0.504048
5,random forest,0.880194,0.525177
6,extra trees,0.868094,0.552881
7,gradient boosting,0.872508,0.576832
9,mlp,0.808128,0.706072
4,decision tree,0.773485,0.751866
8,adaboost,0.751763,0.833706
1,svr,0.764202,0.847264
2,ridge,0.736313,0.946339
0,linear_reg,0.73631,0.946382


**Ovserbation**
- best r2score = 0.88
- best mae = 0.50
- best algo = Xgboost

# **OneHotEncoading**

In [327]:
# Creating a column transformer for preprocessing
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
        ('cat', OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1), columns_to_encode),
        ('cat1',OneHotEncoder(drop='first' , handle_unknown='ignore'),['sector','agePossession','furnishing_type'])
    ],
    remainder='passthrough'
)

In [328]:
# Creating a pipeline
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

In [329]:
# K-fold cross-validation
kfold = KFold(n_splits=10, shuffle=True, random_state=42)
scores = cross_val_score(pipeline, X, y_transformed, cv=kfold, scoring='r2')

In [330]:
scores.mean()

np.float64(0.8546112792716141)

In [331]:
scores.std()

np.float64(0.01599323234861558)

In [332]:
X_train, X_test, y_train, y_test = train_test_split(X,y_transformed,test_size=0.2,random_state=42)

In [333]:
pipeline.fit(X_train,y_train)

In [334]:
y_pred = pipeline.predict(X_test)
y_pred = np.expm1(y_pred)
mean_absolute_error(np.expm1(y_test),y_pred)

0.6497696680344619

In [335]:
def scorer(model_name, model):

    output = []

    output.append(model_name)

    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('regressor', model)
    ])

    # K-fold cross-validation
    kfold = KFold(n_splits=10, shuffle=True, random_state=42)
    scores = cross_val_score(pipeline, X, y_transformed, cv=kfold, scoring='r2')

    output.append(scores.mean())

    X_train, X_test, y_train, y_test = train_test_split(X,y_transformed,test_size=0.2,random_state=42)

    pipeline.fit(X_train,y_train)

    y_pred = pipeline.predict(X_test)

    y_pred = np.expm1(y_pred)

    output.append(mean_absolute_error(np.expm1(y_test),y_pred))

    return output

In [336]:
model_dict = {
    'linear_reg':LinearRegression(),
    'svr':SVR(),
    'ridge':Ridge(),
    'LASSO':Lasso(),
    'decision tree': DecisionTreeRegressor(),
    'random forest':RandomForestRegressor(),
    'extra trees': ExtraTreesRegressor(),
    'gradient boosting': GradientBoostingRegressor(),
    'adaboost': AdaBoostRegressor(),
    'mlp': MLPRegressor(),
    'xgboost':XGBRegressor()
}

In [337]:
model_output = []
for model_name,model in model_dict.items():
    model_output.append(scorer(model_name, model))

In [338]:
model_df = pd.DataFrame(model_output, columns=['name','r2','mae'])

In [339]:
model_df.sort_values(['mae'])

Unnamed: 0,name,r2,mae
6,extra trees,0.894769,0.47296
10,xgboost,0.89585,0.493456
5,random forest,0.890522,0.497932
7,gradient boosting,0.876331,0.570392
9,mlp,0.874152,0.585037
0,linear_reg,0.854611,0.64977
2,ridge,0.854676,0.652899
4,decision tree,0.80856,0.696767
8,adaboost,0.75581,0.8338
1,svr,0.769752,0.834124


**Observation**
- best model -> Extra Trees
- best mae -> 0.472
- best r2Score -> 0.8947

- mae is reduced from 0.49 to 0.47

#**OneHotEncoding With PCA**


In [340]:
# Creating a column transformer for preprocessing
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
        ('cat', OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1), columns_to_encode),
        ('cat1',OneHotEncoder(drop='first',sparse_output=False, handle_unknown='ignore'),['sector','agePossession'])
    ],
    remainder='passthrough'
)

In [341]:
# Creating a pipeline
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('pca', PCA(n_components=0.95)),
    ('regressor', LinearRegression())
])

In [342]:
# K-fold cross-validation
kfold = KFold(n_splits=10, shuffle=True, random_state=42)
scores = cross_val_score(pipeline, X, y_transformed, cv=kfold, scoring='r2')

In [343]:
scores.mean()

np.float64(0.06225201431451134)

In [344]:
scores.std()

np.float64(0.019860594071640165)

In [345]:
def scorer(model_name, model):

    output = []

    output.append(model_name)

    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('pca', PCA(n_components=0.95)),
        ('regressor', model)
    ])

    # K-fold cross-validation
    kfold = KFold(n_splits=10, shuffle=True, random_state=42)
    scores = cross_val_score(pipeline, X, y_transformed, cv=kfold, scoring='r2')

    output.append(scores.mean())

    X_train, X_test, y_train, y_test = train_test_split(X,y_transformed,test_size=0.2,random_state=42)

    pipeline.fit(X_train,y_train)

    y_pred = pipeline.predict(X_test)

    y_pred = np.expm1(y_pred)

    output.append(mean_absolute_error(np.expm1(y_test),y_pred))

    return output

In [346]:
model_dict = {
    'linear_reg':LinearRegression(),
    'svr':SVR(),
    'ridge':Ridge(),
    'LASSO':Lasso(),
    'decision tree': DecisionTreeRegressor(),
    'random forest':RandomForestRegressor(),
    'extra trees': ExtraTreesRegressor(),
    'gradient boosting': GradientBoostingRegressor(),
    'adaboost': AdaBoostRegressor(),
    'mlp': MLPRegressor(),
    'xgboost':XGBRegressor()
}

In [347]:
model_output = []
for model_name,model in model_dict.items():
    model_output.append(scorer(model_name, model))

In [348]:
model_df = pd.DataFrame(model_output, columns=['name','r2','mae'])

In [349]:
model_df.sort_values(['mae'])

Unnamed: 0,name,r2,mae
5,random forest,0.762281,0.653861
6,extra trees,0.738719,0.696616
4,decision tree,0.696442,0.761509
10,xgboost,0.622205,0.967581
7,gradient boosting,0.610623,0.987906
8,adaboost,0.303943,1.345162
1,svr,0.218073,1.361198
9,mlp,0.213619,1.407659
2,ridge,0.062252,1.526707
0,linear_reg,0.062252,1.526707


**Observation**
- result are become down from previous one

# **TargetEncoading**

In [350]:
!pip install category_encoders



In [354]:
import category_encoders as ce

columns_to_encode = ['property_type','sector', 'balcony', 'agePossession', 'furnishing_type', 'luxury_category', 'floor_category']

# Creating a column transformer for preprocessing
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
        ('cat', OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1), columns_to_encode),
        ('cat1',OneHotEncoder(drop='first',sparse_output=False),['agePossession']),
        ('target_enc', ce.TargetEncoder(), ['sector'])
    ],
    remainder='passthrough'
)

In [355]:
# Creating a pipeline
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

In [356]:
# K-fold cross-validation
kfold = KFold(n_splits=10, shuffle=True, random_state=42)
scores = cross_val_score(pipeline, X, y_transformed, cv=kfold, scoring='r2')

In [357]:
scores.mean(),scores.std()

(np.float64(0.829521918225536), np.float64(0.018384463379122903))

In [358]:
def scorer(model_name, model):

    output = []

    output.append(model_name)

    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('regressor', model)
    ])

    # K-fold cross-validation
    kfold = KFold(n_splits=10, shuffle=True, random_state=42)
    scores = cross_val_score(pipeline, X, y_transformed, cv=kfold, scoring='r2')

    output.append(scores.mean())

    X_train, X_test, y_train, y_test = train_test_split(X,y_transformed,test_size=0.2,random_state=42)

    pipeline.fit(X_train,y_train)

    y_pred = pipeline.predict(X_test)

    y_pred = np.expm1(y_pred)

    output.append(mean_absolute_error(np.expm1(y_test),y_pred))

    return output

In [359]:
model_dict = {
    'linear_reg':LinearRegression(),
    'svr':SVR(),
    'ridge':Ridge(),
    'LASSO':Lasso(),
    'decision tree': DecisionTreeRegressor(),
    'random forest':RandomForestRegressor(),
    'extra trees': ExtraTreesRegressor(),
    'gradient boosting': GradientBoostingRegressor(),
    'adaboost': AdaBoostRegressor(),
    'mlp': MLPRegressor(),
    'xgboost':XGBRegressor()
}

In [360]:
model_output = []
for model_name,model in model_dict.items():
    model_output.append(scorer(model_name, model))

In [361]:
model_df = pd.DataFrame(model_output, columns=['name','r2','mae'])

In [362]:
model_df.sort_values(['mae'])

Unnamed: 0,name,r2,mae
10,xgboost,0.904798,0.447518
6,extra trees,0.901049,0.456096
5,random forest,0.902099,0.462697
7,gradient boosting,0.888941,0.508015
4,decision tree,0.833127,0.555694
9,mlp,0.852458,0.610238
0,linear_reg,0.829522,0.713011
2,ridge,0.829536,0.713523
8,adaboost,0.819194,0.71707
1,svr,0.782917,0.818851


In [363]:
from sklearn.model_selection import GridSearchCV

In [364]:
param_grid = {
    'regressor__n_estimators': [50, 100, 200, 300],
    'regressor__max_depth': [None, 10, 20, 30],
    'regressor__max_samples':[0.1, 0.25, 0.5, 1.0],
    'regressor__max_features': ['auto', 'sqrt']
}

In [365]:
columns_to_encode = ['property_type','sector', 'balcony', 'agePossession', 'furnishing_type', 'luxury_category', 'floor_category']

# Creating a column transformer for preprocessing
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
        ('cat', OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1), columns_to_encode),
        ('cat1',OneHotEncoder(drop='first',sparse_output=False),['agePossession']),
        ('target_enc', ce.TargetEncoder(), ['sector'])
    ],
    remainder='passthrough'
)

In [366]:
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor())
])

In [367]:
kfold = KFold(n_splits=10, shuffle=True, random_state=42)

In [368]:
search = GridSearchCV(pipeline, param_grid, cv=kfold, scoring='r2', n_jobs=-1, verbose=4)

In [369]:
search.fit(X, y_transformed)

Fitting 10 folds for each of 128 candidates, totalling 1280 fits


640 fits failed out of a total of 1280.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
640 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.11/dist-packages/sklearn/base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/pipeline.py", line 662, in fit
    self._final_estimator.fit(Xt, y, **last_step_params["fit"])
  File "/usr/local/lib/python3.11/dist-packages/sklearn/base.py", line 1382,

In [370]:
final_pipe = search.best_estimator_

In [371]:
search.best_params_

{'regressor__max_depth': 20,
 'regressor__max_features': 'sqrt',
 'regressor__max_samples': 1.0,
 'regressor__n_estimators': 100}

In [372]:
search.best_score_

np.float64(0.9032284535081097)

In [373]:
final_pipe.fit(X,y_transformed)

# **Exporting The Model**

In [374]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
        ('cat', OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1), columns_to_encode),
        ('cat1',OneHotEncoder(drop='first',sparse_output=False),['sector','agePossession'])
    ],
    remainder='passthrough'
)

In [375]:
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=500))
])

In [376]:
pipeline.fit(X,y_transformed)

In [377]:
import pickle

with open('pipeline.pkl', 'wb') as file:
    pickle.dump(pipeline, file)

In [378]:
with open('df.pkl', 'wb') as file:
    pickle.dump(X, file)

In [379]:
X

Unnamed: 0,property_type,sector,bedRoom,bathroom,balcony,agePossession,built_up_area,servant room,store room,furnishing_type,luxury_category,floor_category
0,flat,sector 36,3.0,2.0,2,New Property,850.0,0.0,0.0,unfurnished,Low,Low Floor
1,flat,sector 89,2.0,2.0,2,New Property,1226.0,1.0,0.0,unfurnished,Low,Mid Floor
2,flat,sohna road,2.0,2.0,1,New Property,1000.0,0.0,0.0,unfurnished,Low,High Floor
3,flat,sector 92,3.0,4.0,3+,Relatively New,1615.0,1.0,0.0,semi-furnished,High,Mid Floor
4,flat,sector 102,2.0,2.0,1,Relatively New,582.0,0.0,1.0,unfurnished,High,Mid Floor
5,flat,gwal pahari,4.0,4.0,3+,New Property,4842.0,1.0,0.0,semi-furnished,Low,High Floor
6,flat,sector 108,3.0,3.0,3+,Relatively New,2116.0,1.0,0.0,unfurnished,Medium,High Floor
7,flat,sector 102,3.0,4.0,3,Relatively New,1710.0,1.0,0.0,unfurnished,High,Mid Floor
8,house,sector 105,3.0,2.0,1,Old Property,1185.51,0.0,0.0,unfurnished,Low,Low Floor
9,house,sector 26,3.0,3.0,2,Moderately Old,1350.0,1.0,0.0,unfurnished,Medium,Low Floor


# **Trying out the predictions**

In [380]:
X.columns

Index(['property_type', 'sector', 'bedRoom', 'bathroom', 'balcony',
       'agePossession', 'built_up_area', 'servant room', 'store room',
       'furnishing_type', 'luxury_category', 'floor_category'],
      dtype='object')

In [381]:
X.iloc[0].values

array(['flat', 'sector 36', np.float64(3.0), np.float64(2.0), '2',
       'New Property', np.float64(850.0), np.float64(0.0),
       np.float64(0.0), 'unfurnished', 'Low', 'Low Floor'], dtype=object)

In [404]:
data = [['house', 'sector 102', 3, 1, '3+', 'New Property', 1700, 0, 0, 'furnished', 'Low', 'Low Floor']]
columns = ['property_type', 'sector', 'bedRoom', 'bathroom', 'balcony',
       'agePossession', 'built_up_area', 'servant room', 'store room',
       'furnishing_type', 'luxury_category', 'floor_category']

# Convert to DataFrame
one_df = pd.DataFrame(data, columns=columns)

one_df

Unnamed: 0,property_type,sector,bedRoom,bathroom,balcony,agePossession,built_up_area,servant room,store room,furnishing_type,luxury_category,floor_category
0,house,sector 102,3,1,3+,New Property,1700,0,0,furnished,Low,Low Floor


In [405]:
np.expm1(pipeline.predict(one_df))

array([2.30524908])