In [1]:
import warnings
import copy
import numpy as np
import pandas as pd
import sklearn
import seaborn as sns
import plotly.io as pio
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go

from math import sqrt
from plotly.offline import iplot
from plotly.subplots import make_subplots
from datetime import datetime
from sklearn.model_selection import KFold
from sklearn.impute import SimpleImputer
from scipy.stats import pearsonr, spearmanr
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.base import BaseEstimator, TransformerMixin
from joblib import Parallel, delayed
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import SelectKBest, f_classif, f_regression
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor

In [2]:
df = pd.read_csv('trainn_data.csv', encoding='latin-1',low_memory=False)

In [3]:
df.head()

Unnamed: 0,PRICE,OLD/NEW,DURATION,TOWN/CITY,Year,Month,Property_Type__D,Property_Type__F,Property_Type__S,Property_Type__T
0,11.225243,0,0,1.0,1996,7,0,0,1,0
1,11.461632,0,0,3.0,2010,4,1,0,0,0
2,9.998798,0,0,1.0,1999,2,0,0,0,1
3,11.982929,0,0,1.0,2010,6,0,0,0,1
4,12.423198,0,0,1.0,2015,8,0,0,1,0


In [4]:
print(df.shape)

(218953, 10)


In [5]:
df

Unnamed: 0,PRICE,OLD/NEW,DURATION,TOWN/CITY,Year,Month,Property_Type__D,Property_Type__F,Property_Type__S,Property_Type__T
0,11.225243,0,0,1.0,1996,7,0,0,1,0
1,11.461632,0,0,3.0,2010,4,1,0,0,0
2,9.998798,0,0,1.0,1999,2,0,0,0,1
3,11.982929,0,0,1.0,2010,6,0,0,0,1
4,12.423198,0,0,1.0,2015,8,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...
218948,12.352335,0,0,2.0,2002,4,1,0,0,0
218949,12.278161,0,0,5.0,2014,4,0,0,0,1
218950,11.891362,0,0,1.0,2003,2,0,0,1,0
218951,12.083905,0,0,1.0,2004,2,1,0,0,0


In [6]:
# Split data into features and target variable
X = df.drop('PRICE', axis=1)
y = df['PRICE']

In [7]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# Selecting features and target variable
features = ['Year', 'Property_Type__D', 'Property_Type__F', 'Property_Type__S', 'Property_Type__T']
X_train = df[features]
y_train = df['PRICE']

# Define parameters for GradientBoostingRegressor
params = {
    'learning_rate': 0.05,
    'n_estimators': 500,
    'max_depth': 7, 
    'subsample': 0.8,
    'min_samples_leaf': 5
}
    
# Create and fit the GradientBoostingRegressor model
model_clf = GradientBoostingRegressor(**params)
model_clf.fit(X_train, y_train)

# Selecting features and target variable
features = ['Year', 'Property_Type__D', 'Property_Type__F', 'Property_Type__S', 'Property_Type__T']
X_train = df[features]
y_train = df['PRICE']

def predict_price(Property_Type, Year, model_clf, features):
    # Prepare input features in the correct order
    input_features = pd.DataFrame([[Year] + Property_Type], columns=features)
    predicted_price = model_clf.predict(input_features)[0]
    return predicted_price

# Get user input for Property_Type
Property_Type = input("Enter the Property Type (D, F, S, T): ")

# Predefined year for prediction
Year = 2024


# Validate input Property_Type
property_mapping = {'D': [1, 0, 0, 0], 'S': [0, 1, 0, 0], 'F': [0, 0, 1, 0], 'T': [0, 0, 0, 1]}

if Property_Type not in property_mapping:
    print("Invalid Property Type entered.")
else:
    Property_Type_encoded = property_mapping[Property_Type]  # Get the encoded value

    # Predict the price using the prepared data and trained model
    predicted_price = predict_price(Property_Type_encoded, Year, model_clf, features)
    
    # Calculate antilog using np.exp() function
    predicted_price_antilog = np.exp(predicted_price)

    # Format the predicted price
    formatted_price = "{:,.0f}".format(predicted_price_antilog)
    print("Predicted Price:", formatted_price)


Enter the Property Type (D, F, S, T): D
Predicted Price: 292,380
