In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.impute import SimpleImputer
from sklearn.base import BaseEstimator, TransformerMixin

In [2]:
os.chdir("../")

In [3]:
%pwd

'c:\\Users\\harik\\OneDrive\\Desktop\\HARIKRISHNAN_DETAILS\\Real_Estate_Predictor_Web_App\\Real_Estate_Price_Predictor_Web_App'

In [4]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataTransformationConfig:
    root_dir:Path
    data_file:Path
    transformed_data:Path
    date_time_handler_model_file:Path
    log_transformer_model_file:Path
    ordinal_encoder_model_file: Path
    nominal_encoder_model_file: Path
    remove_outlier_model_file: Path
    rare_categorical_handler_file: Path
    params_discrete_feature: list
    params_Id_column: list
    params_categorical_stratergy:str
    params_numerical_stratergy:str
    params_fill_value:str
    params_target_label:str
    params_rare_categorical_variable:str
    params_ordinal_categorical_feature:list
    params_nominal_categorical_feature:list

@dataclass(frozen=True)
class PredictConfig:
    date_time_handler_model_file: Path
    log_transformer_model_file: Path
    ordinal_encoder_model_file: Path
    nominal_encoder_model_file: Path
    rare_categorical_handler_file: Path
    remove_outlier_model_file: Path
    feature_scaling_model: Path
    best_model_directory: Path

In [5]:
from real_estate_price_predictor.constants import *
from real_estate_price_predictor.utils.common import read_yaml, create_directories, save_object

In [6]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])


    
    def get_data_transformation_config(self) -> DataTransformationConfig:
        config = self.config.data_transformation

        create_directories([config.root_dir])

        data_transformation_config = DataTransformationConfig(
            root_dir = config.root_dir,
            data_file = config.data_file,
            transformed_data = config.transformed_data_file,
            date_time_handler_model_file = config.date_time_handler_model_file,
            log_transformer_model_file = config.log_transformer_model_file,
            ordinal_encoder_model_file = config.ordinal_encoder_model_file,
            nominal_encoder_model_file =  config.nominal_encoder_model_file,
            remove_outlier_model_file = config.remove_outlier_model_file,
            rare_categorical_handler_file = config.rare_categorical_handler_file,
            params_discrete_feature = self.params.discrete_feature,
            params_Id_column = self.params.Id_column,
            params_categorical_stratergy = self.params.categorical_stratergy,
            params_numerical_stratergy = self.params.numerical_stratergy,
            params_fill_value = self.params.fill_value,
            params_target_label = self.params.target_label,
            params_rare_categorical_variable = self.params.rare_categorical_variable,
            params_ordinal_categorical_feature = self.params.ordinal_categorical_feature,
            params_nominal_categorical_feature = self.params.nominal_categorical_feature
        )

        return data_transformation_config
    
    def get_predict_config(self) -> PredictConfig:
        config = self.config.predict

        predict_config = PredictConfig(
            date_time_handler_model_file = config.date_time_handler_model_file ,
            log_transformer_model_file = config.log_transformer_model_file,
            ordinal_encoder_model_file = config.ordinal_encoder_model_file,
            nominal_encoder_model_file = config.nominal_encoder_model_file,
            rare_categorical_handler_file = config.rare_categorical_handler_file,
            remove_outlier_model_file = config.remove_outlier_model_file,
            feature_scaling_model = config.feature_scaling_model,
            best_model_directory = config.best_model_directory
        )

        return predict_config

In [7]:
class SeparatingDifferentFeatures:
    def __init__(self,config:DataTransformationConfig):
        self.config = config
    
    def read_data(self):
        dataset = pd.read_csv(self.config.data_file)
        return dataset

# Features with null values

    def features_with_null_values(self, df:pd.DataFrame):
        dataset = df
        features_with_na=[features for features in dataset.columns if dataset[features].isnull().sum()>1]
        return features_with_na
    
# Finding both numerical and categorical features with null values

    def num_and_categorical_features_with_na(self,df:pd.DataFrame, categorical:bool):
        numerical_features_with_na =[]
        categorical_features_with_na =[]
        features_with_na = self.features_with_null_values(df)
        for feature in features_with_na:
            if pd.api.types.is_numeric_dtype(df[feature]):
                numerical_features_with_na.append(feature)
            else:
                categorical_features_with_na.append(feature)
        if categorical:
            return categorical_features_with_na
        else: 
            return numerical_features_with_na
    
# Finding all the numerical features

    def total_numerical_features(self,dataset:pd.DataFrame):
        numerical_features = []
        for feature in dataset.columns:
            if dataset[feature].dtypes != 'O':
                numerical_features.append(feature)
        return numerical_features

# Finding year or datatime variable

    def finding_year_feature(self,dataset:pd.DataFrame):
        year_feature = []
        numerical_features = self.total_numerical_features(dataset)
        for feature in numerical_features:
            if 'Yr' in feature or 'Year' in feature:
                year_feature.append(feature)
        return year_feature
        
# Finding Continuous Variable

    def continous_variables(self,df:pd.DataFrame):
        continuous_feature=[]
        numerical_features = self.total_numerical_features(df) 
        year_feature = self.finding_year_feature(df)
        for feature in numerical_features:
            if feature not in self.config.params_discrete_feature+year_feature+self.config.params_Id_column:
                continuous_feature.append(feature)
        return continuous_feature

# Replacing the zeros with 1 to perform log transform

    def replacing_zeros_of_continuous_features(self,dataset:pd.DataFrame):
        continuous_feature = self.continous_variables(dataset)
        for feature in continuous_feature:
            dataset.loc[dataset[feature] == 0, feature] = 1
        return dataset

# Finding the categorical features

    def total_categorical_features(self,dataset:pd.DataFrame):
        return [feature for feature in dataset.columns if dataset[feature].dtypes=='O']

# Handling Missing Values by creating a new category for categroical and with median for numerical

    def filling_missing_values(self, dataset:pd.DataFrame):
        categorical_imputer = SimpleImputer(strategy= self.config.params_categorical_stratergy,fill_value=self.config.params_fill_value)
        numerical_imputer = SimpleImputer(strategy=self.config.params_numerical_stratergy)
        categorical_features = self.total_categorical_features(dataset)
        numerical_features = self.total_numerical_features(dataset)
        dataset[categorical_features] = categorical_imputer.fit_transform(dataset[categorical_features])
        dataset[numerical_features] = numerical_imputer.fit_transform(dataset[numerical_features])
        # save_object(self.config.categorical_imputer_model_file,categorical_imputer)
        # save_object(self.config.numerical_imputer_model_file,numerical_imputer)
        return dataset
    
# Saving the transformed data

    def save_the_transformed_data(self,dataset:pd.DataFrame):
        dataset.to_csv(self.config.transformed_data)

    def save_the_model(self,dict_of_transform_models:dict):
        for key in dict_of_transform_models.keys():
            if key == 'date_time_variables':
                save_object(self.config.date_time_handler_model_file,dict_of_transform_models[key])
            elif key == 'log_transformer':
                save_object(self.config.log_transformer_model_file,dict_of_transform_models[key])
            elif key == 'rare_categorical_values_handler':
                save_object(self.config.rare_categorical_handler_file,dict_of_transform_models[key])
            elif key == 'ordinal_encoder':
                save_object(self.config.ordinal_encoder_model_file,dict_of_transform_models[key])
            elif key == 'nominal_encoder': 
                save_object(self.config.nominal_encoder_model_file,dict_of_transform_models[key])
            elif key == 'remove_outliers_transformer':
                save_object(self.config.remove_outlier_model_file,dict_of_transform_models[key])

In [8]:
class handling_date_time_variables(BaseEstimator, TransformerMixin,SeparatingDifferentFeatures):
    def __init__(self,config:DataTransformationConfig): # no *args or **kargs 
         super().__init__(config)
    def fit(self, X, y=None):
         return self # nothing else to do
    def transform(self, X, y=None):
         year_features = self.finding_year_feature(X)
         for feature in year_features:
             if feature != 'YrSold':
                 X[feature]=X['YrSold']-X[feature]
         return X

In [9]:
class log_transform_of_numeric_variables(BaseEstimator, TransformerMixin,SeparatingDifferentFeatures):
    def __init__(self, config:DataTransformationConfig): # no *args or **kargs
         super().__init__(config)
    def fit(self, X, y=None):
         return self # nothing else to do
    def transform(self, X, y=None):
         continous_features = self.continous_variables(X)
         X = self.replacing_zeros_of_continuous_features(X)
         for feature in continous_features:
             X[feature]=np.log(X[feature])
         return X

In [10]:
class handling_rare_categorical_values(BaseEstimator, TransformerMixin,SeparatingDifferentFeatures):
    def __init__(self,config:DataTransformationConfig): # no *args or **kargs
         super().__init__(config)
         self.temp_df_dict={}
    def fit(self, X, y=None):
         categorical_features = self.total_categorical_features(X)
         for feature in categorical_features:
             temp=X.groupby(feature)[self.config.params_target_label].count()/len(X)
             temp_df=temp[temp>0.01].index
             self.temp_df_dict[feature]= temp_df
         return self # nothing else to do
    def transform(self, X, y=None):
         categorical_features = self.total_categorical_features(X)
         for feature in categorical_features:
             X[feature]=np.where(X[feature].isin(self.temp_df_dict[feature]),X[feature],self.config.params_rare_categorical_variable)
         return X

In [11]:
class handling_ordinal_categorical_values(BaseEstimator, TransformerMixin,SeparatingDifferentFeatures):
    def __init__(self,config:DataTransformationConfig): # no *args or **kargs
         self.label_ordered_feature ={}
         super().__init__(config)
    def fit(self, X, y=None):
         for feature in self.config.params_ordinal_categorical_feature:
             labels_ordered=X.groupby([feature])[self.config.params_target_label].mean().sort_values().index
             labels_ordered={k:i for i,k in enumerate(labels_ordered,0)}
             self.label_ordered_feature[feature] = labels_ordered
         return self
    def transform(self, X:pd.DataFrame, y=None):
         for feature in self.config.params_ordinal_categorical_feature:
           if feature in X.columns:
               X[feature]=X[feature].map(self.label_ordered_feature[feature])
         return X

In [12]:
class handling_nominal_categorical_values(BaseEstimator, TransformerMixin,SeparatingDifferentFeatures):
    def __init__(self,config:DataTransformationConfig): # no *args or **kargs
         self.label_nominal_feature ={}
         super().__init__(config)
    def fit(self, X, y=None):
         for feature in self.config.params_nominal_categorical_feature:
             nominal_label=X.groupby([feature])[self.config.params_target_label].mean().to_dict()
             self.label_nominal_feature[feature] = nominal_label
         return self
    def transform(self, X:pd.DataFrame, y=None):
         for feature in self.config.params_nominal_categorical_feature:
           if feature in X.columns:
               X[feature]=X[feature].map(self.label_nominal_feature[feature])
         return X

In [13]:
class handling_outliers_for_continous_variable(BaseEstimator, TransformerMixin,SeparatingDifferentFeatures):
    def __init__(self,config:DataTransformationConfig):
        self.iqr_boundaries_conitnous_feature ={}
        super().__init__(config)
        
    def fit(self, X, y=None):
        continuous_feature = self.continous_variables(X)
        for feature in continuous_feature:
            IQR=X[feature].quantile(0.75)-X[feature].quantile(0.25)
            lower_bridge=X[feature].quantile(0.25)-(IQR*3)
            upper_bridge=X[feature].quantile(0.75)+(IQR*3)
            self.iqr_boundaries_conitnous_feature[feature] = [lower_bridge,upper_bridge]
        return self
    def transform(self, X, y=None):
        continuous_feature = self.continous_variables(X)
        for feature in continuous_feature:
            lower_bridge, upper_bridge = self.iqr_boundaries_conitnous_feature[feature]
            X.loc[X[feature]<=lower_bridge,feature]=lower_bridge
            X.loc[X[feature]>=upper_bridge,feature]=upper_bridge
        return X

In [None]:
try:
    config = ConfigurationManager()
    data_transformation_config = config.get_data_transformation_config()
    data_transformation = SeparatingDifferentFeatures(config=data_transformation_config)

    # Read the data

    dataset = data_transformation.read_data()

    # Handling Null Values

    dataset = data_transformation.filling_missing_values(dataset)

    # Handling date time variables

    date_time_variables = handling_date_time_variables(config=data_transformation_config)
    dataset = date_time_variables.transform(dataset)

    # Transforming the continous variables using logrithmic transform

    log_transform = log_transform_of_numeric_variables(config=data_transformation_config)
    dataset = log_transform.transform(dataset)

    # Handling rare categorical variable

    rare_categorical_values = handling_rare_categorical_values(config=data_transformation_config)
    rare_categorical_values.fit(dataset)
    dataset = rare_categorical_values.transform(dataset)    

    # Encoding the ordinal categorical features using (Target Guided Encoding)

    ordinal_features = handling_ordinal_categorical_values(config=data_transformation_config)
    ordinal_features.fit(dataset)
    dataset = ordinal_features.transform(dataset)

    # Encoding the nominal categorical features using (Mean Encoding)

    nominal_features = handling_nominal_categorical_values(config=data_transformation_config)
    nominal_features.fit(dataset)
    dataset = nominal_features.transform(dataset)    

    # Removing the Outliers in Continous feature

    Removal_of_outlier = handling_outliers_for_continous_variable(config=data_transformation_config)
    Removal_of_outlier.fit(dataset)
    dataset = Removal_of_outlier.transform(dataset)

    # Saving the transformed data

    data_transformation.save_the_transformed_data(dataset)

    # Saving the models:

    dict_of_preporcessing_models ={
                               'rare_categorical_values_handler':rare_categorical_values,
                               'date_time_variables': date_time_variables,
                               'log_transformer': log_transform,
                               'ordinal_encoder':ordinal_features,
                               'nominal_encoder':nominal_features,
                               'remove_outliers_transformer':Removal_of_outlier}
    data_transformation.save_the_model(dict_of_preporcessing_models)
    
except Exception as e:
    raise e

In [None]:
config = ConfigurationManager()
data_transformation_config = config.get_data_transformation_config()
data_transformation = SeparatingDifferentFeatures(config=data_transformation_config)
dataset = data_transformation.read_data()
dataset.shape

In [15]:
categorical_features_with_na = data_transformation.num_and_categorical_features_with_na(dataset,True)
numerical_features_with_na = data_transformation.num_and_categorical_features_with_na(dataset,False)

In [None]:
print("The categrical null count is ",len(categorical_features_with_na))
print("The numerical null count is ",len(numerical_features_with_na))

In [None]:
numerical_features = data_transformation.total_numerical_features(dataset)
print('Number of numerical variables: ', len(numerical_features))
dataset[numerical_features].head()

In [None]:
continuous_feature = data_transformation.continous_variables(dataset)
print("Continuous feature Count {}".format(len(continuous_feature)))
dataset[continuous_feature].head()

In [None]:
categorical_features = data_transformation.total_categorical_features(dataset)
print("The number of categorical variables are ", len(categorical_features))
dataset[categorical_features].head()

In [20]:
dataset = data_transformation.filling_missing_values(dataset)

In [None]:
dataset[categorical_features_with_na].isnull().sum()

In [None]:
dataset[numerical_features_with_na].isnull().sum()

In [None]:
date_time_variables = handling_date_time_variables(config=data_transformation_config)
dataset = date_time_variables.transform(dataset)
dataset[['YearBuilt','YearRemodAdd','GarageYrBlt']].head()

In [None]:
log_transform = log_transform_of_numeric_variables(config=data_transformation_config)
dataset = log_transform.transform(dataset)
dataset.head(5)

In [None]:
rare_categorical_values = handling_rare_categorical_values(config=data_transformation_config)
rare_categorical_values.fit(dataset)
dataset = rare_categorical_values.transform(dataset)
dataset[['Condition2']].head(10)

In [None]:
ordinal_features = handling_ordinal_categorical_values(config=data_transformation_config)
ordinal_features.fit(dataset)
dataset = ordinal_features.transform(dataset)
dataset[data_transformation_config.params_ordinal_categorical_feature].head()

In [None]:
nominal_features = handling_nominal_categorical_values(config=data_transformation_config)
nominal_features.fit(dataset)
dataset = nominal_features.transform(dataset)
dataset[data_transformation_config.params_nominal_categorical_feature].head()

In [None]:
Removal_of_outlier = handling_outliers_for_continous_variable(config=data_transformation_config)
Removal_of_outlier.fit(dataset)
print(Removal_of_outlier.iqr_boundaries_conitnous_feature)
dataset = Removal_of_outlier.transform(dataset)
dataset[continuous_feature].describe()

In [13]:
dataset1 = pd.read_csv(r'C:\Users\harik\OneDrive\Desktop\HARIKRISHNAN_DETAILS\Real_Estate_Predictor_Web_App\Real_Estate_Price_Predictor_Web_App\artifacts\data_transformation\transformed_data.csv')

In [None]:
from sklearn.linear_model import Lasso
from sklearn.feature_selection import SelectFromModel
X = dataset1.drop(['Id','SalePrice'],axis=1)
Y = dataset1[['SalePrice']]
feature_sel_model = SelectFromModel(Lasso(alpha=0.005, random_state=0)) # remember to set the seed, the random state in this function
feature_sel_model.fit(X, Y)
selected_feat = X.columns[(feature_sel_model.get_support())]
print('selected features: {}'.format(len(selected_feat)))
print(selected_feat)

In [None]:
X_selected =X[selected_feat]
X_selected.head()

In [None]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X_selected,Y,test_size=0.1,random_state=0)
X_train.shape, X_test.shape

In [17]:
from sklearn.preprocessing import MinMaxScaler
X_copy = X_train.copy()
scaler=MinMaxScaler()
scaler.fit(X_copy)
X_train_data = pd.DataFrame(scaler.transform(X_copy), columns=selected_feat)
X_test_data = pd.DataFrame(scaler.transform(X_test), columns=selected_feat)

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
list_of_models = ['Linear Regression', 'Ridge Regression', 'Polynomial Regression', 'SVR', 'Random Forrest Regressor', 'Decision Tree Regressor' ]
r2_score_of_models=[]
adjusted_r2_score =[]
mse=[]
for i  in list_of_models:
    if i == 'Linear Regression':
        model = LinearRegression()
    elif i == 'Ridge Regression':
        model = Ridge()
    elif i == 'Polynomial Regression':
        model = Pipeline([('poly', PolynomialFeatures(degree=2)),('linear_model', LinearRegression())])
    elif i == 'SVR':
        model = SVR(kernel='rbf', C=1.0)
    elif i == 'Random Forrest Regressor':
        model = RandomForestRegressor(n_estimators=100)
    else:
        model = DecisionTreeRegressor(max_depth=5)
# Train the model on the training data

    model.fit(X_train_data, y_train)

# Make predictions on the testing data

    y_pred = model.predict(X_test_data)

# Evaluate the model performance (e.g., R-squared, Mean Squared Error)
    
    r2 = r2_score(y_test, y_pred)
    r2_score_of_models.append(r2)

# Calculate the adjusted R²

    n = X_test_data.shape[0]  # Number of observations (samples) in the testing set
    p = X_test_data.shape[1]  # Number of features in the model
    adjusted_r2_score.append(1 - (1 - r2) * (n - 1) / (n - p - 1))
    mse.append(mean_squared_error(y_test, y_pred))
print(r2_score_of_models)
print(adjusted_r2_score)
print(mse)


In [None]:
data = {'Models': list_of_models, 'Adjusted_R2_Score': adjusted_r2_score, 'R2_Score': r2_score_of_models , 'Mean_Squared_Error': mse}
performance_metrics = pd.DataFrame.from_dict(data)
performance_metrics.set_index('Models', inplace = True)
performance_metrics

In [14]:
X_predict = pd.read_csv(r"C:\Users\harik\OneDrive\Desktop\HARIKRISHNAN_DETAILS\DATA SCIENCE\ADVANCE_HOUSE_PRICE_PREDICTION\test.csv")

In [None]:
X_predict.shape

In [None]:

features = ['MSSubClass', 'LotArea', 'Neighborhood', 'OverallQual', 'OverallCond',
 'YearBuilt', 'YearRemodAdd', 'BsmtFinType1', 'BsmtFinSF1', 'BsmtUnfSF',
 'TotalBsmtSF', 'HeatingQC', '1stFlrSF', 'GrLivArea', 'BsmtFullBath',
 'KitchenQual', 'TotRmsAbvGrd', 'Fireplaces', 'FireplaceQu', 'GarageYrBlt',
 'GarageCars', 'WoodDeckSF', 'OpenPorchSF','YrSold' ]
# features = np.append(features,'YrSold')
print(features,len(features))

In [14]:
X_final = pd.read_csv(r"C:\Users\harik\OneDrive\Desktop\HARIKRISHNAN_DETAILS\DATA SCIENCE\ADVANCE_HOUSE_PRICE_PREDICTION\test_5.csv")
config = ConfigurationManager()
data_transformation_config = config.get_data_transformation_config()
data_transformation = SeparatingDifferentFeatures(config=data_transformation_config)

[2024-10-08 23:16:31,385: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-10-08 23:16:31,399: INFO: common: yaml file: params.yaml loaded successfully]
[2024-10-08 23:16:31,401: INFO: common: created directory at: artifacts]
[2024-10-08 23:16:31,403: INFO: common: created directory at: artifacts/data_transformation]


In [15]:
categorical_features_with_na_final = data_transformation.num_and_categorical_features_with_na(X_final,True)
numerical_features_with_na_final = data_transformation.num_and_categorical_features_with_na(X_final,False)
print("The categrical null count is ",len(categorical_features_with_na_final))
print("The numerical null count is ",len(numerical_features_with_na_final))
numerical_features_final = data_transformation.total_numerical_features(X_final)
print('Number of numerical variables: ', len(numerical_features_final))
X_final[numerical_features_final].head()
categorical_features_final = data_transformation.total_categorical_features(X_final)
print("The number of categorical variables are ", len(categorical_features_final))
continuous_feature_final = data_transformation.continous_variables(X_final)

The categrical null count is  0
The numerical null count is  0
Number of numerical variables:  19
The number of categorical variables are  5


In [16]:
print(numerical_features_final)
print(categorical_features_final)
print(continuous_feature_final)

['MSSubClass', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'BsmtFinSF1', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', 'GrLivArea', 'BsmtFullBath', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 'GarageCars', 'WoodDeckSF', 'OpenPorchSF', 'YrSold']
['Neighborhood', 'BsmtFinType1', 'HeatingQC', 'KitchenQual', 'FireplaceQu']
['LotArea', 'BsmtFinSF1', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', 'GrLivArea', 'WoodDeckSF', 'OpenPorchSF']


In [None]:
categorical_features_with_na_final = data_transformation.num_and_categorical_features_with_na(X_final,True)
numerical_features_with_na_final = data_transformation.num_and_categorical_features_with_na(X_final,False)
print("The categrical null count is ",len(categorical_features_with_na_final))
print("The numerical null count is ",len(numerical_features_with_na_final))
numerical_features_final = data_transformation.total_numerical_features(X_final)
print('Number of numerical variables: ', len(numerical_features_final))
X_final[numerical_features_final].head()
categorical_features_final = data_transformation.total_categorical_features(X_final)
print("The number of categorical variables are ", len(categorical_features_final))
X_final[categorical_features_final].head()
X_final = data_transformation.filling_missing_values(X_final)
X_final[categorical_features_with_na_final].isnull().sum()
X_final[numerical_features_with_na_final].isnull().sum()
X_final = date_time_variables.transform(X_final)
X_final[['YearBuilt','YearRemodAdd','GarageYrBlt']].head()
X_final = log_transform.transform(X_final)
X_final.head()
X_final = rare_categorical_values.transform(X_final)
X_final = ordinal_features.transform(X_final)
dataset[data_transformation_config.params_ordinal_categorical_feature].head()
X_final = nominal_features.transform(X_final)
dataset[data_transformation_config.params_nominal_categorical_feature].head()
X_final = Removal_of_outlier.transform(X_final)
X_final[continuous_feature_final].describe()

In [20]:
import pickle
def load_object(file_path):
    try:
        with open(file_path, "rb") as file_obj:
            return pickle.load(file_obj)

    except Exception as e:
        raise e

In [24]:
X_final = data_transformation.filling_missing_values(X_final)


In [17]:
X_1 = X_final.head(1)
X_1

Unnamed: 0,MSSubClass,LotArea,Neighborhood,OverallQual,OverallCond,YearBuilt,YearRemodAdd,BsmtFinType1,BsmtFinSF1,BsmtUnfSF,...,BsmtFullBath,KitchenQual,TotRmsAbvGrd,Fireplaces,FireplaceQu,GarageYrBlt,GarageCars,WoodDeckSF,OpenPorchSF,YrSold
0,20.0,11622.0,NAmes,5.0,6.0,1961.0,1961.0,Rec,468.0,270.0,...,0.0,TA,5.0,0.0,others,1961.0,1.0,140.0,0.0,2010.0


In [None]:
X_final.isnull().any().any()

In [24]:
date_time_transformer = load_object(r'C:\Users\harik\OneDrive\Desktop\HARIKRISHNAN_DETAILS\Real_Estate_Predictor_Web_App\Real_Estate_Price_Predictor_Web_App\artifacts\data_transformation\date_time_handler.pkl')
log_transformer = load_object(r'C:\Users\harik\OneDrive\Desktop\HARIKRISHNAN_DETAILS\Real_Estate_Predictor_Web_App\Real_Estate_Price_Predictor_Web_App\artifacts\data_transformation\log_transfomer.pkl')
rare_categorical_model = load_object(r'C:\Users\harik\OneDrive\Desktop\HARIKRISHNAN_DETAILS\Real_Estate_Predictor_Web_App\Real_Estate_Price_Predictor_Web_App\artifacts\data_transformation\rare_categorical_model.pkl')
ordinal_encoder = load_object(r'C:\Users\harik\OneDrive\Desktop\HARIKRISHNAN_DETAILS\Real_Estate_Predictor_Web_App\Real_Estate_Price_Predictor_Web_App\artifacts\data_transformation\ordinal_encoder_model.pkl')
nominal_encoder = load_object(r'C:\Users\harik\OneDrive\Desktop\HARIKRISHNAN_DETAILS\Real_Estate_Predictor_Web_App\Real_Estate_Price_Predictor_Web_App\artifacts\data_transformation\nominal_encoder_model.pkl')
remove_outlier_model = load_object(r'C:\Users\harik\OneDrive\Desktop\HARIKRISHNAN_DETAILS\Real_Estate_Predictor_Web_App\Real_Estate_Price_Predictor_Web_App\artifacts\data_transformation\remove_outlier_model.pkl')
feature_scaler = load_object(r'C:\Users\harik\OneDrive\Desktop\HARIKRISHNAN_DETAILS\Real_Estate_Predictor_Web_App\Real_Estate_Price_Predictor_Web_App\artifacts\train_test_data_scaled\min_max_scaler.pkl')


In [25]:
X_final = date_time_transformer.transform(X_final)
X_final = log_transformer.transform(X_final)
X_final = rare_categorical_model.transform(X_final)
X_final = ordinal_encoder.transform(X_final)
X_final = nominal_encoder.transform(X_final)
X_final = remove_outlier_model.transform(X_final)


In [26]:
X_final.drop(['YrSold'],axis=1,inplace= True)

In [None]:
print(len(X_final.columns))

In [28]:
selected_feat = ['MSSubClass', 'LotArea', 'Neighborhood', 'OverallQual', 'OverallCond',
 'YearBuilt', 'YearRemodAdd', 'BsmtFinType1', 'BsmtFinSF1', 'BsmtUnfSF',
 'TotalBsmtSF', 'HeatingQC', '1stFlrSF', 'GrLivArea', 'BsmtFullBath',
 'KitchenQual', 'TotRmsAbvGrd', 'Fireplaces', 'FireplaceQu', 'GarageYrBlt',
 'GarageCars', 'WoodDeckSF', 'OpenPorchSF']
X_final = X_final[selected_feat]

In [29]:
X_final = pd.DataFrame(feature_scaler.transform(X_final), columns=X_final.columns)

In [30]:
model = load_object(r'C:\Users\harik\OneDrive\Desktop\HARIKRISHNAN_DETAILS\Real_Estate_Predictor_Web_App\Real_Estate_Price_Predictor_Web_App\artifacts\model_trainer\HyperParameterRandomnForrestRegression.pkl')
y_pred = model.predict(X_final)

In [None]:
# y_pred = np.exp(y_pred)
y_pred

In [None]:
print(np.exp(11.70983481))

In [18]:
class Predict:
    
    def __init__(self, config = PredictConfig):
        self.config = config
    
    def predict(self,dataframe:pd.DataFrame):
        date_time_transformer = load_object(self.config.date_time_handler_model_file)
        log_transformer = load_object(self.config.log_transformer_model_file)
        rare_categorical_model = load_object(self.config.rare_categorical_handler_file)
        ordinal_encoder = load_object(self.config.ordinal_encoder_model_file)
        nominal_encoder = load_object(self.config.nominal_encoder_model_file)
        remove_outlier_model = load_object(self.config.remove_outlier_model_file)
        feature_scaler = load_object(self.config.feature_scaling_model)
        for i in os.listdir(self.config.best_model_directory):
            if i.__contains__('pkl'):
                model_name = i
        model = load_object(os.path.join(self.config.best_model_directory,model_name))
        dataframe = date_time_transformer.transform(dataframe)
        dataframe = log_transformer.transform(dataframe)
        dataframe = rare_categorical_model.transform(dataframe)
        dataframe = ordinal_encoder.transform(dataframe)
        dataframe = nominal_encoder.transform(dataframe)
        dataframe = remove_outlier_model.transform(dataframe)
        dataframe.drop(['YrSold'],axis=1,inplace= True)
        dataframe = pd.DataFrame(feature_scaler.transform(dataframe), columns=dataframe.columns)
        y_pred = model.predict(dataframe)
        return np.exp(y_pred[0])


In [21]:
try:
    config = ConfigurationManager()
    predict_config = config.get_predict_config()
    predict = Predict(config=predict_config)
    result = predict.predict(X_1)
    print(result)
except Exception as e:
    raise e

[2024-10-08 23:17:16,452: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-10-08 23:17:16,465: INFO: common: yaml file: params.yaml loaded successfully]
[2024-10-08 23:17:16,468: INFO: common: created directory at: artifacts]
122485.91779489782


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[feature]=X['YrSold']-X[feature]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[feature]=np.log(X[feature])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[feature]=np.where(X[feature].isin(self.temp_df_dict[feature]),X[feature],self.config.params_rare_categorical_variable)
A value is trying to

In [None]:
np.log(123132.04830711319)