In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
pd.set_option('display.max_columns', None)  
pd.set_option('display.width', None)        
pd.set_option('display.expand_frame_repr', False)

Feature Engineering

In [3]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.impute import SimpleImputer
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import RFECV

In [4]:
data = pd.read_csv("../data/train.csv")
# X_train.drop(columns=['Id'],inplace=True)
X_train, X_test = train_test_split(data,test_size=0.2,random_state=42)
Y_train = X_train.pop('SalePrice')
X_train.dtypes

Id                 int64
MSSubClass         int64
MSZoning          object
LotFrontage      float64
LotArea            int64
                  ...   
MiscVal            int64
MoSold             int64
YrSold             int64
SaleType          object
SaleCondition     object
Length: 80, dtype: object

In [5]:
from sklearn.base import BaseEstimator, TransformerMixin

class DropNaHeavyColumns(BaseEstimator,TransformerMixin):
    def __init__(self,threshold:int = 0.6):
       self.drop_cols = []
       self.threshold = threshold

    def fit(self, X, y=None):
        self.drop_cols = [col for col in X if X[col].isna().sum()>=X.shape[0]*self.threshold]
        # print('drop_na',len(self.drop_cols))
        return self

    def transform(self, X:pd.DataFrame):
        X_transformed = X.copy()
        X_transformed.drop(columns=self.drop_cols,inplace=True)
        return X_transformed

In [6]:
class CustomImputer(BaseEstimator,TransformerMixin):
    def __init__(self, strategy:str = 'most_frequent'):
       self.imputer = None
       self.strategy = strategy

    def fit(self, X, y=None):
        X_copy = X.copy()
        self.imputer = SimpleImputer(strategy=self.strategy)
        self.imputer.fit(X_copy)
        return self

    def transform(self, X:pd.DataFrame):
        X_transformed = X.copy()
        ret_val = pd.DataFrame(self.imputer.transform(X),columns=X_transformed.columns)

        return ret_val.astype(X_transformed.dtypes)

In [7]:
class CustomColumnTransformer(BaseEstimator,TransformerMixin):
    def __init__(self,threshold:int = 3):
       self.ordinal = None
       self.one_hot = None
       self.one_hot_cols = []
       self.ordinal_cols = []
       self.threshold = threshold

    def fit(self, X:pd.DataFrame, y=None):
        self.ordinal = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
        self.one_hot = OneHotEncoder(handle_unknown='ignore',sparse_output=False)
        X_copy = X.copy()
        self.ordinal_cols = [col for col in X.columns if X[col].dtype == 'object' and X[col].nunique()>self.threshold]
        self.one_hot_cols = [col for col in X.columns if X[col].dtype == 'object' and X[col].nunique()<=self.threshold]
        self.ordinal.fit(X_copy[self.ordinal_cols])
        self.one_hot.fit(X_copy[self.one_hot_cols])
        return self

    def transform(self, X:pd.DataFrame):
        X_copy = X.copy()
        X_copy[self.ordinal_cols] = pd.DataFrame(self.ordinal.transform(X_copy[self.ordinal_cols]),columns=self.ordinal_cols)
        hot_cols = X_copy[self.one_hot_cols] 
        one_hot =  pd.DataFrame(self.one_hot.transform(hot_cols),columns=self.one_hot.get_feature_names_out())
        X_copy.drop(columns=self.one_hot_cols,inplace=True)
        return pd.concat([X_copy,one_hot],axis=1)


In [8]:
class Scaler(BaseEstimator,TransformerMixin):
    def __init__(self, scaler = StandardScaler()):
        self.scaler = scaler
        self.cat_cols = []

    def fit(self, X:pd.DataFrame, y=None):
        if self.scaler == None:
            return self
        self.cat_cols = [c for c in X]
        self.scaler.fit(X[self.cat_cols].copy())
        return self

    def transform(self, X):
        X_copy = X.copy()
        if self.scaler == None:
            return X_copy
        X_copy[self.cat_cols] = pd.DataFrame(self.scaler.transform(X_copy[self.cat_cols]),columns=self.cat_cols)
        return X_copy

Feature Selection

In [9]:
from sklearn.feature_selection import RFE

class CustomRFE(BaseEstimator,TransformerMixin):
    def __init__(self, feature_to_select):
        self.feature_to_select = feature_to_select
        self.rfe = RFE(
            estimator=Ridge(alpha=40),
            n_features_to_select=feature_to_select
        )
        self.dropped_features = []
        self.selected_features = []

    def fit(self, X:pd.DataFrame, y=None):
        self.rfe.fit(X.copy(),y.copy())

        cols = X.columns
        selected = self.rfe.support_

        # Get selected and dropped features
        self.selected_features = cols[selected]
        self.dropped_features = cols[~selected]

        return self

    def transform(self, X):
        X_copy = X.copy()
        print(len(self.selected_features),len(self.dropped_features))
        res = X_copy[self.selected_features]
        return res    

In [10]:
class CorrelationFilter(BaseEstimator,TransformerMixin):
    def __init__(self, threshold:int = 0.7):
        self.threshold = threshold
        self.features_to_drop = []

    def fit(self, X:pd.DataFrame, y):
        X_copy = X.copy()
        corr_matrix = X_copy.corr().abs()
        high_corr_pairs = []

        for i in range(len(corr_matrix.columns)):
            for j in range(i+1, len(corr_matrix.columns)):
                if corr_matrix.iloc[i, j] > self.threshold:
                    high_corr_pairs.append((corr_matrix.columns[i], corr_matrix.columns[j], corr_matrix.iloc[i, j]))


        features_to_drop = []
        for feat1, feat2, _ in high_corr_pairs:
            if abs(X[feat1].corr(y)) < abs(X[feat2].corr(y)):
                features_to_drop.append(feat1)
            else:
                features_to_drop.append(feat2)
        self.features_to_drop = features_to_drop
        print('Dropped Features',len(self.features_to_drop))
        return self

    def transform(self, X:pd.DataFrame):
        X_copy = X.copy()
        return X_copy.drop(columns=self.features_to_drop)

Training

In [11]:
pipeline = Pipeline(steps=[
    ('dropna', DropNaHeavyColumns(0.5)),
    ('imputer', CustomImputer()),
    ('cat2num', CustomColumnTransformer(threshold=30)),
    ('corr_filter', CorrelationFilter(0.7)),
    ('Scaler', Scaler(RobustScaler())),
    ('rfe', CustomRFE(70)),
    ('Model', Ridge(alpha=10))
])

In [12]:
kfold = KFold(n_splits=5, shuffle=True, random_state=40)

scalers = [
    MinMaxScaler(),
    RobustScaler(),
    None
]


param_grid = {
    # 'cat2num__threshold': [3,7,10,20,30,50],
    # 'Scaler__scaler': scalers,
    # 'dropna__threshold' : [0.4,0.5,0.6,0.7,0.8,1], 
    # 'corr_filter__threshold': [0.6,0.7,0.8,0.9,0.95,0.98]
    'Model__alpha': [0,10, 40, 70 ,100,1000,10000]
    # 'rfe__feature_to_select': [0,]
}

scoring = {
    'mae': 'neg_mean_absolute_error',
    'rmse': 'neg_root_mean_squared_error',
    'r2': 'r2'
}

grid_search = GridSearchCV(
    pipeline,
    cv=kfold,
    scoring=scoring,
    return_train_score=True,
    refit='r2',
    verbose=0,
    param_grid=param_grid
)

In [13]:
grid_search.fit(X_train,Y_train)

  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 46
70 151
70 151
70 151


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 45
70 154
70 154
70 154


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 47
70 151
70 151
70 151
Dropped Features 48
70 145
70 145
70 145


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 45
70 152
70 152
70 152


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 46
70 151
70 151
70 151


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 45
70 154
70 154
70 154


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 47
70 151
70 151
70 151
Dropped Features 48
70 145
70 145
70 145


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 45
70 152
70 152
70 152


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 46
70 151
70 151
70 151


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 45
70 154
70 154
70 154


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 47
70 151
70 151
70 151
Dropped Features 48
70 145
70 145
70 145


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 45
70 152
70 152
70 152


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 46
70 151
70 151
70 151


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 45
70 154
70 154
70 154


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 47
70 151
70 151
70 151
Dropped Features 48
70 145
70 145
70 145


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 45
70 152
70 152
70 152


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 46
70 151
70 151
70 151


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 45
70 154
70 154
70 154


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 47
70 151
70 151
70 151
Dropped Features 48
70 145
70 145
70 145


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 45
70 152
70 152
70 152


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 46
70 151
70 151
70 151


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 45
70 154
70 154
70 154


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 47
70 151
70 151
70 151
Dropped Features 48
70 145
70 145
70 145


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 45
70 152
70 152
70 152


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 46
70 151
70 151
70 151


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 45
70 154
70 154
70 154


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 47
70 151
70 151
70 151
Dropped Features 48
70 145
70 145
70 145


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 45
70 152
70 152
70 152
Dropped Features 44
70 157


In [14]:
print(f"Best parameters: {grid_search.best_params_}")
print(f"Best cross-validation score: {grid_search.best_score_:.4f}")

Best parameters: {'Model__alpha': 10}
Best cross-validation score: 0.8267


In [15]:
results = pd.DataFrame(grid_search.cv_results_)

In [16]:
results = results.sort_values(by='mean_test_rmse', ascending=False)

In [17]:
cv_results =  results[['params','mean_test_mae','mean_train_mae','mean_test_r2','mean_train_r2','mean_test_rmse','mean_train_rmse']]

In [18]:
# for c in cv_results.index:
#     print(cv_results.iloc[c].iloc[0],cv_results.iloc[c].iloc[1],cv_results.iloc[c].iloc[2],cv_results.iloc[c].iloc[3],cv_results.iloc[c].iloc[4],cv_results.iloc[c].iloc[5],cv_results.iloc[c].iloc[6])

cv_results

Unnamed: 0,params,mean_test_mae,mean_train_mae,mean_test_r2,mean_train_r2,mean_test_rmse,mean_train_rmse
1,{'Model__alpha': 10},-18269.78452,-16232.869943,0.826694,0.885647,-31765.717239,-26092.463202
2,{'Model__alpha': 40},-18418.702261,-16866.358287,0.825348,0.866951,-31952.512637,-28142.664363
3,{'Model__alpha': 70},-18810.506566,-17471.280034,0.821044,0.855494,-32400.224537,-29329.642447
4,{'Model__alpha': 100},-19192.878783,-17963.06676,0.816859,0.846815,-32822.263502,-30198.422664
0,{'Model__alpha': 0},-18263.633587,-15363.11644,0.809491,0.914085,-33044.699189,-22629.629025
5,{'Model__alpha': 1000},-25960.60021,-25286.084576,0.71741,0.723603,-40969.296003,-40591.302473
6,{'Model__alpha': 10000},-46885.192333,-46734.623413,0.273861,0.276748,-65505.304271,-65662.428104


In [66]:
from sklearn.model_selection import cross_validate
n_array = [40, 55 ,70, 85 ,100]

t = []


pipeline = Pipeline(steps=[
    ('dropna', DropNaHeavyColumns(0.5)),
    ('imputer', CustomImputer()),
    ('cat2num', CustomColumnTransformer(threshold=30)),
    ('corr_filter', CorrelationFilter(0.7)),
    ('Scaler', Scaler(RobustScaler())),
    ('rfe', CustomRFE(70)),
    ('Model', Ridge(alpha=10))
])

scoring = {
    'mae': 'neg_mean_absolute_error',
    'rmse': 'neg_root_mean_squared_error',
    'r2': 'r2'
}

# p = Pipeline(pipeline.steps)

results = cross_validate(pipeline, X_train, Y_train, cv=KFold(shuffle=True,random_state=40), scoring=scoring, return_train_score=True)

train_rmse = -1*np.mean(results['train_rmse'])
test_rmse = -1*np.mean(results['test_rmse'])

train_r2 = np.mean(results['train_r2'])
test_r2 = np.mean(results['test_r2'])

train_mae = -1*np.mean(results['train_mae'])
test_mae = -1*np.mean(results['test_mae'])

print(train_mae,test_mae,train_r2,test_r2,train_rmse,test_rmse)


    

  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 46
70 151
70 151
70 151


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 45
70 154
70 154
70 154


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 47
70 151
70 151
70 151
Dropped Features 48
70 145
70 145
70 145


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 45
70 152
70 152
70 152
16232.869943283655 18269.784519563902 0.8856468503182278 0.8266944771287239 26092.463202263752 31765.717239465535


In [19]:
grid_search.best_estimator_

In [68]:
from sklearn.model_selection import cross_validate

scoring = {
    'mae': 'neg_mean_absolute_error',
    'rmse': 'neg_root_mean_squared_error',
    'r2': 'r2'
}

# p = Pipeline(pipeline.steps)

results = cross_validate(pipeline, X_train, Y_train, cv=KFold(shuffle=True,random_state=40), scoring=scoring, return_train_score=True)

train_rmse = -1*np.mean(results['train_rmse'])
test_rmse = -1*np.mean(results['test_rmse'])

print(f"train rmse: {train_rmse:.4f}")
print(f"test rmse: {test_rmse:.4f}")


train_r2 = np.mean(results['train_r2'])
test_r2 = np.mean(results['test_r2'])

print(f"train r2: {(train_r2):.4f}")
print(f"test r2: {(test_r2):.4f}")

train_mae = -1*np.mean(results['train_mae'])
test_mae = -1*np.mean(results['test_mae'])

print(f"train mae: {train_mae:.4f}")
print(f"test mae: {test_mae:.4f}")


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 46
70 151
70 151
70 151


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 45
70 154
70 154
70 154


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 47
70 151
70 151
70 151
Dropped Features 48
70 145
70 145
70 145


  c /= stddev[:, None]
  c /= stddev[None, :]


Dropped Features 45
70 152
70 152
70 152
train rmse: 26092.4632
test rmse: 31765.7172
train r2: 0.8856
test r2: 0.8267
train mae: 16232.8699
test mae: 18269.7845


In [21]:
from sklearn.feature_selection import RFE

X_transformed = Pipeline(steps=pipeline.steps[:-1]).fit_transform(X_train,Y_train)

# rfecv = RFECV(
#     estimator=pipeline,
#     step=1,                      
#     cv=kfold,
#     scoring='neg_mean_absolute_error'
# )

# rfecv.fit(X_train,Y_train)



Dropped Features 44
150 77


In [22]:
X_transformed

Unnamed: 0,Id,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,TotalBsmtSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,KitchenAbvGr,Fireplaces,GarageYrBlt,GarageCars,WoodDeckSF,MSZoning_C (all),MSZoning_RM,Street_Grvl,LotShape_IR1,LotShape_IR2,LotShape_IR3,LandContour_Bnk,LandContour_HLS,LandContour_Lvl,LotConfig_CulDSac,LotConfig_FR2,LotConfig_FR3,LandSlope_Mod,Neighborhood_Blmngtn,Neighborhood_BrkSide,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Condition1_Artery,Condition1_Feedr,Condition1_Norm,Condition1_PosA,Condition1_PosN,Condition1_RRAe,Condition1_RRAn,Condition2_Norm,Condition2_PosA,Condition2_PosN,BldgType_1Fam,BldgType_Duplex,BldgType_Twnhs,BldgType_TwnhsE,HouseStyle_1.5Fin,HouseStyle_2.5Unf,HouseStyle_2Story,HouseStyle_SFoyer,HouseStyle_SLvl,RoofStyle_Gambrel,RoofStyle_Hip,RoofStyle_Mansard,RoofMatl_ClyTile,RoofMatl_CompShg,RoofMatl_WdShngl,Exterior1st_BrkFace,Exterior1st_CemntBd,Exterior1st_HdBoard,Exterior1st_WdShing,Exterior2nd_BrkFace,Exterior2nd_ImStucc,Exterior2nd_Plywood,Exterior2nd_Stucco,Exterior2nd_Wd Sdng,Exterior2nd_Wd Shng,ExterQual_Ex,ExterQual_TA,ExterCond_Ex,Foundation_BrkTil,Foundation_PConc,Foundation_Slab,BsmtQual_Ex,BsmtQual_Fa,BsmtQual_TA,BsmtCond_Fa,BsmtCond_Gd,BsmtExposure_Av,BsmtExposure_Gd,BsmtExposure_Mn,BsmtExposure_No,BsmtFinType1_BLQ,BsmtFinType1_GLQ,BsmtFinType1_LwQ,BsmtFinType1_Unf,BsmtFinType2_ALQ,BsmtFinType2_BLQ,Heating_GasA,Heating_OthW,HeatingQC_Ex,HeatingQC_Gd,CentralAir_N,Electrical_FuseA,KitchenQual_Ex,KitchenQual_Fa,KitchenQual_TA,Functional_Maj1,Functional_Maj2,Functional_Min1,Functional_Min2,Functional_Sev,Functional_Typ,FireplaceQu_Ex,FireplaceQu_Fa,FireplaceQu_Po,GarageType_2Types,GarageType_BuiltIn,GarageType_CarPort,GarageType_Detchd,GarageFinish_Fin,GarageFinish_RFn,GarageQual_Ex,GarageQual_Gd,GarageQual_TA,GarageCond_Po,GarageCond_TA,PavedDrive_P,SaleType_COD,SaleType_Con,SaleType_ConLD,SaleType_ConLI,SaleType_Oth,SaleType_WD,SaleCondition_Abnorml,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal
0,-0.644399,0.368421,-0.291776,-0.5,1.0,-0.312500,-0.973684,0.000000,0.745492,0.628912,-0.246035,1.0,0.0,-1.0,0.0,0.0,-1.0,-0.682927,-1.0,1.488095,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.451417,-0.210526,-0.428667,0.0,2.0,0.437500,0.000000,0.000000,-0.533287,-0.394436,0.151644,0.0,0.0,0.0,1.0,0.0,0.0,0.195122,0.0,0.000000,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,-0.126181,0.210526,-0.200109,-0.5,2.0,-1.291667,-1.157895,0.000000,-0.533287,-0.400397,-1.047582,0.0,0.0,-1.0,0.0,0.0,-1.0,0.487805,-2.0,1.952381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.091093,-0.157895,-0.583551,-0.5,2.0,-0.729167,-1.157895,1.518072,0.255895,-0.529558,0.456480,1.0,0.0,-1.0,1.0,0.0,1.0,-1.121951,-1.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,-0.474359,-0.684211,-1.118473,-0.5,1.0,-1.000000,-1.157895,0.000000,-0.230929,0.056632,0.337331,0.0,0.0,0.0,0.0,0.0,0.0,-1.487805,-1.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1163,0.490553,0.789474,-0.068810,0.0,0.0,0.708333,0.315789,0.000000,-0.500000,0.628912,-0.246035,0.0,0.0,0.0,0.0,0.0,0.0,0.512195,0.0,0.000000,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1164,0.537787,0.105263,-0.436691,-1.0,-2.0,-0.916667,-1.157895,0.000000,0.329404,0.247392,0.786074,1.0,0.0,0.0,0.0,0.0,1.0,-0.097561,0.0,2.565476,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,1.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1165,0.759109,-0.157895,-0.347213,-0.5,2.0,-0.354167,-0.105263,0.000000,-0.301664,-0.265276,-0.942360,1.0,0.0,-1.0,0.0,0.0,-1.0,-0.682927,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1166,0.173414,-0.421053,-0.476080,0.5,3.0,-1.125000,0.105263,0.000000,-0.533287,-0.169896,-0.072727,0.0,0.0,-1.0,1.0,0.0,0.0,-1.463415,-1.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,-1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [23]:

rfe = RFE(
    estimator=Ridge(alpha=40),
    n_features_to_select=150
)

mod = rfe.fit(X_transformed.copy(),Y_train)

cols = X_transformed.columns

selected = rfe.support_

# Get selected and dropped features
selected_features = cols[selected]
dropped_features = cols[~selected]

print("✅ Selected features:", list(selected_features))
print("❌ Dropped features:", list(dropped_features))


res = pd.DataFrame(mod.transform(X_transformed),columns = selected_features)

✅ Selected features: ['Id', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'TotalBsmtSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'KitchenAbvGr', 'Fireplaces', 'GarageYrBlt', 'GarageCars', 'WoodDeckSF', 'MSZoning_C (all)', 'MSZoning_RM', 'Street_Grvl', 'LotShape_IR1', 'LotShape_IR2', 'LotShape_IR3', 'LandContour_Bnk', 'LandContour_HLS', 'LandContour_Lvl', 'LotConfig_CulDSac', 'LotConfig_FR2', 'LotConfig_FR3', 'LandSlope_Mod', 'Neighborhood_Blmngtn', 'Neighborhood_BrkSide', 'Neighborhood_CollgCr', 'Neighborhood_Crawfor', 'Neighborhood_Edwards', 'Neighborhood_Gilbert', 'Neighborhood_IDOTRR', 'Neighborhood_MeadowV', 'Neighborhood_Mitchel', 'Neighborhood_NAmes', 'Neighborhood_NWAmes', 'Neighborhood_NoRidge', 'Neighborhood_NridgHt', 'Neighborhood_OldTown', 'Neighborhood_SWISU', 'Neighborhood_Sawyer', 'Neighborhood_SawyerW', 'Neighborhood_Somerst', 'Neighborhood_StoneBr', 'Neighborhood_Timber', 'Ne

In [24]:
res

Unnamed: 0,Id,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,TotalBsmtSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,KitchenAbvGr,Fireplaces,GarageYrBlt,GarageCars,WoodDeckSF,MSZoning_C (all),MSZoning_RM,Street_Grvl,LotShape_IR1,LotShape_IR2,LotShape_IR3,LandContour_Bnk,LandContour_HLS,LandContour_Lvl,LotConfig_CulDSac,LotConfig_FR2,LotConfig_FR3,LandSlope_Mod,Neighborhood_Blmngtn,Neighborhood_BrkSide,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Condition1_Artery,Condition1_Feedr,Condition1_Norm,Condition1_PosA,Condition1_PosN,Condition1_RRAe,Condition1_RRAn,Condition2_Norm,Condition2_PosA,Condition2_PosN,BldgType_1Fam,BldgType_Duplex,BldgType_Twnhs,BldgType_TwnhsE,HouseStyle_1.5Fin,HouseStyle_2.5Unf,HouseStyle_2Story,HouseStyle_SFoyer,HouseStyle_SLvl,RoofStyle_Gambrel,RoofStyle_Hip,RoofStyle_Mansard,RoofMatl_ClyTile,RoofMatl_CompShg,RoofMatl_WdShngl,Exterior1st_BrkFace,Exterior1st_CemntBd,Exterior1st_HdBoard,Exterior1st_WdShing,Exterior2nd_BrkFace,Exterior2nd_ImStucc,Exterior2nd_Plywood,Exterior2nd_Stucco,Exterior2nd_Wd Sdng,Exterior2nd_Wd Shng,ExterQual_Ex,ExterQual_TA,ExterCond_Ex,Foundation_BrkTil,Foundation_PConc,Foundation_Slab,BsmtQual_Ex,BsmtQual_Fa,BsmtQual_TA,BsmtCond_Fa,BsmtCond_Gd,BsmtExposure_Av,BsmtExposure_Gd,BsmtExposure_Mn,BsmtExposure_No,BsmtFinType1_BLQ,BsmtFinType1_GLQ,BsmtFinType1_LwQ,BsmtFinType1_Unf,BsmtFinType2_ALQ,BsmtFinType2_BLQ,Heating_GasA,Heating_OthW,HeatingQC_Ex,HeatingQC_Gd,CentralAir_N,Electrical_FuseA,KitchenQual_Ex,KitchenQual_Fa,KitchenQual_TA,Functional_Maj1,Functional_Maj2,Functional_Min1,Functional_Min2,Functional_Sev,Functional_Typ,FireplaceQu_Ex,FireplaceQu_Fa,FireplaceQu_Po,GarageType_2Types,GarageType_BuiltIn,GarageType_CarPort,GarageType_Detchd,GarageFinish_Fin,GarageFinish_RFn,GarageQual_Ex,GarageQual_Gd,GarageQual_TA,GarageCond_Po,GarageCond_TA,PavedDrive_P,SaleType_COD,SaleType_Con,SaleType_ConLD,SaleType_ConLI,SaleType_Oth,SaleType_WD,SaleCondition_Abnorml,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal
0,-0.644399,0.368421,-0.291776,-0.5,1.0,-0.312500,-0.973684,0.000000,0.745492,0.628912,-0.246035,1.0,0.0,-1.0,0.0,0.0,-1.0,-0.682927,-1.0,1.488095,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.451417,-0.210526,-0.428667,0.0,2.0,0.437500,0.000000,0.000000,-0.533287,-0.394436,0.151644,0.0,0.0,0.0,1.0,0.0,0.0,0.195122,0.0,0.000000,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,-0.126181,0.210526,-0.200109,-0.5,2.0,-1.291667,-1.157895,0.000000,-0.533287,-0.400397,-1.047582,0.0,0.0,-1.0,0.0,0.0,-1.0,0.487805,-2.0,1.952381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.091093,-0.157895,-0.583551,-0.5,2.0,-0.729167,-1.157895,1.518072,0.255895,-0.529558,0.456480,1.0,0.0,-1.0,1.0,0.0,1.0,-1.121951,-1.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,-0.474359,-0.684211,-1.118473,-0.5,1.0,-1.000000,-1.157895,0.000000,-0.230929,0.056632,0.337331,0.0,0.0,0.0,0.0,0.0,0.0,-1.487805,-1.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1163,0.490553,0.789474,-0.068810,0.0,0.0,0.708333,0.315789,0.000000,-0.500000,0.628912,-0.246035,0.0,0.0,0.0,0.0,0.0,0.0,0.512195,0.0,0.000000,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1164,0.537787,0.105263,-0.436691,-1.0,-2.0,-0.916667,-1.157895,0.000000,0.329404,0.247392,0.786074,1.0,0.0,0.0,0.0,0.0,1.0,-0.097561,0.0,2.565476,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,1.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1165,0.759109,-0.157895,-0.347213,-0.5,2.0,-0.354167,-0.105263,0.000000,-0.301664,-0.265276,-0.942360,1.0,0.0,-1.0,0.0,0.0,-1.0,-0.682927,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1166,0.173414,-0.421053,-0.476080,0.5,3.0,-1.125000,0.105263,0.000000,-0.533287,-0.169896,-0.072727,0.0,0.0,-1.0,1.0,0.0,0.0,-1.463415,-1.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,-1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
selected = rfe.support_

# Get selected and dropped features
selected_features = cols[selected]
dropped_features = cols[~selected]

print("✅ Selected features:", list(selected_features))
print("❌ Dropped features:", list(dropped_features))


✅ Selected features: ['Id', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'TotalBsmtSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'KitchenAbvGr', 'Fireplaces', 'GarageYrBlt', 'GarageCars', 'WoodDeckSF', 'MSZoning_C (all)', 'MSZoning_RM', 'Street_Grvl', 'LotShape_IR1', 'LotShape_IR2', 'LotShape_IR3', 'LandContour_Bnk', 'LandContour_HLS', 'LandContour_Lvl', 'LotConfig_CulDSac', 'LotConfig_FR2', 'LotConfig_FR3', 'LandSlope_Mod', 'Neighborhood_Blmngtn', 'Neighborhood_BrkSide', 'Neighborhood_CollgCr', 'Neighborhood_Crawfor', 'Neighborhood_Edwards', 'Neighborhood_Gilbert', 'Neighborhood_IDOTRR', 'Neighborhood_MeadowV', 'Neighborhood_Mitchel', 'Neighborhood_NAmes', 'Neighborhood_NWAmes', 'Neighborhood_NoRidge', 'Neighborhood_NridgHt', 'Neighborhood_OldTown', 'Neighborhood_SWISU', 'Neighborhood_Sawyer', 'Neighborhood_SawyerW', 'Neighborhood_Somerst', 'Neighborhood_StoneBr', 'Neighborhood_Timber', 'Ne

Upload to Dagshub

In [69]:
import dagshub
dagshub.init(repo_owner='azhgh22', repo_name='ML01_House-Prices', mlflow=True)

In [27]:
pipeline.get_params()['steps']
grid_search.best_params_

{'rfe__feature_to_select': 0}

In [28]:
param_grid

{'rfe__feature_to_select': [0]}

In [70]:
import mlflow
mlflow.set_experiment("Experiment 2: Regularized Linear Regression")
with mlflow.start_run(run_name="run1"):
    mlflow.log_params({
        'droped features threshold': 'droped (NA count) >= (row count)*0.5',
        'Imputer' : 'fill with mode',
        'Cat2num' : 'OrdinalEncoder for features having 30+ categorical values, else OneHotEncoding',
        'Correlation filter thresholds' : 0.7,
        'Scaler' : 'RobustScaler',
        'regularization alpha' : 10,
        'rfe selected columns' : 70, 
        'kfold splits': 5,
        'ModelType' : 'LinearRegression',
        'Score' : 'neg_root_mean_squared_error',
    })
    mlflow.log_metrics({
        'train_rmse': train_rmse,
        'test_rmse' : test_rmse,
        'train_r2' : train_r2,
        'test_r2' : test_r2,
        'train_mae' : train_mae,
        'test_mae' : test_mae
    })
    mlflow.sklearn.log_model(pipeline.fit(X_train,Y_train),'LinregressionPipeline')
    
    mlflow.end_run()

2025/04/06 12:54:35 INFO mlflow.tracking.fluent: Experiment with name 'Experiment 2: Regularized Linear Regression' does not exist. Creating a new experiment.


Dropped Features 44
70 157




🏃 View run run1 at: https://dagshub.com/azhgh22/ML01_House-Prices.mlflow/#/experiments/2/runs/a08745ec649e4acfaebaf33a2f1bc686
🧪 View experiment at: https://dagshub.com/azhgh22/ML01_House-Prices.mlflow/#/experiments/2
