**Import necessary libraries and dataset**

In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
import numpy as np

# Load the train and test data
train_file_path = r'/content/sample_data/train.csv'
test_file_path = r'/content/sample_data/test.csv'

trainData = pd.read_csv(train_file_path)
testData = pd.read_csv(test_file_path)

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
pd.set_option('display.max_columns',None)#displaying long list of columns
pd.set_option('display.max_rows', None)#displaying long list of rows
pd.set_option('display.width', 1000)#width of window


In [None]:
# Display column names
print("Column names:")
print(trainData.columns.tolist())

Column names:
['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'MasVnrArea', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinSF1', 'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual', 'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'FireplaceQu', 'GarageType', 'GarageYrBlt', 'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual', 'GarageCond', 'PavedDrive', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolA

In [None]:
#Extract Target Column before doing missing value substitutions and one-hot encoding======
Target_Train_Cols = trainData["SalePrice"]#make copy of target column
trainData = trainData.drop(["SalePrice"], axis=1) #extracting training data without the target column

print(trainData.shape)

(1460, 80)


**Verify and drop columns with missing values**

In [None]:
#DROP COLUMNS WITH LOTS OF MISSING VALUES===============================
#CAN ALSO DROP ROWS WITH LOTS OF MISSING VALUES
#Combine Train data and test data first so that the SAME COLUMNS are DROPPED in each
combined_Data = pd.concat([trainData, testData], keys=[0,1])

combined_Data.isnull().sum().sort_values(ascending=False)

Unnamed: 0,0
PoolQC,2909
MiscFeature,2814
Alley,2721
Fence,2348
MasVnrType,1766
FireplaceQu,1420
LotFrontage,486
GarageQual,159
GarageCond,159
GarageYrBlt,159


In [None]:
#Define threshold for dropping columns
percent=int(0.6*(combined_Data.shape[0]))
print(percent)
#Drop columns that have less than "thresh" number of non_Nans
td1=combined_Data.dropna(thresh=percent,axis=1)
print(td1.shape)

1751
(2919, 74)


In [None]:
#look at what other columns have missing values
td1.isnull().sum().sort_values(ascending=False)

Unnamed: 0,0
LotFrontage,486
GarageYrBlt,159
GarageFinish,159
GarageQual,159
GarageCond,159
GarageType,157
BsmtExposure,82
BsmtCond,82
BsmtQual,81
BsmtFinType2,80


In [None]:
#NOW IMPUTE MISSING VALUES FOR THE OTHER COLUMNS=========================
#IMPUTE (SUBSTITUTE) MEAN VALUES FOR NaN IN NUMERIC COLUMNS
numeric=td1.select_dtypes(include=['int','float64']).columns
for num in numeric:
  td1[num]=td1[num].fillna(td1[num].mean())

#IMPUTE (SUBSTITUTE) MODE VALUES FOR NaN IN CATEGORICAL COLUMNS
train_cat_cols = td1.select_dtypes(exclude=['int','float64']).columns#selecting the categorical columns
for colss in train_cat_cols:
  if(td1.iloc[0][colss]=="N"):
        td1[colss]=td1[colss].fillna("N")
  else:
    td1[colss]=td1[colss].fillna(td1[colss].mode())

print(td1.head(20))

      Id  MSSubClass MSZoning  LotFrontage  LotArea Street LotShape LandContour Utilities LotConfig LandSlope Neighborhood Condition1 Condition2 BldgType HouseStyle  OverallQual  OverallCond  YearBuilt  YearRemodAdd RoofStyle RoofMatl Exterior1st Exterior2nd  MasVnrArea ExterQual ExterCond Foundation BsmtQual BsmtCond BsmtExposure BsmtFinType1  BsmtFinSF1 BsmtFinType2  BsmtFinSF2  BsmtUnfSF  TotalBsmtSF Heating HeatingQC CentralAir Electrical  1stFlrSF  2ndFlrSF  LowQualFinSF  GrLivArea  BsmtFullBath  BsmtHalfBath  FullBath  HalfBath  BedroomAbvGr  KitchenAbvGr KitchenQual  TotRmsAbvGrd Functional  Fireplaces GarageType  GarageYrBlt GarageFinish  GarageCars  GarageArea GarageQual GarageCond PavedDrive  WoodDeckSF  OpenPorchSF  EnclosedPorch  3SsnPorch  ScreenPorch  PoolArea  MiscVal  MoSold  YrSold SaleType SaleCondition
0 0    1          60       RL    65.000000     8450   Pave      Reg         Lvl    AllPub    Inside       Gtl      CollgCr       Norm       Norm     1Fam     2Story   

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  td1[num]=td1[num].fillna(td1[num].mean())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  td1[colss]=td1[colss].fillna(td1[colss].mode())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  td1[colss]=td1[colss].fillna(td1[colss].mode())
A value is trying to be set on a copy of a slice from a DataFrame.


In [None]:
#CHECK IF THERE ARE ANY REMAINING MISSING VALUES
td1.isnull().sum().sort_values(ascending=False)

Unnamed: 0,0
GarageCond,159
GarageFinish,159
GarageQual,159
GarageType,157
BsmtCond,82
BsmtExposure,82
BsmtQual,81
BsmtFinType2,80
BsmtFinType1,79
MSZoning,4


In [None]:
#DROP COLUMNS THAT STILL HAVE NULL VALUES
print(td1.shape)
td1=td1.drop(columns=["GarageCond", "GarageFinish", "GarageQual", "GarageType", "BsmtCond", "BsmtExposure", "BsmtQual", "BsmtFinType2", "BsmtFinType1", "MSZoning", "Utilities", "Functional", "Electrical", "Exterior2nd", "Exterior1st", "KitchenQual", "SaleType"])
#td1=td1.drop(columns=["PropertyField5"])
td1.isnull().sum()
print(td1.shape)

(2919, 74)
(2919, 57)


**Perform one hot encoding**

In [None]:
#DO ONE-HOT ENCODING ON CATEGORICAL VARIABLES==============================================
#The below function returns a list of categorical features which are not numeric.
train_cat_cols = td1.select_dtypes(exclude=['float','int']).columns #selecting the categorical columns
print(train_cat_cols.shape)
print(train_cat_cols)

#If there are categorical columns which are encoded as numeric ones
#then we need to explicitly enter the column names in a list and concatenate the two lists in python.
#ONE-HOT ENCODING-generate one-hot encoding on a common basis -THIS TAKES 30 MINS

combined_Data = pd.get_dummies(td1,train_cat_cols)
combined_Data.head(10)

(20,)
Index(['Street', 'LotShape', 'LandContour', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'ExterQual', 'ExterCond', 'Foundation', 'Heating', 'HeatingQC', 'CentralAir', 'PavedDrive', 'SaleCondition'], dtype='object')


Unnamed: 0,Unnamed: 1,Id,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,TotRmsAbvGrd,Fireplaces,GarageYrBlt,GarageCars,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,Street_Grvl,Street_Pave,LotShape_IR1,LotShape_IR2,LotShape_IR3,LotShape_Reg,LandContour_Bnk,LandContour_HLS,LandContour_Low,LandContour_Lvl,LotConfig_Corner,LotConfig_CulDSac,LotConfig_FR2,LotConfig_FR3,LotConfig_Inside,LandSlope_Gtl,LandSlope_Mod,LandSlope_Sev,Neighborhood_Blmngtn,Neighborhood_Blueste,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NPkVill,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Condition1_Artery,Condition1_Feedr,Condition1_Norm,Condition1_PosA,Condition1_PosN,Condition1_RRAe,Condition1_RRAn,Condition1_RRNe,Condition1_RRNn,Condition2_Artery,Condition2_Feedr,Condition2_Norm,Condition2_PosA,Condition2_PosN,Condition2_RRAe,Condition2_RRAn,Condition2_RRNn,BldgType_1Fam,BldgType_2fmCon,BldgType_Duplex,BldgType_Twnhs,BldgType_TwnhsE,HouseStyle_1.5Fin,HouseStyle_1.5Unf,HouseStyle_1Story,HouseStyle_2.5Fin,HouseStyle_2.5Unf,HouseStyle_2Story,HouseStyle_SFoyer,HouseStyle_SLvl,RoofStyle_Flat,RoofStyle_Gable,RoofStyle_Gambrel,RoofStyle_Hip,RoofStyle_Mansard,RoofStyle_Shed,RoofMatl_ClyTile,RoofMatl_CompShg,RoofMatl_Membran,RoofMatl_Metal,RoofMatl_Roll,RoofMatl_Tar&Grv,RoofMatl_WdShake,RoofMatl_WdShngl,ExterQual_Ex,ExterQual_Fa,ExterQual_Gd,ExterQual_TA,ExterCond_Ex,ExterCond_Fa,ExterCond_Gd,ExterCond_Po,ExterCond_TA,Foundation_BrkTil,Foundation_CBlock,Foundation_PConc,Foundation_Slab,Foundation_Stone,Foundation_Wood,Heating_Floor,Heating_GasA,Heating_GasW,Heating_Grav,Heating_OthW,Heating_Wall,HeatingQC_Ex,HeatingQC_Fa,HeatingQC_Gd,HeatingQC_Po,HeatingQC_TA,CentralAir_N,CentralAir_Y,PavedDrive_N,PavedDrive_P,PavedDrive_Y,SaleCondition_Abnorml,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial
0,0,1,60,65.0,8450,7,5,2003,2003,196.0,706.0,0.0,150.0,856.0,856,854,0,1710,1.0,0.0,2,1,3,1,8,0,2003.0,2.0,548.0,0,61,0,0,0,0,0,2,2008,False,True,False,False,False,True,False,False,False,True,False,False,False,False,True,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False
0,1,2,20,80.0,9600,6,8,1976,1976,0.0,978.0,0.0,284.0,1262.0,1262,0,0,1262,0.0,1.0,2,0,3,1,6,1,1976.0,2.0,460.0,298,0,0,0,0,0,0,5,2007,False,True,False,False,False,True,False,False,False,True,False,False,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,False,True,False,True,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False
0,2,3,60,68.0,11250,7,5,2001,2002,162.0,486.0,0.0,434.0,920.0,920,866,0,1786,1.0,0.0,2,1,3,1,6,1,2001.0,2.0,608.0,0,42,0,0,0,0,0,9,2008,False,True,True,False,False,False,False,False,False,True,False,False,False,False,True,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False
0,3,4,70,60.0,9550,7,5,1915,1970,0.0,216.0,0.0,540.0,756.0,961,756,0,1717,1.0,0.0,1,0,3,1,7,1,1998.0,3.0,642.0,0,35,272,0,0,0,0,2,2006,False,True,True,False,False,False,False,False,False,True,True,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,False,True,True,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,True,False,False,True,True,False,False,False,False,False
0,4,5,60,84.0,14260,8,5,2000,2000,350.0,655.0,0.0,490.0,1145.0,1145,1053,0,2198,1.0,0.0,2,1,4,1,9,1,2000.0,3.0,836.0,192,84,0,0,0,0,0,12,2008,False,True,True,False,False,False,False,False,False,True,False,False,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False
0,5,6,50,85.0,14115,5,5,1993,1995,0.0,732.0,0.0,64.0,796.0,796,566,0,1362,1.0,0.0,1,1,1,1,5,0,1993.0,2.0,480.0,40,30,0,320,0,0,700,10,2009,False,True,True,False,False,False,False,False,False,True,False,False,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,True,False,True,False,False,False,False,True,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False
0,6,7,20,75.0,10084,8,5,2004,2005,186.0,1369.0,0.0,317.0,1686.0,1694,0,0,1694,1.0,0.0,2,0,3,1,7,1,2004.0,2.0,636.0,255,57,0,0,0,0,0,8,2007,False,True,False,False,False,True,False,False,False,True,False,False,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False
0,7,8,60,69.305795,10382,7,6,1973,1973,240.0,859.0,32.0,216.0,1107.0,1107,983,0,2090,1.0,0.0,2,1,3,1,7,2,1973.0,2.0,484.0,235,204,228,0,0,0,350,11,2009,False,True,True,False,False,False,False,False,False,True,True,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,False,True,False,True,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False
0,8,9,50,51.0,6120,7,5,1931,1950,0.0,0.0,0.0,952.0,952.0,1022,752,0,1774,0.0,0.0,2,0,2,2,8,2,1931.0,2.0,468.0,90,0,205,0,0,0,0,4,2008,False,True,False,False,False,True,False,False,False,True,False,False,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,False,True,True,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,True,False,False,True,True,False,False,False,False,False
0,9,10,190,50.0,7420,5,6,1939,1950,0.0,851.0,0.0,140.0,991.0,1077,0,0,1077,1.0,0.0,1,0,2,2,5,2,1939.0,1.0,205.0,0,4,0,0,0,0,0,1,2008,False,True,False,False,False,True,False,False,False,True,True,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,False,True,True,False,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False


In [None]:
#Separate Train data and test data
trainData = td1.xs(0)
testData = td1.xs(1)
print(trainData.shape)
print(testData.shape)

trainData=pd.concat([trainData,Target_Train_Cols], axis=1)
print(trainData.shape)


(1460, 57)
(1459, 57)
(1460, 58)


In [None]:
export_csv = trainData.to_csv(r'/content/sample_data/Preprocess_Train.csv')
exporttest_csv = testData.to_csv(r'/content/sample_data/Preprocess_Test.csv')



In [None]:
!pip install vecstack

from vecstack import stacking
import pandas as pd
import numpy as np


from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import cross_val_score

import warnings
warnings.filterwarnings("ignore")

Collecting vecstack
  Downloading vecstack-0.4.0.tar.gz (18 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: vecstack
  Building wheel for vecstack (setup.py) ... [?25l[?25hdone
  Created wheel for vecstack: filename=vecstack-0.4.0-py3-none-any.whl size=19861 sha256=6110a40d6ee2f697e3795631e4e29cf9137cca509e51e3f05ad8a5a2c47ed231
  Stored in directory: /root/.cache/pip/wheels/b8/d8/51/3cf39adf22c522b0a91dc2208db4e9de4d2d9d171683596220
Successfully built vecstack
Installing collected packages: vecstack
Successfully installed vecstack-0.4.0


In [None]:
# Load the train and test data
train_file_path = r'/content/sample_data/train.csv'
test_file_path = r'/content/sample_data/test.csv'


trainfile = r'/content/sample_data/Preprocess_Train.csv'
trainData = pd.read_csv(trainfile) #creates a dataframe
testfile = r'/content/sample_data/Preprocess_Test.csv'
testData = pd.read_csv(testfile)  #creates a dataframe


print(trainData.shape)
print(testData.shape)

(1460, 59)
(1459, 58)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Load the CSV files
train_data = pd.read_csv(trainfile) #creates a dataframe
test_data = pd.read_csv(testfile)  #creates a dataframe

# Apply One-Hot Encoding to categorical columns (if any)
train_data_encoded = pd.get_dummies(train_data)
test_data_encoded = pd.get_dummies(test_data)

# Ensure both train and test data have the same columns after encoding
# Align columns by adding missing ones to the test set and filling with 0
test_data_encoded = test_data_encoded.reindex(columns=train_data_encoded.columns, fill_value=0)



**Use Random Forest Regressor to generate Model 01 and perform hyperparameter tuning with cross-validation**

In [None]:
# Separate features (X) and target (y) in the training data
# Assuming the target column is named 'target'
X_train = train_data_encoded.drop(columns=["Id", "SalePrice"])
y_train = train_data_encoded["SalePrice"]

# Train the Random Forest Regressor
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Prepare test features
#X_test = test_data_encoded.drop(columns=["Id"])
#X_test = test.drop(columns=['ID'])

# Predict on the test data
X_test = test_data_encoded.drop(columns=["Id","SalePrice"])
y_pred = model.predict(X_test)

# Optionally, print the results or save predictions to a file
print("Predictions for the test data:", y_pred)

# Create submission DataFrame with 'ID' and 'TARGET' columns
submission = pd.DataFrame({
    'Id': test_data_encoded['Id'],
    'SalePrice': y_pred
})

# Save submission file
submission.to_csv("MODEL01.csv", index=False)
print("Submission file saved as MODEL01.csv")

# If you want to save the predictions to a CSV file
#predictions_df = pd.DataFrame({'Prediction': y_pred})
#predictions_df.to_csv('predictions.csv', index=False)

Predictions for the test data: [130020.5  155240.5  183558.53 ... 157098.46 113264.5  236082.59]
Submission file saved as MODEL01.csv


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint

# Define the parameter distribution for random search
param_dist_rf = {
    'n_estimators': randint(50, 200),  # Number of trees in the forest
    'max_depth': [5, 10, 20, 30, None],  # Max depth of the trees
    'min_samples_split': randint(2, 20),  # Minimum samples required to split an internal node
    'min_samples_leaf': randint(1, 20),  # Minimum samples required to be at a leaf node
    'max_features': ['auto', 'sqrt', 'log2', None]  # Number of features to consider when looking for the best split
}

# Initialize Random Forest Classifier
rf = RandomForestClassifier(random_state=42)

# Random Search with cross-validation
random_search_rf = RandomizedSearchCV(estimator=rf, param_distributions=param_dist_rf, n_iter=20, cv=5, scoring='accuracy', random_state=42)

# Fit the random search model
random_search_rf.fit(X_train, y_train)

# Best parameters from random search
print("Best parameters from Random Search (Random Forest):", random_search_rf.best_params_)

Best parameters from Random Search (Random Forest): {'max_depth': 30, 'max_features': 'sqrt', 'min_samples_leaf': 6, 'min_samples_split': 11, 'n_estimators': 53}


**Use Decision Tree Regressor to generate Model 01 and perform hyperparameter tuning with cross-validation**

In [None]:
from sklearn.tree import DecisionTreeRegressor

# Separate features (X) and target (y) in the training data
# Assuming the target column is named 'SalePrice'
X_train = train_data_encoded.drop(columns=["Id", "SalePrice"])
y_train = train_data_encoded["SalePrice"]

# Train the Decision Tree Regressor
model = DecisionTreeRegressor(random_state=42)
model.fit(X_train, y_train)

# Prepare test features
X_test = test_data_encoded.drop(columns=["Id", "SalePrice"])

# Predict on the test data
y_pred = model.predict(X_test)

# Optionally, print the results or save predictions to a file
print("Predictions for the test data:", y_pred)

# Create submission DataFrame with 'ID' and 'TARGET' columns
submission = pd.DataFrame({
    'Id': test_data_encoded['Id'],
    'SalePrice': y_pred
})

# Save submission file
submission.to_csv("MODEL02.csv", index=False)
print("Submission file saved as MODEL02.csv")


Predictions for the test data: [129000. 163000. 215000. ... 150750. 108000. 219500.]
Submission file saved as MODEL02.csv


In [None]:
from scipy.stats import randint
from scipy.stats import randint
from sklearn.tree import DecisionTreeRegressor # Import DecisionTreeRegressor
from sklearn.model_selection import RandomizedSearchCV


# Instantiate the DecisionTreeRegressor
dt = DecisionTreeRegressor()

# Define the parameter distribution for random search
param_dist_dt = {
  'max_depth': [5, 10, 20, 30, None],
  'min_samples_split': randint(2, 20),
  'min_samples_leaf': randint(1, 20)
}
# Random Search with cross-validation
random_search_dt = RandomizedSearchCV(estimator=dt, param_distributions=param_dist_dt, n_iter=20, cv=5, scoring='accuracy', random_state=42)
random_search_dt.fit(X_train, y_train)
# Best parameters from random search
print("Best parameters from Random Search (Decision Tree):", random_search_dt.best_params_)

Best parameters from Random Search (Decision Tree): {'max_depth': 30, 'min_samples_leaf': 15, 'min_samples_split': 12}


**Use MLP Regressor to generate Model 01 and perform hyperparameter tuning with cross-validation**

In [None]:
from sklearn.neural_network import MLPRegressor

# Separate features (X) and target (y) in the training data
# Assuming the target column is named 'SalePrice'
X_train = train_data_encoded.drop(columns=["Id", "SalePrice"])
y_train = train_data_encoded["SalePrice"]

# Train the MLP Regressor
model = MLPRegressor(hidden_layer_sizes=(100,), activation='relu', solver='adam', random_state=42, max_iter=500)
model.fit(X_train, y_train)

# Prepare test features
X_test = test_data_encoded.drop(columns=["Id", "SalePrice"])

# Predict on the test data
y_pred = model.predict(X_test)

# Optionally, print the results or save predictions to a file
print("Predictions for the test data:", y_pred)

# Create submission DataFrame with 'ID' and 'TARGET' columns
submission = pd.DataFrame({
    'Id': test_data_encoded['Id'],
    'SalePrice': y_pred
})

# Save submission file
submission.to_csv("MODEL03.csv", index=False)
print("Submission file saved as MODEL03.csv")


Predictions for the test data: [154095.2914949  180473.75930667 199637.87178264 ... 204422.46787048
  99177.00989121 235293.27027465]
Submission file saved as MODEL03.csv


In [None]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error

# Step 1: Generate a sample regression dataset
X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=42)

# Step 2: Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Define the MLP Regressor model
mlp = MLPRegressor(max_iter=1000, random_state=42)

# Step 4: Define the hyperparameters grid to search over
param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (100, 50), (100, 100)],  # Tuple defines the number of neurons per layer
    'activation': ['relu', 'tanh', 'logistic'],  # Activation functions
    'solver': ['adam', 'lbfgs', 'sgd'],  # Optimizers
    'alpha': [0.0001, 0.001, 0.01, 0.1],  # Regularization term
    'learning_rate_init': [0.001, 0.01, 0.1]  # Learning rate
}

# Step 5: Setup GridSearchCV with cross-validation
grid_search = GridSearchCV(estimator=mlp, param_grid=param_grid, cv=5, n_jobs=-1, scoring='neg_mean_squared_error', verbose=2)

# Step 6: Fit GridSearchCV to the training data
grid_search.fit(X_train, y_train)

# Step 7: Get the best hyperparameters
print("Best Hyperparameters: ", grid_search.best_params_)

# Step 8: Evaluate the best model on the test set
best_mlp = grid_search.best_estimator_
y_pred = best_mlp.predict(X_test)

# Evaluate the performance using Mean Squared Error
mse = mean_squared_error(y_test, y_pred)
print(f"Test MSE: {mse:.4f}")


Fitting 5 folds for each of 432 candidates, totalling 2160 fits
Best Hyperparameters:  {'activation': 'relu', 'alpha': 0.1, 'hidden_layer_sizes': (50,), 'learning_rate_init': 0.001, 'solver': 'lbfgs'}
Test MSE: 0.0531


**Use Support Vector Regressor to generate Model 01 and perform hyperparameter tuning with cross-validation**

In [None]:
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler

# Separate features (X) and target (y) in the training data
# Assuming the target column is named 'SalePrice'
X_train = train_data_encoded.drop(columns=["Id", "SalePrice"])
y_train = train_data_encoded["SalePrice"]

# Scale the features for SVR (SVR is sensitive to feature scaling)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Train the Support Vector Regressor
model = SVR(kernel='rbf', C=1.0, epsilon=0.1)
model.fit(X_train_scaled, y_train)

# Prepare and scale test features
X_test = test_data_encoded.drop(columns=["Id", "SalePrice"])
X_test_scaled = scaler.transform(X_test)

# Predict on the test data
y_pred = model.predict(X_test_scaled)

# Optionally, print the results or save predictions to a file
print("Predictions for the test data:", y_pred)

# Create submission DataFrame with 'ID' and 'TARGET' columns
submission = pd.DataFrame({
    'Id': test_data_encoded['Id'],
    'SalePrice': y_pred
})

# Save submission file
submission.to_csv("MODEL04.csv", index=False)
print("Submission file saved as MODEL04.csv")

Predictions for the test data: [162962.49226802 163010.098932   163060.51686993 ... 163010.84719959
 162990.81409524 163057.66450302]
Submission file saved as MODEL04.csv


In [None]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error

# Step 1: Generate a sample regression dataset
X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=42)

# Step 2: Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Define the SVR model
svr = SVR()

# Step 4: Define the hyperparameters grid to search over
param_grid = {
    'C': [0.1, 1, 10, 100],  # Regularization parameter
    'epsilon': [0.01, 0.1, 0.2, 0.3],  # Epsilon parameter for margin of tolerance
    'kernel': ['linear', 'poly', 'rbf'],  # Kernel type: linear, polynomial, or radial basis function
    'degree': [2, 3],  # Degree of the polynomial kernel (only used if kernel='poly')
    'gamma': ['scale', 'auto']  # Kernel coefficient (only used if kernel='rbf' or kernel='poly')
}

# Step 5: Setup GridSearchCV with cross-validation
grid_search = GridSearchCV(estimator=svr, param_grid=param_grid, cv=5, n_jobs=-1, scoring='neg_mean_squared_error', verbose=2)

# Step 6: Fit GridSearchCV to the training data
grid_search.fit(X_train, y_train)

# Step 7: Get the best hyperparameters
print("Best Hyperparameters: ", grid_search.best_params_)

# Step 8: Evaluate the best model on the test set
best_svr = grid_search.best_estimator_
y_pred = best_svr.predict(X_test)

# Evaluate the performance using Mean Squared Error (MSE)
mse = mean_squared_error(y_test, y_pred)
print(f"Test MSE: {mse:.4f}")

**Use Gradient Descent Regressor to generate Model 01 and perform hyperparameter tuning with cross-validation**

In [None]:
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler

# Separate features (X) and target (y) in the training data
# Assuming the target column is named 'SalePrice'
X_train = train_data_encoded.drop(columns=["Id", "SalePrice"])
y_train = train_data_encoded["SalePrice"]

# Scale the features (Gradient Descent benefits from feature scaling)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Train the Gradient Descent Regressor
model = SGDRegressor(max_iter=1000, tol=1e-3, random_state=42)
model.fit(X_train_scaled, y_train)

# Prepare and scale test features
X_test = test_data_encoded.drop(columns=["Id", "SalePrice"])
X_test_scaled = scaler.transform(X_test)

# Predict on the test data
y_pred = model.predict(X_test_scaled)

# Optionally, print the results or save predictions to a file
print("Predictions for the test data:", y_pred)

# Create submission DataFrame with 'ID' and 'TARGET' columns
submission = pd.DataFrame({
    'Id': test_data_encoded['Id'],
    'SalePrice': y_pred
})

# Save submission file
submission.to_csv("MODEL05.csv", index=False)
print("Submission file saved as MODEL05.csv")

Predictions for the test data: [ 138088.40138752 1683910.00371356  181792.63785958 ...  120910.58625862
  155935.92283795  222287.4139445 ]
Submission file saved as MODEL05.csv


In [None]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error

# Step 1: Generate a sample regression dataset
X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=42)

# Step 2: Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Define the SGD Regressor model
sgd_regressor = SGDRegressor(max_iter=1000, random_state=42)

# Step 4: Define the hyperparameters grid to search over
param_grid = {
    'alpha': [0.0001, 0.001, 0.01, 0.1, 1],  # Regularization strength
    'learning_rate': ['constant', 'optimal', 'invscaling'],  # Learning rate schedules
    'penalty': ['l2', 'l1', 'elasticnet'],  # Regularization types
    'max_iter': [1000, 2000, 5000],  # Maximum number of iterations
    'tol': [1e-4, 1e-3, 1e-2]  # Tolerance for stopping criteria
}

# Step 5: Setup GridSearchCV with cross-validation
grid_search = GridSearchCV(estimator=sgd_regressor, param_grid=param_grid, cv=5, n_jobs=-1, scoring='neg_mean_squared_error', verbose=2)

# Step 6: Fit GridSearchCV to the training data
grid_search.fit(X_train, y_train)

# Step 7: Get the best hyperparameters
print("Best Hyperparameters: ", grid_search.best_params_)

# Step 8: Evaluate the best model on the test set
best_sgd_regressor = grid_search.best_estimator_
y_pred = best_sgd_regressor.predict(X_test)

# Evaluate the performance using Mean Squared Error (MSE)
mse = mean_squared_error(y_test, y_pred)
print(f"Test MSE: {mse:.4f}")


**ensemble predictions (one-layer stacking) and hyperparameter tuning with cross-validation on the stacked model**

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Step 1: Generate a sample regression dataset
X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=42)

# Step 2: Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Define base models
decision_tree = DecisionTreeRegressor(random_state=42)
random_forest = RandomForestRegressor(random_state=42)
svm = SVR()

# Step 4: Define the meta-model (Linear Regression)
meta_model = LinearRegression()

# Step 5: Create the Stacking Regressor
stacked_model = StackingRegressor(
    estimators=[('dt', decision_tree), ('rf', random_forest), ('svm', svm)],
    final_estimator=meta_model
)

# Step 6: Define hyperparameter grids for the base models
param_grid = {
    'dt__max_depth': [5, 10, 20],
    'rf__n_estimators': [50, 100, 200],
    'rf__max_depth': [5, 10, 20],
    'svm__C': [0.1, 1, 10],
    'svm__epsilon': [0.01, 0.1, 0.2],
    'final_estimator__fit_intercept': [True, False]
}

# Step 7: Perform GridSearchCV to find the best parameters for the stacked model
grid_search = GridSearchCV(estimator=stacked_model, param_grid=param_grid, cv=5, n_jobs=-1, scoring='neg_mean_squared_error', verbose=2)

# Step 8: Fit the GridSearchCV to the training data
grid_search.fit(X_train, y_train)

# Step 9: Print the best parameters found by GridSearchCV
print("Best Hyperparameters: ", grid_search.best_params_)

# Step 10: Evaluate the best stacked model on the test set
best_stacked_model = grid_search.best_estimator_
y_pred = best_stacked_model.predict(X_test)

# Step 11: Evaluate the performance using Mean Squared Error (MSE)
mse = mean_squared_error(y_test, y_pred)
print(f"Test MSE: {mse:.4f}")

# Step 12: Save the predictions to a CSV file
output = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
output.to_csv('stacked_model_predictions.csv', index=False)

print("Predictions have been saved to 'stacked_model_predictions.csv'")

Fitting 5 folds for each of 486 candidates, totalling 2430 fits


KeyboardInterrupt: 

**Feature selection using SelectFromModel**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import accuracy_score

# Step 1: Load the CSV file into a pandas DataFrame
trainData = pd.read_csv(r'/content/sample_data/restaurant_train.csv')
testData = pd.read_csv(r'/content/sample_data/restaurant_test.csv')


# Step 2: Prepare the dataset
# Assume the last column is the target variable
X = trainData.iloc[:, :-1]  # Features (all columns except the last one)
y = trainData.iloc[:, -1]   # Target variable (last column)

# Step 3: Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Train a model to get feature importance
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Step 5: Perform feature selection using SelectFromModel
selector = SelectFromModel(model, threshold="mean", max_features=None)
X_train_selected = selector.transform(X_train)
X_test_selected = selector.transform(X_test)

# Step 6: Evaluate the model performance on selected features
# Train a new model with the selected features
model_selected = RandomForestClassifier(n_estimators=100, random_state=42)
model_selected.fit(X_train_selected, y_train)

# Predict on the test set with the selected features
y_pred = model_selected.predict(X_test_selected)

# Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Model accuracy with selected features: {accuracy:.4f}")

# Step 7: Get the selected feature names
selected_features = X.columns[selector.get_support()]
print(f"Selected features: {selected_features}")

# Step 8: Save the dataset with the selected features to a new CSV file
X_selected = trainData[selected_features]
X_selected['target'] = y  # Assuming 'target' is the name of the target column
X_selected.to_csv('selected_features_data.csv', index=False)

print("Dataset with selected features saved to 'selected_features_data.csv'")

Model accuracy with selected features: 0.0000
Selected features: Index(['Id', 'P1', 'P2', 'P3', 'P4', 'P5', 'P6', 'P8', 'P11', 'P19', 'P20', 'P21', 'P22', 'P23', 'P28', 'P29'], dtype='object')
Dataset with selected features saved to 'selected_features_data.csv'


**Feature selection using SequentialFeatureSelector**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
import numpy as np

# Step 1: Load the CSV file into a pandas DataFrame
trainData = pd.read_csv(r'/content/sample_data/restaurant_train.csv')
testData = pd.read_csv(r'/content/sample_data/restaurant_test.csv')


# Step 2: Prepare the dataset
# Assume the last column is the target variable
X = trainData.iloc[:, :-1]  # Features (all columns except the last one)
y = trainData.iloc[:, -1]   # Target variable (last column)

# Step 3: Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Define the model (using RandomForestClassifier here)
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Step 5: Use SequentialFeatureSelector for feature selection
# Forward selection with cross-validation
sfs = SequentialFeatureSelector(model, n_features_to_select='auto', direction='forward', cv=KFold(n_splits=5))
sfs.fit(X_train, y_train)

# Step 6: Get the selected features
selected_features = X.columns[sfs.get_support()]
print(f"Selected features: {selected_features}")

# Step 7: Train the model with selected features
X_train_selected = X_train[selected_features]
X_test_selected = X_test[selected_features]

# Train the model using selected features
model.fit(X_train_selected, y_train)

# Predict on the test set with selected features
y_pred = model.predict(X_test_selected)

# Step 8: Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Model accuracy with selected features: {accuracy:.4f}")

# Step 9: Save the dataset with selected features to a new CSV file
X_selected = trainData[selected_features]
X_selected['target'] = y  # Assuming 'target' is the name of the target column
X_selected.to_csv('selected_features_data.csv', index=False)

print("Dataset with selected features saved to 'selected_features_data.csv'")

Selected features: Index(['Id', 'P1', 'P2', 'P3', 'P4', 'P5', 'P6', 'P7', 'P8', 'P9', 'P10', 'P11', 'P12', 'P13', 'P14', 'P15', 'P16', 'P17', 'P18'], dtype='object')
Model accuracy with selected features: 0.0000
Dataset with selected features saved to 'selected_features_data.csv'


**Feature selection using GeneticSelectionCV**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
!pip install git+https://github.com/rasbt/mlxtend.git
!pip install mlxtend--upgrade mlxtend
#pip install --upgrade mlxtend
#pip install -r requirements.txt
from mlxtend.feature_selection import GeneticSelectionCV
from mlxtend.feature_selection import GeneticSelectionCV

# Step 1: Load the CSV file into a pandas DataFrame
trainData = pd.read_csv(r'/content/sample_data/restaurant_train.csv')
testData = pd.read_csv(r'/content/sample_data/restaurant_test.csv')

# Step 2: Prepare the dataset
# Assume the last column is the target variable
X = trainData.iloc[:, :-1]  # Features (all columns except the last one)
y = trainData.iloc[:, -1]   # Target variable (last column)

# Step 3: Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Define the model (using RandomForestClassifier here)
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Step 5: Perform feature selection using GeneticSelectionCV
# Setting the population_size, generations, and other hyperparameters
genetic_selector = GeneticSelectionCV(
    estimator=model,
    cv=5,
    verbose=1,
    population_size=50,
    generations=20,
    tournament_size=5,
    n_jobs=-1,
    random_state=42
)

# Fit the genetic algorithm to the data
genetic_selector.fit(X_train, y_train)

# Step 6: Get the selected features
selected_features = X.columns[genetic_selector.support_]
print(f"Selected features: {selected_features}")

# Step 7: Train the model using the selected features
X_train_selected = X_train[selected_features]
X_test_selected = X_test[selected_features]

# Train the model using the selected features
model.fit(X_train_selected, y_train)

# Predict on the test set with selected features
y_pred = model.predict(X_test_selected)

# Step 8: Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Model accuracy with selected features: {accuracy:.4f}")

# Step 9: Save the dataset with selected features to a new CSV file
X_selected = trainData[selected_features]
X_selected['target'] = y  # Assuming 'target' is the name of the target column
X_selected.to_csv('selected_features_data.csv', index=False)

print("Dataset with selected features saved to 'selected_features_data.csv'")

Collecting git+https://github.com/rasbt/mlxtend.git
  Cloning https://github.com/rasbt/mlxtend.git to /tmp/pip-req-build-yfkd2r1n
  Running command git clone --filter=blob:none --quiet https://github.com/rasbt/mlxtend.git /tmp/pip-req-build-yfkd2r1n
  Resolved https://github.com/rasbt/mlxtend.git to commit 9f0bc8f3a608f2e0de8d5bb3a1b203bd3dfc6584
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[31mERROR: Could not find a version that satisfies the requirement mlxtend--upgrade (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for mlxtend--upgrade[0m[31m
[0m

ImportError: cannot import name 'GeneticSelectionCV' from 'mlxtend.feature_selection' (/usr/local/lib/python3.10/dist-packages/mlxtend/feature_selection/__init__.py)