In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import KNNImputer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from xgboost import XGBRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import OneHotEncoder
from category_encoders import TargetEncoder
from sklearn.pipeline import FeatureUnion
from sklearn.model_selection import cross_val_score
from catboost import CatBoostRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
import lightgbm as lgb
from sklearn.metrics import mean_absolute_error
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import mutual_info_regression
from sklearn.model_selection import RandomizedSearchCV
import optuna
from optuna.integration import OptunaSearchCV
from sklearn.base import BaseEstimator, TransformerMixin
from collections import defaultdict

In [2]:
data = pd.read_csv("named_train.csv")

In [3]:
X = data.drop(columns=["Y"])

In [4]:
Y = data["Y"]

In [5]:
class ItemWeightImputer(BaseEstimator, TransformerMixin):
    def __init__(self):
        self.item_weight_mode = defaultdict(lambda: None)  # Stores mode of Item_Weight per Item_Identifier
        self.global_mean = None  # Fallback global mean for Item_Weight

    def fit(self, X, y=None):
        X = X.copy()

        # Ensure the required columns exist
        if 'Item_Identifier' not in X.columns or 'Item_Weight' not in X.columns:
            raise ValueError("Both 'Item_Identifier' and 'Item_Weight' columns must be present in the dataset.")

        # Ensure no None values in Item_Identifier
        if X['Item_Identifier'].isnull().any():
            raise ValueError("Item_Identifier column contains None values.")

        # Calculate mode Item_Weight for each Item_Identifier
        item_weight_mode = X.groupby('Item_Identifier')['Item_Weight'].agg(lambda x: x.mode().iloc[0] if not x.mode().empty else np.nan)
        self.item_weight_mode.update(item_weight_mode.to_dict())

        # Calculate global mean for the Item_Weight column
        if X['Item_Weight'].notnull().any():
            self.global_mean = X['Item_Weight'].mean()
        else:
            raise ValueError("Item_Weight column contains only NaN values.")

        return self

    def transform(self, X):
        X = X.copy()

        # Ensure the required columns exist
        if 'Item_Identifier' not in X.columns or 'Item_Weight' not in X.columns:
            raise ValueError("Both 'Item_Identifier' and 'Item_Weight' columns must be present in the dataset.")

        # Ensure no None values in Item_Identifier
        if X['Item_Identifier'].isnull().any():
            raise ValueError("Item_Identifier column contains None values.")

        # Safely impute NaN Item_Weights based on Item_Identifier or global mean
        X['Item_Weight'] = X.apply(
            lambda row: self.item_weight_mode.get(row['Item_Identifier'], self.global_mean)
            if pd.isnull(row['Item_Weight'])
            else row['Item_Weight'],
            axis=1
        )

        X['Item_Weight'] = X['Item_Weight'].fillna(self.global_mean)

        return X[['Item_Weight']]

In [6]:
class VisibilityZerosImputer(BaseEstimator, TransformerMixin):
    def __init__(self):
        self.item_visibility_mean = defaultdict(lambda: None)  # Stores mean of Item_Visibility per Item_Identifier
        self.global_mean = None  # Fallback global mean for Item_Visibility

    def fit(self, X, y=None):
        X = X.copy()

        # Ensure the required columns exist
        if 'Item_Identifier' not in X.columns or 'Item_Visibility' not in X.columns:
            raise ValueError("Both 'Item_Identifier' and 'Item_Visibility' columns must be present in the dataset.")

        # Ensure no None values in Item_Identifier
        if X['Item_Identifier'].isnull().any():
            raise ValueError("Item_Identifier column contains None values.")

        # Calculate mean Item_Visibility for each Item_Identifier
        item_visibility_mean = X.groupby('Item_Identifier')['Item_Visibility'].mean()
        self.item_visibility_mean.update(item_visibility_mean.to_dict())

        # Calculate global mean for the Item_Visibility column
        if X['Item_Visibility'].notnull().any():
            self.global_mean = X['Item_Visibility'].mean()
        else:
            raise ValueError("Item_Visibility column contains only NaN values.")

        return self

    def transform(self, X):
        X = X.copy()

        # Ensure the required columns exist
        if 'Item_Identifier' not in X.columns or 'Item_Visibility' not in X.columns:
            raise ValueError("Both 'Item_Identifier' and 'Item_Visibility' columns must be present in the dataset.")

        # Ensure no None values in Item_Identifier
        if X['Item_Identifier'].isnull().any():
            raise ValueError("Item_Identifier column contains None values.")

        # Safely impute zero Item_Visibility based on Item_Identifier or global mean
        X['Item_Visibility'] = X.apply(
            lambda row: self.item_visibility_mean.get(row['Item_Identifier'], self.global_mean)
            if row['Item_Visibility'] == 0
            else row['Item_Visibility'],
            axis=1
        )

        X['Item_Visibility'] = X['Item_Visibility'].replace(0, self.global_mean)
        X['Item_Visibility'] = np.sqrt(X['Item_Visibility'])

        return X[['Item_Visibility']]

In [7]:
X['Category'] = X['Item_Identifier'].str[:2]

In [8]:
X["Item_Fat_Content"] = X["Item_Fat_Content"].replace({"low fat": "LF", "Low Fat": "LF", "Regular": "REG", "reg": "REG"})
X.loc[X['Category'] == 'NC', 'Item_Fat_Content'] = 'nofat'

In [9]:
X['age'] = 2024 - X['Outlet_Establishment_Year']

In [10]:
impute_sizes = {
    "OUT010": "Small",
    "OUT017": "Small",
    "OUT045": "Medium"
}
X['Outlet_Size'] = X.apply(
    lambda row: impute_sizes[row['Outlet_Identifier']] if pd.isnull(row['Outlet_Size']) else row['Outlet_Size'], axis=1
)

In [11]:
X['MRP_cluster']=pd.cut(X["Item_MRP"],bins=[25,69,137,203,270],labels=['very low','low','high','very high'],right=True)

In [12]:
weight_imputer =  ItemWeightImputer()

In [13]:
X["Item_Weight"] = weight_imputer.fit_transform(X)


In [14]:
X["Weight_per_Unit_MRP"] = X["Item_Weight"] / X["Item_MRP"]

In [15]:
X['MRP_cluster']=pd.cut(X["Item_MRP"],bins=[25,69,137,203,270],labels=['very low','low','high','very high'],right=True)

In [16]:
X['Item_Type_Outlet_Type'] = X['Item_Type'] + "_" + X['Outlet_Type']
X['Item_Fat_Content_Outlet_Type'] = X['Item_Fat_Content'] + "_" + X['Outlet_Type']
X['Outlet_Size_Outlet_Location_Type'] = X['Outlet_Size'] + "_" + X['Outlet_Location_Type']
X['Item_Type_Item_Fat_Content'] = X['Item_Type'] + "_" + X['Item_Fat_Content']
X['Outlet_Type_Outlet_Location_Type'] = X['Outlet_Type'] + "_" + X['Outlet_Location_Type']

# Numerical × Numerical Interactions
X['Item_MRP_Outlet_Establishment_Year'] = X['Item_MRP'] * X['Outlet_Establishment_Year']
X['Item_MRP_Item_Visibility'] = X['Item_MRP'] * X['Item_Visibility']
X['Years_Operating_Outlet_Type'] = X['age'] * X['Outlet_Type'].apply(lambda x: hash(x) % 1000) 
X['Outlet_Identifier_Years_Operating'] = X['Outlet_Identifier'].apply(lambda x: hash(x) % 1000) * X['age']

# Numerical × Categorical Interactions
X['Item_Visibility_Item_Type'] = X['Item_Visibility'] * X['Item_Type'].apply(lambda x: hash(x) % 1000)

In [26]:
X

Unnamed: 0,Item_Identifier,Item_Weight,Item_Fat_Content,Item_Visibility,Item_Type,Item_MRP,Outlet_Identifier,Outlet_Establishment_Year,Outlet_Size,Outlet_Location_Type,...,Item_Type_Outlet_Type,Item_Fat_Content_Outlet_Type,Outlet_Size_Outlet_Location_Type,Item_Type_Item_Fat_Content,Outlet_Type_Outlet_Location_Type,Item_MRP_Outlet_Establishment_Year,Item_MRP_Item_Visibility,Years_Operating_Outlet_Type,Outlet_Identifier_Years_Operating,Item_Visibility_Item_Type
0,FDA15,9.300,LF,0.016047,Dairy,249.8092,OUT049,1999,Medium,Tier 1,...,Dairy_Supermarket Type1,LF_Supermarket Type1,Medium_Tier 1,Dairy_LF,Supermarket Type1_Tier 1,499368.5908,4.008763,10025,14425,13.094598
1,DRC01,5.920,REG,0.019278,Soft Drinks,48.2692,OUT018,2009,Medium,Tier 3,...,Soft Drinks_Supermarket Type2,REG_Supermarket Type2,Medium_Tier 3,Soft Drinks_REG,Supermarket Type2_Tier 3,96972.8228,0.930544,420,2775,6.014803
2,FDN15,17.500,LF,0.016760,Meat,141.6180,OUT049,1999,Medium,Tier 1,...,Meat_Supermarket Type1,LF_Supermarket Type1,Medium_Tier 1,Meat_LF,Supermarket Type1_Tier 1,283094.3820,2.373528,10025,14425,5.195623
3,FDX07,19.200,REG,0.000000,Fruits and Vegetables,182.0950,OUT010,1998,Small,Tier 3,...,Fruits and Vegetables_Grocery Store,REG_Grocery Store,Small_Tier 3,Fruits and Vegetables_REG,Grocery Store_Tier 3,363825.8100,0.000000,12402,10374,0.000000
4,NCD19,8.930,nofat,0.000000,Household,53.8614,OUT013,1987,High,Tier 3,...,Household_Supermarket Type1,nofat_Supermarket Type1,High_Tier 3,Household_nofat,Supermarket Type1_Tier 3,107022.6018,0.000000,14837,21053,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5995,FDB32,20.600,LF,0.023586,Fruits and Vegetables,94.7778,OUT017,2007,Small,Tier 2,...,Fruits and Vegetables_Supermarket Type1,LF_Supermarket Type1,Small_Tier 2,Fruits and Vegetables_LF,Supermarket Type1_Tier 2,190219.0446,2.235391,6817,10965,0.377370
5996,FDJ16,9.195,LF,0.115064,Frozen Foods,58.6246,OUT049,1999,Medium,Tier 1,...,Frozen Foods_Supermarket Type1,LF_Supermarket Type1,Medium_Tier 1,Frozen Foods_LF,Supermarket Type1_Tier 1,117190.5754,6.745596,10025,14425,39.812235
5997,FDJ32,10.695,LF,0.057910,Fruits and Vegetables,60.4536,OUT045,2002,Medium,Tier 2,...,Fruits and Vegetables_Supermarket Type1,LF_Supermarket Type1,Medium_Tier 2,Fruits and Vegetables_LF,Supermarket Type1_Tier 2,121028.1072,3.500841,8822,15070,0.926553
5998,FDO12,15.750,LF,0.054920,Baking Goods,195.8452,OUT035,2004,Small,Tier 2,...,Baking Goods_Supermarket Type1,LF_Supermarket Type1,Small_Tier 2,Baking Goods_LF,Supermarket Type1_Tier 2,392473.7808,10.755847,8020,19220,22.956621


In [22]:
# Define the columns for each type of transformation
numerical_cols = ["Item_Weight", "Weight_per_Unit_MRP", "Item_Visibility", "Item_MRP_Outlet_Establishment_Year", "Item_MRP_Item_Visibility", "Years_Operating_Outlet_Type", "Outlet_Identifier_Years_Operating", "Item_Visibility_Item_Type"]
one_hot_columns = ["Outlet_Location_Type", "Category", "Item_Fat_Content", "Outlet_Size", "Outlet_Type", "Item_Type_Outlet_Type", "Item_Fat_Content_Outlet_Type", "Outlet_Size_Outlet_Location_Type", "Item_Type_Item_Fat_Content", "Outlet_Type_Outlet_Location_Type"]
target_encoder_cols = ['Item_Identifier','Item_Type', 'Outlet_Identifier']
ordinal_categories = [['very low', 'low', 'high', 'very high']]

# Numerical pipeline
numerical_pipeline = Pipeline([
    ("mean_imputer", SimpleImputer(strategy='mean')),
    ("scaler", StandardScaler())
])

#Target encoding pipeline
target_encoding_pipeline = Pipeline([
    ("target_encoder", TargetEncoder(cols=target_encoder_cols, smoothing=0.5))
])

# One-hot encoding pipeline
onehot_transform_pipeline = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("onehot", OneHotEncoder(handle_unknown="ignore", sparse_output=False))
])

# Ordinal encoding pipeline
ordinal_pipeline = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("ordinal", OrdinalEncoder(categories=ordinal_categories))
])

# Combine all pipelines into a ColumnTransformer
preprocessor = ColumnTransformer([
    ("num", numerical_pipeline, numerical_cols),
    ("target_encoder", target_encoding_pipeline, target_encoder_cols),
    ("onehot", onehot_transform_pipeline, one_hot_columns),
    ("ordinal", ordinal_pipeline, ["MRP_cluster"])
])

# Final pipeline
final_pipeline = Pipeline([
    ("preprocessor", preprocessor)
])


In [27]:
import optuna
from sklearn.svm import SVR
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score

def objective_svr(trial):
    param = {
        'C': trial.suggest_float('C', 0.1, 100.0),
        'epsilon': trial.suggest_float('epsilon', 0.01, 1.0),
        'kernel': trial.suggest_categorical('kernel', ['linear', 'poly', 'rbf', 'sigmoid']),
        'degree': trial.suggest_int('degree', 2, 5) if trial.suggest_categorical('kernel', ['linear', 'poly', 'rbf', 'sigmoid']) == 'poly' else 3,
        'gamma': trial.suggest_categorical('gamma', ['scale', 'auto'])
    }
    
    # Create a pipeline with the final transformer and SVR
    model = SVR(**param)
    pipeline = Pipeline([
        ('final_transform', final_pipeline),
        ('svr', model)
    ])

    # Perform cross-validation with error handling
    try:
        scores = cross_val_score(pipeline, X, Y, cv=5, scoring='neg_mean_absolute_error', error_score='raise')
        mae = -scores.mean()
        print(f"Trial MAE: {mae}")
    except Exception as e:
        print(f"Error during cross-validation: {e}")
        mae = float('inf')  # Assign a high error value if an exception occurs

    return mae

study_svr = optuna.create_study(direction='minimize')
study_svr.optimize(objective_svr, n_trials=50)

print("SVR Best Parameters:", study_svr.best_params)
print("SVR Best MAE:", study_svr.best_value)

[I 2024-12-28 01:58:01,978] A new study created in memory with name: no-name-abf34214-65b0-4fa5-876e-a78c6e1a8e86
[I 2024-12-28 01:58:13,222] Trial 0 finished with value: 0.4214555321282167 and parameters: {'C': 77.10979784959987, 'epsilon': 0.2665413533584772, 'kernel': 'rbf', 'gamma': 'auto'}. Best is trial 0 with value: 0.4214555321282167.


Trial MAE: 0.4214555321282167


[I 2024-12-28 01:59:26,930] Trial 1 finished with value: 0.4320385936813132 and parameters: {'C': 64.31884001534831, 'epsilon': 0.777798910797332, 'kernel': 'linear', 'gamma': 'auto'}. Best is trial 0 with value: 0.4214555321282167.


Trial MAE: 0.4320385936813132


[I 2024-12-28 01:59:42,845] Trial 2 finished with value: 0.44315085463953724 and parameters: {'C': 93.73792908770996, 'epsilon': 0.4847977686785702, 'kernel': 'poly', 'degree': 5, 'gamma': 'scale'}. Best is trial 0 with value: 0.4214555321282167.


Trial MAE: 0.44315085463953724


[I 2024-12-28 01:59:52,637] Trial 3 finished with value: 0.4258156968015525 and parameters: {'C': 94.33144250246599, 'epsilon': 0.40775045821969635, 'kernel': 'rbf', 'gamma': 'auto'}. Best is trial 0 with value: 0.4214555321282167.


Trial MAE: 0.4258156968015525


[I 2024-12-28 02:00:02,361] Trial 4 finished with value: 26.769838126020495 and parameters: {'C': 86.30404770829756, 'epsilon': 0.39739802786421813, 'kernel': 'sigmoid', 'gamma': 'scale'}. Best is trial 0 with value: 0.4214555321282167.


Trial MAE: 26.769838126020495


[I 2024-12-28 02:00:12,738] Trial 5 finished with value: 0.4059916188349114 and parameters: {'C': 6.062738709449914, 'epsilon': 0.0588950225490227, 'kernel': 'rbf', 'gamma': 'auto'}. Best is trial 5 with value: 0.4059916188349114.


Trial MAE: 0.4059916188349114


[I 2024-12-28 02:00:32,487] Trial 6 finished with value: 0.42340064066077715 and parameters: {'C': 41.534235635538984, 'epsilon': 0.07833891978057485, 'kernel': 'poly', 'degree': 4, 'gamma': 'scale'}. Best is trial 5 with value: 0.4059916188349114.


Trial MAE: 0.42340064066077715


[I 2024-12-28 02:00:34,985] Trial 7 finished with value: 0.4509455780664787 and parameters: {'C': 31.28421185995635, 'epsilon': 0.8155359959670846, 'kernel': 'poly', 'degree': 3, 'gamma': 'auto'}. Best is trial 5 with value: 0.4059916188349114.


Trial MAE: 0.4509455780664787


[I 2024-12-28 02:00:45,543] Trial 8 finished with value: 0.42259265084610337 and parameters: {'C': 73.03757729610832, 'epsilon': 0.37194809652391775, 'kernel': 'rbf', 'gamma': 'auto'}. Best is trial 5 with value: 0.4059916188349114.


Trial MAE: 0.42259265084610337


[I 2024-12-28 02:00:53,286] Trial 9 finished with value: 18.088750584945213 and parameters: {'C': 58.322168741219336, 'epsilon': 0.27603631933922745, 'kernel': 'sigmoid', 'gamma': 'scale'}. Best is trial 5 with value: 0.4059916188349114.


Trial MAE: 18.088750584945213


[I 2024-12-28 02:01:51,291] Trial 10 finished with value: 0.40834920989296436 and parameters: {'C': 5.758059650675468, 'epsilon': 0.061779292926026186, 'kernel': 'linear', 'gamma': 'auto'}. Best is trial 5 with value: 0.4059916188349114.


Trial MAE: 0.40834920989296436


[I 2024-12-28 02:02:06,402] Trial 11 finished with value: 0.4082066459861443 and parameters: {'C': 0.6885840461961212, 'epsilon': 0.01006197442412135, 'kernel': 'linear', 'gamma': 'auto'}. Best is trial 5 with value: 0.4059916188349114.


Trial MAE: 0.4082066459861443


[I 2024-12-28 02:02:19,016] Trial 12 finished with value: 0.4081658119606192 and parameters: {'C': 0.42265349744593933, 'epsilon': 0.015828766056381574, 'kernel': 'linear', 'gamma': 'auto'}. Best is trial 5 with value: 0.4059916188349114.


Trial MAE: 0.4081658119606192


[I 2024-12-28 02:04:15,031] Trial 13 finished with value: 0.41035478818399707 and parameters: {'C': 18.233567127416205, 'epsilon': 0.19338851122880912, 'kernel': 'linear', 'gamma': 'auto'}. Best is trial 5 with value: 0.4059916188349114.


Trial MAE: 0.41035478818399707


[I 2024-12-28 02:04:16,465] Trial 14 finished with value: 0.48766810944685995 and parameters: {'C': 21.125587291065347, 'epsilon': 0.9928887375374049, 'kernel': 'rbf', 'gamma': 'auto'}. Best is trial 5 with value: 0.4059916188349114.


Trial MAE: 0.48766810944685995


[I 2024-12-28 02:05:50,007] Trial 15 finished with value: 0.40963105558751883 and parameters: {'C': 13.941998631909206, 'epsilon': 0.15995986709268972, 'kernel': 'linear', 'gamma': 'auto'}. Best is trial 5 with value: 0.4059916188349114.


Trial MAE: 0.40963105558751883


[I 2024-12-28 02:05:54,291] Trial 16 finished with value: 0.42515549430405575 and parameters: {'C': 36.50643350183964, 'epsilon': 0.6117026970480445, 'kernel': 'rbf', 'gamma': 'auto'}. Best is trial 5 with value: 0.4059916188349114.


Trial MAE: 0.42515549430405575


[I 2024-12-28 02:06:04,697] Trial 17 finished with value: 9.578162401262743 and parameters: {'C': 25.91549244656273, 'epsilon': 0.16109911511373343, 'kernel': 'sigmoid', 'gamma': 'auto'}. Best is trial 5 with value: 0.4059916188349114.


Trial MAE: 9.578162401262743


[I 2024-12-28 02:06:29,802] Trial 18 finished with value: 0.4167910620908676 and parameters: {'C': 8.01633615494192, 'epsilon': 0.5678619833806107, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 5 with value: 0.4059916188349114.


Trial MAE: 0.4167910620908676


[I 2024-12-28 02:06:41,156] Trial 19 finished with value: 0.41785785797189334 and parameters: {'C': 48.310470664774044, 'epsilon': 0.2647266996745715, 'kernel': 'rbf', 'gamma': 'auto'}. Best is trial 5 with value: 0.4059916188349114.


Trial MAE: 0.41785785797189334


[I 2024-12-28 02:06:58,809] Trial 20 finished with value: 0.40911662881826494 and parameters: {'C': 1.1680329609038687, 'epsilon': 0.10385435358477768, 'kernel': 'linear', 'gamma': 'auto'}. Best is trial 5 with value: 0.4059916188349114.


Trial MAE: 0.40911662881826494


[I 2024-12-28 02:07:15,597] Trial 21 finished with value: 0.40836986349660737 and parameters: {'C': 0.90197157264598, 'epsilon': 0.04375760546843227, 'kernel': 'linear', 'gamma': 'auto'}. Best is trial 5 with value: 0.4059916188349114.


Trial MAE: 0.40836986349660737


[I 2024-12-28 02:09:03,042] Trial 22 finished with value: 0.4081849006504523 and parameters: {'C': 11.472928047081624, 'epsilon': 0.01436482104243553, 'kernel': 'linear', 'gamma': 'auto'}. Best is trial 5 with value: 0.4059916188349114.


Trial MAE: 0.4081849006504523


[I 2024-12-28 02:10:32,168] Trial 23 finished with value: 0.40959646200351385 and parameters: {'C': 12.319726584328627, 'epsilon': 0.16236337341089233, 'kernel': 'linear', 'gamma': 'auto'}. Best is trial 5 with value: 0.4059916188349114.


Trial MAE: 0.40959646200351385


[I 2024-12-28 02:12:45,702] Trial 24 finished with value: 0.408308311557415 and parameters: {'C': 20.084600748626123, 'epsilon': 0.013948812956270984, 'kernel': 'linear', 'gamma': 'auto'}. Best is trial 5 with value: 0.4059916188349114.


Trial MAE: 0.408308311557415


[I 2024-12-28 02:12:54,662] Trial 25 finished with value: 0.41399569950476245 and parameters: {'C': 27.56600836202587, 'epsilon': 0.11127361377042905, 'kernel': 'rbf', 'gamma': 'auto'}. Best is trial 5 with value: 0.4059916188349114.


Trial MAE: 0.41399569950476245


[I 2024-12-28 02:12:58,946] Trial 26 finished with value: 0.4073471385479797 and parameters: {'C': 12.2221012705219, 'epsilon': 0.2414893803246754, 'kernel': 'poly', 'degree': 2, 'gamma': 'scale'}. Best is trial 5 with value: 0.4059916188349114.


Trial MAE: 0.4073471385479797


[I 2024-12-28 02:13:04,486] Trial 27 finished with value: 0.411306168719773 and parameters: {'C': 40.342367757073, 'epsilon': 0.2979831130297814, 'kernel': 'poly', 'degree': 2, 'gamma': 'scale'}. Best is trial 5 with value: 0.4059916188349114.


Trial MAE: 0.411306168719773


[I 2024-12-28 02:13:08,717] Trial 28 finished with value: 0.4053540068336095 and parameters: {'C': 7.312466579439736, 'epsilon': 0.1983150137545809, 'kernel': 'poly', 'degree': 2, 'gamma': 'scale'}. Best is trial 28 with value: 0.4053540068336095.


Trial MAE: 0.4053540068336095


[I 2024-12-28 02:13:14,632] Trial 29 finished with value: 0.41034974778549627 and parameters: {'C': 32.71534968175675, 'epsilon': 0.2268530310394822, 'kernel': 'poly', 'degree': 2, 'gamma': 'scale'}. Best is trial 28 with value: 0.4053540068336095.


Trial MAE: 0.41034974778549627


[I 2024-12-28 02:13:18,289] Trial 30 finished with value: 0.40909678244173103 and parameters: {'C': 17.089733234835094, 'epsilon': 0.3444426429545555, 'kernel': 'poly', 'degree': 2, 'gamma': 'scale'}. Best is trial 28 with value: 0.4053540068336095.


Trial MAE: 0.40909678244173103


[I 2024-12-28 02:13:23,411] Trial 31 finished with value: 0.4058268662195232 and parameters: {'C': 5.489194896122922, 'epsilon': 0.11848247655288345, 'kernel': 'poly', 'degree': 3, 'gamma': 'scale'}. Best is trial 28 with value: 0.4053540068336095.


Trial MAE: 0.4058268662195232


[I 2024-12-28 02:13:27,741] Trial 32 finished with value: 0.4087984921812013 and parameters: {'C': 8.055614735437043, 'epsilon': 0.2356649400643677, 'kernel': 'poly', 'degree': 3, 'gamma': 'scale'}. Best is trial 28 with value: 0.4053540068336095.


Trial MAE: 0.4087984921812013


[I 2024-12-28 02:13:35,282] Trial 33 finished with value: 0.413185605742889 and parameters: {'C': 23.897195443522953, 'epsilon': 0.136800251350977, 'kernel': 'poly', 'degree': 3, 'gamma': 'scale'}. Best is trial 28 with value: 0.4053540068336095.


Trial MAE: 0.413185605742889


[I 2024-12-28 02:13:38,571] Trial 34 finished with value: 0.40523677373570344 and parameters: {'C': 6.499796200843558, 'epsilon': 0.3025450326599465, 'kernel': 'poly', 'degree': 2, 'gamma': 'scale'}. Best is trial 34 with value: 0.40523677373570344.


Trial MAE: 0.40523677373570344


[I 2024-12-28 02:13:41,258] Trial 35 finished with value: 0.41197479951864013 and parameters: {'C': 8.93895273693847, 'epsilon': 0.46518962871191377, 'kernel': 'poly', 'degree': 3, 'gamma': 'scale'}. Best is trial 34 with value: 0.40523677373570344.


Trial MAE: 0.41197479951864013


[I 2024-12-28 02:13:45,350] Trial 36 finished with value: 0.4080799741274405 and parameters: {'C': 16.2137327458861, 'epsilon': 0.312189864594723, 'kernel': 'poly', 'degree': 2, 'gamma': 'scale'}. Best is trial 34 with value: 0.40523677373570344.


Trial MAE: 0.4080799741274405


[I 2024-12-28 02:13:48,236] Trial 37 finished with value: 0.41200071616088446 and parameters: {'C': 5.574930170621857, 'epsilon': 0.4414274342786234, 'kernel': 'poly', 'degree': 4, 'gamma': 'scale'}. Best is trial 34 with value: 0.40523677373570344.


Trial MAE: 0.41200071616088446


[I 2024-12-28 02:13:56,909] Trial 38 finished with value: 0.4132029924263942 and parameters: {'C': 63.20089453741948, 'epsilon': 0.20647457208835548, 'kernel': 'poly', 'degree': 2, 'gamma': 'scale'}. Best is trial 34 with value: 0.40523677373570344.


Trial MAE: 0.4132029924263942


[I 2024-12-28 02:14:04,132] Trial 39 finished with value: 26.919823197267647 and parameters: {'C': 86.76621879277633, 'epsilon': 0.09673512470240353, 'kernel': 'sigmoid', 'gamma': 'scale'}. Best is trial 34 with value: 0.40523677373570344.


Trial MAE: 26.919823197267647


[I 2024-12-28 03:01:43,538] Trial 40 finished with value: 0.4215127124268581 and parameters: {'C': 49.942829188935086, 'epsilon': 0.5397864684488087, 'kernel': 'rbf', 'gamma': 'scale'}. Best is trial 34 with value: 0.40523677373570344.


Trial MAE: 0.4215127124268581


[I 2024-12-28 07:19:43,547] Trial 41 finished with value: 0.4051280600504568 and parameters: {'C': 5.396515587082291, 'epsilon': 0.3357744808400312, 'kernel': 'poly', 'degree': 2, 'gamma': 'scale'}. Best is trial 41 with value: 0.4051280600504568.


Trial MAE: 0.4051280600504568


[I 2024-12-28 09:09:58,274] Trial 42 finished with value: 0.40518131342463704 and parameters: {'C': 4.878171655372802, 'epsilon': 0.3595959459745432, 'kernel': 'poly', 'degree': 2, 'gamma': 'scale'}. Best is trial 41 with value: 0.4051280600504568.


Trial MAE: 0.40518131342463704


[I 2024-12-28 09:13:46,050] Trial 43 finished with value: 0.40529829241432075 and parameters: {'C': 3.8396057585183727, 'epsilon': 0.4008696910029216, 'kernel': 'poly', 'degree': 2, 'gamma': 'scale'}. Best is trial 41 with value: 0.4051280600504568.


Trial MAE: 0.40529829241432075


[I 2024-12-28 09:13:56,489] Trial 44 finished with value: 0.4086889787076123 and parameters: {'C': 13.551142920753605, 'epsilon': 0.40646046737228425, 'kernel': 'poly', 'degree': 2, 'gamma': 'scale'}. Best is trial 41 with value: 0.4051280600504568.


Trial MAE: 0.4086889787076123


[I 2024-12-28 09:14:02,393] Trial 45 finished with value: 0.4044536731889476 and parameters: {'C': 3.867424947798011, 'epsilon': 0.34724774976860173, 'kernel': 'poly', 'degree': 2, 'gamma': 'scale'}. Best is trial 45 with value: 0.4044536731889476.


Trial MAE: 0.4044536731889476


[I 2024-12-28 09:14:08,513] Trial 46 finished with value: 0.4034886261445075 and parameters: {'C': 2.6311460970378824, 'epsilon': 0.35225148545949075, 'kernel': 'poly', 'degree': 2, 'gamma': 'scale'}. Best is trial 46 with value: 0.4034886261445075.


Trial MAE: 0.4034886261445075


[I 2024-12-28 09:14:22,560] Trial 47 finished with value: 0.41512956026043735 and parameters: {'C': 74.30174615653905, 'epsilon': 0.3398289947787215, 'kernel': 'poly', 'degree': 2, 'gamma': 'scale'}. Best is trial 46 with value: 0.4034886261445075.


Trial MAE: 0.41512956026043735


[I 2024-12-28 09:14:27,882] Trial 48 finished with value: 0.4130430629557454 and parameters: {'C': 23.22839298013828, 'epsilon': 0.5195120350646354, 'kernel': 'poly', 'degree': 2, 'gamma': 'scale'}. Best is trial 46 with value: 0.4034886261445075.


Trial MAE: 0.4130430629557454


[I 2024-12-28 09:14:31,323] Trial 49 finished with value: 0.4162954861493575 and parameters: {'C': 18.01181560651542, 'epsilon': 0.623247864971045, 'kernel': 'poly', 'degree': 2, 'gamma': 'scale'}. Best is trial 46 with value: 0.4034886261445075.


Trial MAE: 0.4162954861493575
SVR Best Parameters: {'C': 2.6311460970378824, 'epsilon': 0.35225148545949075, 'kernel': 'poly', 'degree': 2, 'gamma': 'scale'}
SVR Best MAE: 0.4034886261445075


In [32]:
# import eli5
# from eli5.sklearn import PermutationImportance

# Get the best parameters from the study
best_params = study_svr.best_params

# Create the best model using the best parameters
best_model = SVR(**best_params)

# Create a pipeline with the final transformer and the best model
best_pipeline = Pipeline([
    ('final_transform', final_pipeline),
    ('svr', best_model)
])

best_pipeline.fit(X, Y)


In [33]:
# import eli5
# from eli5.sklearn import PermutationImportance

# # Fit the best pipeline on the data
# best_pipeline.fit(X, Y)

# # Use PermutationImportance to get feature importances
# perm = PermutationImportance(best_pipeline.named_steps['svr'], random_state=1).fit(best_pipeline.named_steps['final_transform'].transform(X), Y)

# # Display the feature importances
# eli5.show_weights(perm, feature_names=best_pipeline.named_steps['final_transform'].get_feature_names_out())

ImportError: cannot import name 'if_delegate_has_method' from 'sklearn.utils.metaestimators' (c:\Users\DELL\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\utils\metaestimators.py)