In [69]:
import numpy as np
import pandas as pd
import optuna
import dagshub
import mlflow
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.preprocessing import StandardScaler, RobustScaler, MaxAbsScaler, MinMaxScaler
from sklearn.model_selection import cross_val_score, KFold
from sklearn.preprocessing import OrdinalEncoder, TargetEncoder
from sklearn.svm import SVR
from feature_engine.encoding import MeanEncoder
from category_encoders import BinaryEncoder
from feature_engine.encoding import MeanEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [8]:
import os

# Location of the training parquet. The default is the original author's local
# path; set AUTONEXUS_TRAIN_PATH to run the notebook on another machine without
# editing this cell.
TRAIN_PATH = os.environ.get(
    'AUTONEXUS_TRAIN_PATH',
    'C:/Users/aksha/OneDrive/Desktop/AutoNexusMlOps/data/Exp/train.parquet',
)

train = pd.read_parquet(TRAIN_PATH)

# Split into feature frame and target series ('Price' is the regression target).
xtrain = train.drop(columns=['Price'])
ytrain = train['Price'].copy()

In [9]:
# Show every column when a DataFrame is displayed (no truncation of wide frames).
pd.options.display.max_columns = None

In [10]:
# Peek at one randomly chosen feature row to sanity-check columns and values.
xtrain.sample(n=1)

Unnamed: 0,Model_Year,Mileage,Brand_Name,Model_Name,Stock_Type,Exterior_Color,Interior_Color,Drivetrain,Km/L,Fuel_Type,Accidents_Or_Damage,Clean_Title,One_Owner_Vehicle,Personal_Use_Only,Level2_Charging,Dc_Fast_Charging,Battery_Capacity,Expected_Range,Gear_Spec,Engine_Size,Cylinder_Config,Valves,Km/L_e_City,Km/L_e_Hwy,City,STATE
155603,2020,91380,Toyota,Tacoma SR5,Used,white,gray,4WD,8.0,Gasoline,False,True,False,True,0.0,0.0,0.0,0.0,6,3.5,V6,24,0.0,0.0,garnett,kansas


In [77]:
# One seed shared by the CV splitter and every stochastic estimator, so that a
# given set of trial parameters always produces the same score.
_SEED = 42

# Nominal (unordered) categorical columns handled by the trial-selected encoder.
_NOMINAL_COLS = ['Brand_Name', 'Model_Name', 'Exterior_Color',
                 'Interior_Color', 'Drivetrain', 'Fuel_Type',
                 'Cylinder_Config', 'City', 'STATE']


def _build_encoder(trial):
    """Return the categorical ColumnTransformer for the sampled ``encoder_type``."""
    encoder_type = trial.suggest_categorical("encoder_type", ["Binary", "Target"])

    # The two variants differ only in how the nominal columns are encoded.
    if encoder_type == "Binary":
        step_name, nominal_encoder = 'BinaryEncoder', BinaryEncoder()
    else:
        # TargetEncoder shuffles internally while cross-fitting; seed it.
        step_name, nominal_encoder = 'TargetEncoder', TargetEncoder(random_state=_SEED)

    return ColumnTransformer([
        ('ordinal_encoding',
         OrdinalEncoder(categories=[['New', 'Certified', 'Used']]),
         ['Stock_Type']),
        (step_name, nominal_encoder, _NOMINAL_COLS),
    ], remainder='passthrough')


def _build_model(trial):
    """Return the regressor for the sampled ``model`` with its sampled hyperparameters."""
    model_type = trial.suggest_categorical("model", ["Linear", "Ridge", "Lasso", "ElasticNet","SVR"])

    if model_type == "Linear":
        return LinearRegression()

    if model_type == "Ridge":
        return Ridge(
            alpha=trial.suggest_float("ridge_alpha", 1e-3, 10.0, log=True),
            solver=trial.suggest_categorical("ridge_solver",  ["svd", "cholesky", "lsqr"])
        )

    if model_type == "Lasso":
        return Lasso(
            alpha=trial.suggest_float("lasso_alpha", 1e-3, 10.0, log=True),
            max_iter=trial.suggest_int("lasso_max_iter", 1000, 10000),
            tol=trial.suggest_float("lasso_tol", 1e-5, 1e-2, log=True),
            selection=trial.suggest_categorical("lasso_selection", ["cyclic", "random"]),
            random_state=_SEED,  # only consulted when selection == "random"
        )

    if model_type == "SVR":
        # NOTE(review): SVR is fit on the raw target passed to cross_val_score
        # and is capped by max_iter; the recorded Trial 0 (linear kernel)
        # scored R^2 of about -85.8. Confirm whether SVR belongs in this
        # search space as configured.
        return SVR(
            C=trial.suggest_float("svr_C", 1e-2, 100.0, log=True),
            kernel=trial.suggest_categorical("svr_kernel", ["linear", "poly", "rbf", "sigmoid"]),
            degree=trial.suggest_int("svr_degree", 2, 5),
            gamma=trial.suggest_categorical("svr_gamma", ["scale", "auto"]),
            epsilon=trial.suggest_float("svr_epsilon", 1e-3, 1.0, log=True),
            tol=trial.suggest_float("svr_tol", 1e-5, 1e-2, log=True),
            max_iter=trial.suggest_int("svr_max_iter", 1000, 10000)
        )

    # Remaining choice: "ElasticNet".
    return ElasticNet(
        alpha=trial.suggest_float("elastic_alpha", 1e-3, 10.0, log=True),
        l1_ratio=trial.suggest_float("elastic_l1_ratio", 0.0, 1.0),
        max_iter=trial.suggest_int("elastic_max_iter", 1000, 10000),
        tol=trial.suggest_float("elastic_tol", 1e-5, 1e-2, log=True),
        selection=trial.suggest_categorical("elastic_selection", ["cyclic", "random"]),
        random_state=_SEED,  # only consulted when selection == "random"
    )


def _build_preprocessor(encoder):
    """Combine ``encoder`` with the fixed per-column numeric scalers."""
    scaler_steps = [
        ('std', StandardScaler(), ['Km/L']),
        ('norm', MinMaxScaler(), ['Model_Year','Valves']),
        ('robust', RobustScaler(),['Mileage']),
        ('maxabs', MaxAbsScaler(), ['Level2_Charging','Dc_Fast_Charging','Battery_Capacity','Gear_Spec','Km/L_e_City','Km/L_e_Hwy'])
    ]
    scaler = ColumnTransformer(scaler_steps, remainder='passthrough')

    cols_for_encoding = _NOMINAL_COLS + ['Stock_Type']
    # Every scaled column, derived from the steps so the two lists cannot drift.
    cols_for_scaling = [col for _, _, cols in scaler_steps for col in cols]

    # NOTE(review): no `remainder` is given here, so ColumnTransformer's default
    # ('drop') discards every column not listed above. From the sample row that
    # appears to include Engine_Size, Expected_Range, Accidents_Or_Damage,
    # Clean_Title, One_Owner_Vehicle, Personal_Use_Only and 'Unnamed: 0'.
    # Confirm this exclusion is intentional.
    return ColumnTransformer([
        ('encoder',encoder,cols_for_encoding),
        ('scaler', scaler,cols_for_scaling),
    ])


def objective(trial):
    """Optuna objective: score one encoder + scaler + regressor pipeline.

    Samples a categorical encoding strategy and a regressor (with its
    hyperparameters) from ``trial``, assembles them into a scikit-learn
    ``Pipeline``, and evaluates it on the notebook-level ``xtrain`` / ``ytrain``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample ``encoder_type``, ``model`` and the
        model-specific hyperparameters.

    Returns
    -------
    float
        Mean R^2 over a shuffled 3-fold cross-validation (higher is better).
    """
    encoder = _build_encoder(trial)
    model = _build_model(trial)

    pipe = Pipeline(
        [
            ('transformer', _build_preprocessor(encoder)),
            ('model', model)
        ]
    )

    cv = KFold(n_splits=3, shuffle=True, random_state=_SEED)
    score = cross_val_score(pipe, xtrain, ytrain, cv=cv, scoring='r2', n_jobs=-1).mean()

    return score

In [76]:
# Send MLflow tracking to the DagsHub-hosted server for this repository, then
# initialise the DagsHub client with its MLflow integration enabled.
DAGSHUB_REPO = dict(repo_owner='akshatsharma2407', repo_name='AutoNexusMlOps')

mlflow.set_tracking_uri(uri='https://dagshub.com/akshatsharma2407/AutoNexusMlOps.mlflow')
dagshub.init(**DAGSHUB_REPO, mlflow=True)

In [78]:
# Create the study, or reopen it if it already exists in the SQLite store.
# Because of `load_if_exists=True`, re-running this cell appends another 30
# trials to the stored study instead of starting over.
study = optuna.create_study(
    study_name='Linear Algos with Different Encoding & Scaling techniques',
    direction='maximize',  # the objective returns mean R^2
    storage='sqlite:///../reports/autonexus_optuna.db',
    load_if_exists=True,
    # TPE is Optuna's default sampler; seeding it makes the search repeatable.
    sampler=optuna.samplers.TPESampler(seed=42),
)

study.optimize(objective, n_trials=30)

[I 2025-10-31 17:44:05,564] A new study created in RDB with name: Linear Algos with Different Encoding & Scaling techniques
[I 2025-10-31 17:44:41,882] Trial 0 finished with value: -85.75588339575638 and parameters: {'encoder_type': 'Target', 'model': 'SVR', 'svr_C': 1.7355997261121099, 'svr_kernel': 'linear', 'svr_degree': 5, 'svr_gamma': 'scale', 'svr_epsilon': 0.05667219275542052, 'svr_tol': 0.00020805854185682276, 'svr_max_iter': 7271}. Best is trial 0 with value: -85.75588339575638.
[I 2025-10-31 17:45:04,129] Trial 1 finished with value: 0.8033030556919448 and parameters: {'encoder_type': 'Target', 'model': 'Lasso', 'lasso_alpha': 0.11344276866736047, 'lasso_max_iter': 3668, 'lasso_tol': 1.7403119524609146e-05, 'lasso_selection': 'random'}. Best is trial 1 with value: 0.8033030556919448.
[I 2025-10-31 17:45:12,016] Trial 2 finished with value: 0.8033743887059867 and parameters: {'encoder_type': 'Target', 'model': 'Ridge', 'ridge_alpha': 0.0021264388588346167, 'ridge_solver': 'svd

In [79]:
# Mirror every Optuna trial into MLflow as its own run (one run per trial).
mlflow.set_experiment(experiment_name='Linear Algos with Different Encoding & Scaling techniques')
for trial in study.trials:
    with mlflow.start_run(run_name=f"trial_{trial.number}"):
        mlflow.log_params(trial.params)

        # Compare against None rather than relying on truthiness: a valid
        # objective of exactly 0.0 is falsy and would otherwise be skipped.
        # The value may be None, e.g. for a trial that did not complete.
        if trial.value is not None:
            mlflow.log_metric("objective", trial.value)

        mlflow.set_tag("state", trial.state.name)
        mlflow.set_tag("trial_number", trial.number)

2025/10/31 17:59:50 INFO mlflow.tracking.fluent: Experiment with name 'Linear Algos with Different Encoding & Scaling techniques' does not exist. Creating a new experiment.


🏃 View run trial_0 at: https://dagshub.com/akshatsharma2407/AutoNexusMlOps.mlflow/#/experiments/4/runs/87b5205af20943a99f03e2521144c0dc
🧪 View experiment at: https://dagshub.com/akshatsharma2407/AutoNexusMlOps.mlflow/#/experiments/4
🏃 View run trial_1 at: https://dagshub.com/akshatsharma2407/AutoNexusMlOps.mlflow/#/experiments/4/runs/9f67a811ff41429f873c24d0bf3bbbbe
🧪 View experiment at: https://dagshub.com/akshatsharma2407/AutoNexusMlOps.mlflow/#/experiments/4
🏃 View run trial_2 at: https://dagshub.com/akshatsharma2407/AutoNexusMlOps.mlflow/#/experiments/4/runs/5e1d6c808329497193f082c1fa224f9d
🧪 View experiment at: https://dagshub.com/akshatsharma2407/AutoNexusMlOps.mlflow/#/experiments/4
🏃 View run trial_3 at: https://dagshub.com/akshatsharma2407/AutoNexusMlOps.mlflow/#/experiments/4/runs/0cc20c016cab476a806fd85c622a6e43
🧪 View experiment at: https://dagshub.com/akshatsharma2407/AutoNexusMlOps.mlflow/#/experiments/4
🏃 View run trial_4 at: https://dagshub.com/akshatsharma2407/AutoNexu