# Modules import

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder,LabelEncoder,MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
import lightgbm as lgb
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report
import os
import joblib
import mlflow
import json
import dagshub


In [2]:
survey_df=pd.read_csv('survey_data_feature_engineered.csv')
survey_df.head()

Unnamed: 0,respondent_id,gender,zone,occupation,income_levels,consume_frequency(weekly),current_brand,preferable_consumption_size,awareness_of_other_brands,reasons_for_choosing_brands,flavor_preference,purchase_channel,packaging_preference,health_concerns,typical_consumption_situations,price_range,age_group,cf_ab_score,zas_score,bsi
0,R00001,M,Urban,Working Professional,<10L,3-4 times,Newcomer,Medium (500 ml),0 to 1,Price,Traditional,Online,Simple,Medium (Moderately health-conscious),"Active (eg. Sports, gym)",100-150,26-35,0.67,3,1
1,R00002,F,Metro,Working Professional,> 35L,5-7 times,Established,Medium (500 ml),2 to 4,Quality,Exotic,Retail Store,Premium,Medium (Moderately health-conscious),Social (eg. Parties),200-250,46-55,0.6,20,0
2,R00003,F,Rural,Working Professional,> 35L,3-4 times,Newcomer,Medium (500 ml),2 to 4,Availability,Traditional,Retail Store,Premium,Medium (Moderately health-conscious),"Active (eg. Sports, gym)",200-250,36-45,0.5,5,0
3,R00004,F,Urban,Working Professional,16L - 25L,5-7 times,Newcomer,Medium (500 ml),0 to 1,Brand Reputation,Exotic,Online,Eco-Friendly,Low (Not very concerned),"Active (eg. Sports, gym)",150-200,26-35,0.75,9,0
4,R00005,M,Metro,Student,Not Reported,3-4 times,Established,Medium (500 ml),0 to 1,Availability,Traditional,Online,Premium,Medium (Moderately health-conscious),"Active (eg. Sports, gym)",50-100,18-25,0.67,0,0


# Feature and Target variable

In [3]:
survey_df.shape

(29956, 20)

In [4]:
X=survey_df.drop(['respondent_id','price_range'],axis='columns')
y=survey_df['price_range']
X.shape

(29956, 18)

In [5]:
X.head()

Unnamed: 0,gender,zone,occupation,income_levels,consume_frequency(weekly),current_brand,preferable_consumption_size,awareness_of_other_brands,reasons_for_choosing_brands,flavor_preference,purchase_channel,packaging_preference,health_concerns,typical_consumption_situations,age_group,cf_ab_score,zas_score,bsi
0,M,Urban,Working Professional,<10L,3-4 times,Newcomer,Medium (500 ml),0 to 1,Price,Traditional,Online,Simple,Medium (Moderately health-conscious),"Active (eg. Sports, gym)",26-35,0.67,3,1
1,F,Metro,Working Professional,> 35L,5-7 times,Established,Medium (500 ml),2 to 4,Quality,Exotic,Retail Store,Premium,Medium (Moderately health-conscious),Social (eg. Parties),46-55,0.6,20,0
2,F,Rural,Working Professional,> 35L,3-4 times,Newcomer,Medium (500 ml),2 to 4,Availability,Traditional,Retail Store,Premium,Medium (Moderately health-conscious),"Active (eg. Sports, gym)",36-45,0.5,5,0
3,F,Urban,Working Professional,16L - 25L,5-7 times,Newcomer,Medium (500 ml),0 to 1,Brand Reputation,Exotic,Online,Eco-Friendly,Low (Not very concerned),"Active (eg. Sports, gym)",26-35,0.75,9,0
4,M,Metro,Student,Not Reported,3-4 times,Established,Medium (500 ml),0 to 1,Availability,Traditional,Online,Premium,Medium (Moderately health-conscious),"Active (eg. Sports, gym)",18-25,0.67,0,0


In [6]:
y.head()

0    100-150
1    200-250
2    200-250
3    150-200
4     50-100
Name: price_range, dtype: object

# Data Splitting

In [7]:
X_train, X_test, y_train, y_test = train_test_split(
     X, y, test_size=0.25, random_state=42)
X_train.shape

(22467, 18)

In [8]:
X_test.shape

(7489, 18)

# Feature Encoding

In [9]:
X_test.columns

Index(['gender', 'zone', 'occupation', 'income_levels',
       'consume_frequency(weekly)', 'current_brand',
       'preferable_consumption_size', 'awareness_of_other_brands',
       'reasons_for_choosing_brands', 'flavor_preference', 'purchase_channel',
       'packaging_preference', 'health_concerns',
       'typical_consumption_situations', 'age_group', 'cf_ab_score',
       'zas_score', 'bsi'],
      dtype='object')

In [10]:
column_data = {}
column_data["columns"]= X.columns.tolist()

for col in X.columns:
    print("Column: ",col)
    print(X[col].unique())
    print()
    column_data[col] = X[col].unique().tolist()

with open("column_datas.json","w") as json_file:
    json.dump(column_data, json_file, indent=4)
    

Column:  gender
['M' 'F']

Column:  zone
['Urban' 'Metro' 'Rural' 'Semi-Urban']

Column:  occupation
['Working Professional' 'Student' 'Entrepreneur' 'Retired']

Column:  income_levels
['<10L' '> 35L' '16L - 25L' 'Not Reported' '10L - 15L' '26L - 35L']

Column:  consume_frequency(weekly)
['3-4 times' '5-7 times' '0-2 times']

Column:  current_brand
['Newcomer' 'Established']

Column:  preferable_consumption_size
['Medium (500 ml)' 'Large (1 L)' 'Small (250 ml)']

Column:  awareness_of_other_brands
['0 to 1' '2 to 4' 'above 4']

Column:  reasons_for_choosing_brands
['Price' 'Quality' 'Availability' 'Brand Reputation']

Column:  flavor_preference
['Traditional' 'Exotic']

Column:  purchase_channel
['Online' 'Retail Store']

Column:  packaging_preference
['Simple' 'Premium' 'Eco-Friendly']

Column:  health_concerns
['Medium (Moderately health-conscious)' 'Low (Not very concerned)'
 'High (Very health-conscious)']

Column:  typical_consumption_situations
['Active (eg. Sports, gym)' 'Social

In [11]:
y.unique()

array(['100-150', '200-250', '150-200', '50-100'], dtype=object)

In [12]:
target_encoder = OrdinalEncoder(categories=[[ '50-100','100-150', '150-200','200-250']])
y_train_encoded = target_encoder.fit_transform(y_train.values.reshape(-1, 1)).reshape(-1)
y_test_encoded = target_encoder.transform(y_test.values.reshape(-1, 1)).reshape(-1)

categorical_columns = X.select_dtypes(include=['object', 'category']).columns
ordinal_features = ['age_group', 'income_levels', 'health_concerns',
                    'consume_frequency(weekly)', 'preferable_consumption_size']
onehot_features = [col for col in categorical_columns if col not in ordinal_features]


age_group_order=['18-25','26-35','36-45' ,'46-55','56-70','70+']# added age group 70+
income_levels_order=[ 'Not Reported','<10L' ,'10L - 15L', '16L - 25L', '26L - 35L' ,'> 35L']
health_concerns_order=['Low (Not very concerned)','Medium (Moderately health-conscious)', 
                       'High (Very health-conscious)']
consume_frequency_order=['0-2 times','3-4 times','5-7 times' ]
consumption_size_order=['Small (250 ml)','Medium (500 ml)','Large (1 L)']


preprocessor = ColumnTransformer(
    transformers=[
        ('ordinal', OrdinalEncoder(categories=[
            age_group_order,
            income_levels_order,
            health_concerns_order,
            consume_frequency_order,
            consumption_size_order
        ]), ordinal_features),
        
        ('onehot', OneHotEncoder(handle_unknown='ignore'), onehot_features)
    ]
)

# Model Training

In [13]:
model_params = {
    'GaussianNaiveBayes': {
        'model': GaussianNB(),
        'params': {}
    },
    'LogisticRegression': {
        'model': LogisticRegression(max_iter=100),
        'params': {
            'classifier__C': [0.1, 1, 10],
            'classifier__solver': ['liblinear', 'saga']
        }
    },
    'SVC': {
        'model': SVC(random_state=42),
        'params': {
            'classifier__C': [0.1, 1, 10],
            'classifier__kernel': ['linear', 'rbf']
        }
    },
    'RandomForest': {
        'model': RandomForestClassifier(random_state=42),
        'params': {
            'classifier__n_estimators': [50, 100, 200],
            'classifier__max_depth': [None, 10, 20],
        }
    },
    'XGBoost': {
        'model': xgb.XGBClassifier(random_state=42),
        'params': {
            'classifier__n_estimators': [50, 100, 200],
            'classifier__max_depth': [3, 6, 9],
            'classifier__learning_rate': [0.01, 0.1, 0.3],
            'classifier__subsample': [0.8, 1.0]
        }
    },
    'LightGBM': {
        'model': lgb.LGBMClassifier(random_state=42),
        'params': {
            'classifier__n_estimators': [50, 100, 200],
            'classifier__max_depth': [3, 6, 9],
            'classifier__learning_rate': [0.01, 0.1, 0.3],
            'classifier__num_leaves': [31, 63]
        }
    }
}


In [14]:
model_results = []
models={}
os.makedirs('models', exist_ok=True)

In [15]:



# Iterate through models for hyperparameter tuning and training
for model_name, config in model_params.items():
    pipeline = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('scaler', MinMaxScaler()),
        ('classifier', config['model'])
    ])
    
    # Apply RandomizedSearchCV for hyperparameter tuning
    search = RandomizedSearchCV(
        pipeline, 
        param_distributions=config['params'], 
        n_iter=10, 
        scoring='accuracy', 
        cv=5, 
        random_state=42, 
        verbose=1,
        n_jobs=-2
    )
    search.fit(X_train, y_train_encoded)

    print(f"Model: {model_name}, Best Score: {search.best_score_}, Best Params: {search.best_params_}")
    
    # Test the best model on the test set
    best_model = search.best_estimator_  # Get the best pipeline from the search
    y_pred = best_model.predict(X_test)
    test_accuracy = accuracy_score(y_test_encoded, y_pred)
    
    # Get detailed classification report
    clf_report = classification_report(y_test_encoded, y_pred, output_dict=True)  # Store as dictionary
    
    # Store the results in the list
    model_results.append({
        'Model': model_name,
        'Best Score (CV)': search.best_score_,
        'Best Params': search.best_params_,
        'Test Accuracy': test_accuracy,
        'Classification Report': clf_report,
    })
    
    

    
    model_filename = f'models/{model_name}_best_model.joblib'

    
    joblib.dump(best_model, model_filename)
    print(f"Model saved to {model_filename}")


Fitting 5 folds for each of 1 candidates, totalling 5 fits




Model: GaussianNaiveBayes, Best Score: 0.5598874419794189, Best Params: {}
Model saved to models/GaussianNaiveBayes_best_model.joblib
Fitting 5 folds for each of 6 candidates, totalling 30 fits




Model: LogisticRegression, Best Score: 0.8455068166661069, Best Params: {'classifier__solver': 'saga', 'classifier__C': 10}
Model saved to models/LogisticRegression_best_model.joblib
Fitting 5 folds for each of 6 candidates, totalling 30 fits
Model: SVC, Best Score: 0.8667381124235088, Best Params: {'classifier__kernel': 'rbf', 'classifier__C': 10}
Model saved to models/SVC_best_model.joblib
Fitting 5 folds for each of 9 candidates, totalling 45 fits




Model: RandomForest, Best Score: 0.8800023197832043, Best Params: {'classifier__n_estimators': 200, 'classifier__max_depth': None}
Model saved to models/RandomForest_best_model.joblib
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Model: XGBoost, Best Score: 0.9190367828271857, Best Params: {'classifier__subsample': 0.8, 'classifier__n_estimators': 100, 'classifier__max_depth': 6, 'classifier__learning_rate': 0.3}
Model saved to models/XGBoost_best_model.joblib
Fitting 5 folds for each of 10 candidates, totalling 50 fits
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001019 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 78
[LightGBM] [Info] Number of data points in the train set: 22467, number of used features: 34
[LightGBM] [Info] Start training from score -2.100810
[LightGBM] [Info] Start training from score -1.343386
[LightGBM] [Info] Start training from score -1.228925
[LightGBM]

## Save Target variable encoder


In [16]:
encoder_filename = "models/target_encoder.joblib"
joblib.dump(target_encoder, encoder_filename)
print(f"Encoder saved as {encoder_filename}")

Encoder saved as models/target_encoder.joblib


# Model Comparison

In [17]:
results_df = pd.DataFrame(model_results)
results_df.sort_values('Test Accuracy',ascending=False)

Unnamed: 0,Model,Best Score (CV),Best Params,Test Accuracy,Classification Report
5,LightGBM,0.921841,"{'classifier__num_leaves': 31, 'classifier__n_...",0.922553,"{'0.0': {'precision': 0.9078091106290672, 'rec..."
4,XGBoost,0.919037,"{'classifier__subsample': 0.8, 'classifier__n_...",0.920016,"{'0.0': {'precision': 0.9208472686733556, 'rec..."
3,RandomForest,0.880002,"{'classifier__n_estimators': 200, 'classifier_...",0.882227,"{'0.0': {'precision': 0.9199057714958775, 'rec..."
2,SVC,0.866738,"{'classifier__kernel': 'rbf', 'classifier__C':...",0.872346,"{'0.0': {'precision': 0.8890122086570478, 'rec..."
1,LogisticRegression,0.845507,"{'classifier__solver': 'saga', 'classifier__C'...",0.84524,"{'0.0': {'precision': 0.8710059171597633, 'rec..."
0,GaussianNaiveBayes,0.559887,{},0.564428,"{'0.0': {'precision': 0.39679633867276887, 're..."


In [18]:
for index, row in results_df.iterrows():
    print(f"\nModel: {row['Model']}")
    report = row['Classification Report']
    for label, metrics in report.items():
        if isinstance(metrics, dict):  # Only for labels, not support / accuracy
            print(f"  {label}: Precision={metrics['precision']}, Recall={metrics['recall']}, F1-Score={metrics['f1-score']}")


Model: GaussianNaiveBayes
  0.0: Precision=0.39679633867276887, Recall=0.956953642384106, F1-Score=0.5609835004852798
  1.0: Precision=0.4501541623843782, Recall=0.22694300518134716, F1-Score=0.3017568033069239
  2.0: Precision=0.6100254885301615, Recall=0.32298695456590193, F1-Score=0.4223529411764706
  3.0: Precision=0.6987951807228916, Recall=0.9069958847736626, F1-Score=0.7893982808022922
  macro avg: Precision=0.5389427925775501, Recall=0.6034698717262543, F1-Score=0.5186228814427416
  weighted avg: Precision=0.5718325499263659, Recall=0.5644278274803044, F1-Score=0.5271424879699413

Model: LogisticRegression
  0.0: Precision=0.8710059171597633, Recall=0.8123620309050773, F1-Score=0.8406624785836665
  1.0: Precision=0.8021201413427562, Recall=0.8233160621761658, F1-Score=0.8125799028381488
  2.0: Precision=0.7995535714285714, Recall=0.805668016194332, F1-Score=0.8025991485547839
  3.0: Precision=0.913743293437887, Recall=0.9111111111111111, F1-Score=0.91242530393571
  macro avg: 

In [19]:
for index, row in results_df.iterrows():
    print(f"\nModel: {row['Model']}")
    report = row['Best Params']
    for param, value in report.items():
        print(f"  {param}: {value}")


Model: GaussianNaiveBayes

Model: LogisticRegression
  classifier__solver: saga
  classifier__C: 10

Model: SVC
  classifier__kernel: rbf
  classifier__C: 10

Model: RandomForest
  classifier__n_estimators: 200
  classifier__max_depth: None

Model: XGBoost
  classifier__subsample: 0.8
  classifier__n_estimators: 100
  classifier__max_depth: 6
  classifier__learning_rate: 0.3

Model: LightGBM
  classifier__num_leaves: 31
  classifier__n_estimators: 100
  classifier__max_depth: 6
  classifier__learning_rate: 0.3


# ML FLOW

In [20]:
results_df

Unnamed: 0,Model,Best Score (CV),Best Params,Test Accuracy,Classification Report
0,GaussianNaiveBayes,0.559887,{},0.564428,"{'0.0': {'precision': 0.39679633867276887, 're..."
1,LogisticRegression,0.845507,"{'classifier__solver': 'saga', 'classifier__C'...",0.84524,"{'0.0': {'precision': 0.8710059171597633, 'rec..."
2,SVC,0.866738,"{'classifier__kernel': 'rbf', 'classifier__C':...",0.872346,"{'0.0': {'precision': 0.8890122086570478, 'rec..."
3,RandomForest,0.880002,"{'classifier__n_estimators': 200, 'classifier_...",0.882227,"{'0.0': {'precision': 0.9199057714958775, 'rec..."
4,XGBoost,0.919037,"{'classifier__subsample': 0.8, 'classifier__n_...",0.920016,"{'0.0': {'precision': 0.9208472686733556, 'rec..."
5,LightGBM,0.921841,"{'classifier__num_leaves': 31, 'classifier__n_...",0.922553,"{'0.0': {'precision': 0.9078091106290672, 'rec..."


In [21]:
model_results

[{'Model': 'GaussianNaiveBayes',
  'Best Score (CV)': 0.5598874419794189,
  'Best Params': {},
  'Test Accuracy': 0.5644278274803044,
  'Classification Report': {'0.0': {'precision': 0.39679633867276887,
    'recall': 0.956953642384106,
    'f1-score': 0.5609835004852798,
    'support': 906},
   '1.0': {'precision': 0.4501541623843782,
    'recall': 0.22694300518134716,
    'f1-score': 0.3017568033069239,
    'support': 1930},
   '2.0': {'precision': 0.6100254885301615,
    'recall': 0.32298695456590193,
    'f1-score': 0.4223529411764706,
    'support': 2223},
   '3.0': {'precision': 0.6987951807228916,
    'recall': 0.9069958847736626,
    'f1-score': 0.7893982808022922,
    'support': 2430},
   'accuracy': 0.5644278274803044,
   'macro avg': {'precision': 0.5389427925775501,
    'recall': 0.6034698717262543,
    'f1-score': 0.5186228814427416,
    'support': 7489},
   'weighted avg': {'precision': 0.5718325499263659,
    'recall': 0.5644278274803044,
    'f1-score': 0.52714248796994

## DagsHub setup

In [22]:
dagshub_url="https://dagshub.com/2411chirag/Codebasics-VI-Codex.mlflow"

In [23]:
dagshub.init(repo_owner="2411chirag",repo_name="Codebasics-VI-Codex",mlflow=True)
os.environ["MLFLOW_TRACKING_USERNAME"] = "2411chirag"
os.environ["MLFLOW_TRACKING_PASSWORD"] = "a09b374619b99ba742834c5559a479df9477c28c"
os.environ["MLFLOW_TRACKING_URI"] = dagshub_url



In [24]:
experiment_name="Codex Energy Drink Survey"
# experiment_id = mlflow.create_experiment(experiment_name)

In [25]:
mlflow.set_tracking_uri(dagshub_url)

In [26]:
mlflow.set_experiment(experiment_name)
# mlflow.set_tracking_uri("http://127.0.0.1:5000/")



<Experiment: artifact_location='mlflow-artifacts:/d1aba6f6fae64f1b897ed8a9e82603ef', creation_time=1732126104695, experiment_id='0', last_update_time=1732126104695, lifecycle_stage='active', name='Codex Energy Drink Survey', tags={'mlflow.sharedViewState.d537fdc0c959fa49a2e0b67727b54010fcd4ffd194d195a2598ac3fdf5c6e596': '{"searchFilter":"","orderByKey":"attributes.start_time","orderByAsc":false,"startTime":"ALL","lifecycleFilter":"Active","datasetsFilter":[],"modelVersionFilter":"All '
                                                                                            'Runs","selectedColumns":["attributes.`Source`","attributes.`Models`","attributes.`Dataset`"],"runsExpanded":{},"runsPinned":[],"runsHidden":[],"runsHiddenMode":"FIRST_10_RUNS","viewMaximized":false,"runListHidden":false,"isAccordionReordered":false,"groupBy":"","groupsExpanded":{}}'}>

## Upload

In [27]:
for i,model_data in enumerate(model_results):
    model_name=model_data['Model']
    model_filename = f'models/{model_name}_best_model.pkl'
    with mlflow.start_run(run_name=model_name):
        # Check if model file exists
        if os.path.exists(model_filename):
            # Load the best trained model from Phase 1
            best_model = joblib.load(model_filename)
            
            # Log the model and associated information to MLflow
            mlflow.log_param("Model Name", model_name)
            
            # Log hyperparameters (if available)
            for param, value in model_data['Best Params'].items():
                mlflow.log_param(param, value) 
            
            # Log the model itself (depending on model type)
            if model_name == 'XGBoost':
                mlflow.sklearn.log_model(best_model, model_name)#save with sklearn for pipeline
            elif model_name == 'LightGBM':
                mlflow.lightgbm.log_model(best_model, model_name)
            else:
                mlflow.sklearn.log_model(best_model, model_name)
        
            
            
            report = model_data['Classification Report']
            for label, metrics in report.items():
                if label=='accuracy':
                    continue
                for metric,value in metrics.items():
                    mlflow.log_metric(f"{metric}_{label}",value)
                    
            # Log test accuracy
            mlflow.log_metric("test_accuracy",model_data['Test Accuracy'])
            
            report_filename = f'models/{model_name}_classification_report.json'
            with open(report_filename, 'w') as f:
                json.dump(report, f)
            
            mlflow.log_artifact(report_filename)






🏃 View run GaussianNaiveBayes at: https://dagshub.com/2411chirag/Codebasics-VI-Codex.mlflow/#/experiments/0/runs/c911bfcc542a4d57a13b5d606fc7fef0
🧪 View experiment at: https://dagshub.com/2411chirag/Codebasics-VI-Codex.mlflow/#/experiments/0




🏃 View run LogisticRegression at: https://dagshub.com/2411chirag/Codebasics-VI-Codex.mlflow/#/experiments/0/runs/9edb0815c48d400b86f2b55d45b82c39
🧪 View experiment at: https://dagshub.com/2411chirag/Codebasics-VI-Codex.mlflow/#/experiments/0




🏃 View run SVC at: https://dagshub.com/2411chirag/Codebasics-VI-Codex.mlflow/#/experiments/0/runs/61b48a3b7c7d4a5cafa973b4098a7a96
🧪 View experiment at: https://dagshub.com/2411chirag/Codebasics-VI-Codex.mlflow/#/experiments/0




🏃 View run RandomForest at: https://dagshub.com/2411chirag/Codebasics-VI-Codex.mlflow/#/experiments/0/runs/85775f469cb14fbab33f9336077378c5
🧪 View experiment at: https://dagshub.com/2411chirag/Codebasics-VI-Codex.mlflow/#/experiments/0




🏃 View run XGBoost at: https://dagshub.com/2411chirag/Codebasics-VI-Codex.mlflow/#/experiments/0/runs/b71ade1526be478bab59142b50ceeeab
🧪 View experiment at: https://dagshub.com/2411chirag/Codebasics-VI-Codex.mlflow/#/experiments/0




🏃 View run LightGBM at: https://dagshub.com/2411chirag/Codebasics-VI-Codex.mlflow/#/experiments/0/runs/afcccf56bf30442f882081c0ee293eae
🧪 View experiment at: https://dagshub.com/2411chirag/Codebasics-VI-Codex.mlflow/#/experiments/0


### LOG ENCODER

In [28]:
with mlflow.start_run(run_name="Target Variable Encoder"):
    mlflow.log_param("Encoder Name", "Target Oridnal Encoder")
    encoder_filename = "models/target_encoder.joblib"
    joblib.dump(target_encoder, encoder_filename)
    mlflow.log_artifact(encoder_filename)

🏃 View run Target Variable Encoder at: https://dagshub.com/2411chirag/Codebasics-VI-Codex.mlflow/#/experiments/0/runs/69d5119d5ca34b88a31976e63edb2332
🧪 View experiment at: https://dagshub.com/2411chirag/Codebasics-VI-Codex.mlflow/#/experiments/0


### REGISTER MODEL

In [30]:
model_name="XGBoost"
run_id=input(f"Enter run id for {model_name}:")
model_uri=f"runs:/{run_id}/{model_name}"
result= mlflow.register_model(model_uri,model_name,)

Enter run id for XGBoost: b71ade1526be478bab59142b50ceeeab


Registered model 'XGBoost' already exists. Creating a new version of this model...
2024/11/21 10:45:20 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: XGBoost, version 3
Created version '3' of model 'XGBoost'.


In [32]:
r=search.predict(X_test[0:1])
r

array([1.])

In [36]:
target_encoder.inverse_transform(r.reshape(-1, 1))[0,0]


'100-150'