In [1]:
import pandas as pd 
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import dagshub
import mlflow
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error,mean_squared_error

In [2]:
df = pd.read_csv("C:/Users/aksha/OneDrive/Desktop/cars_mlops_practice/sample_data.csv").select_dtypes(include=[int,float,bool])

In [3]:
train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)

In [4]:
xtest = test_data.drop(columns=['Price'])
ytest = test_data['Price'].copy()

In [5]:
scaler = StandardScaler()
scaler.set_output(transform="pandas")
scaler.fit(train_data)
train_processed_data = scaler.transform(train_data)
test_processed_data = scaler.transform(test_data)

In [6]:
xtrain = train_data.drop(columns="Price").values
ytrain = train_data["Price"].values

In [7]:
model = RandomForestRegressor()

model.fit(xtrain,ytrain)

ypred = model.predict(xtest)

mae = mean_absolute_error(ytest,ypred)
mse = mean_squared_error(ytest,ypred)



In [8]:
mlflow.set_tracking_uri('https://dagshub.com/akshatsharma2407/cars_ml_test.mlflow')

In [9]:
dagshub.init(repo_owner='akshatsharma2407', repo_name='cars_ml_test', mlflow=True)

In [22]:
model.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'criterion': 'squared_error',
 'max_depth': None,
 'max_features': 1.0,
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'monotonic_cst': None,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

In [None]:
mlflow.set_experiment('first exp')
with mlflow.start_run(run_name='first run'):
    signature = mlflow.models.infer_signature(model_input=xtest,model_output=model.predict(xtest))
    mlflow.log_params(model.get_params())
    mlflow.log_metric('mae',mae)
    mlflow.log_metric('mse',mse)
    mlflow.sklearn.log_model(model,'model', signature=signature)



In [10]:
model.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'criterion': 'squared_error',
 'max_depth': None,
 'max_features': 1.0,
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'monotonic_cst': None,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

In [22]:
from sklearn.model_selection import GridSearchCV

mlflow.set_experiment('grid search cv')

mlflow.sklearn.autolog()

params_grid = {
    'n_estimators' : [20,30],
    'max_depth' : [2,3,4]
}

with mlflow.start_run(run_name='outer',nested=True) as parent:
    grid_search = GridSearchCV(estimator=RandomForestRegressor(),param_grid=params_grid, cv=2)
    grid_search.fit(xtrain,ytrain)

with mlflow.start_run(run_name='best model') as best:
    signature = mlflow.models.infer_signature(model_input=xtrain, model_output=grid_search.best_estimator_.predict(xtrain))
    mlflow.log_params(grid_search.best_params_)
    mlflow.log_metric("best_score",grid_search.best_score_)
    mlflow.sklearn.log_model(grid_search.best_estimator_, 'model', signature=signature)

2025/10/26 14:37:33 INFO mlflow.sklearn.utils: Logging the 5 best runs, one run will be omitted.


🏃 View run outer at: https://dagshub.com/akshatsharma2407/cars_ml_test.mlflow/#/experiments/2/runs/58c13b96abc24396b01875b5261f34d1
🧪 View experiment at: https://dagshub.com/akshatsharma2407/cars_ml_test.mlflow/#/experiments/2
🏃 View run best model at: https://dagshub.com/akshatsharma2407/cars_ml_test.mlflow/#/experiments/2/runs/50d231d369e849d5aef26061aec33521
🧪 View experiment at: https://dagshub.com/akshatsharma2407/cars_ml_test.mlflow/#/experiments/2
