In [1]:
import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
import pandas as pd

In [2]:
# Load the prepared train/test CSVs and discard the two columns that are not
# used as model inputs. 'Unnamed: 0' is presumably the index written out by an
# earlier to_csv call -- verify against the data-preparation step.
unused_columns = ['Unnamed: 0', 'Timestamp']

train = pd.read_csv('../data/prepared/train/train.csv').drop(columns=unused_columns)
test = pd.read_csv('../data/prepared/test/test.csv').drop(columns=unused_columns)


In [3]:
# Preview the first two rows of the training frame after the column drop.
train.head(2)

Unnamed: 0,hour_of_day,day_of_week,year,Reading
0,13,1,2023,0.443044
1,23,1,2023,0.462793


In [4]:
# Separate the predictors from the target column ('Reading') for each split.
# `train` and `test` are already distinct frames, so no re-splitting of rows
# happens here -- only a column-wise features/target separation.
y_train, y_test = train['Reading'], test['Reading']

X_train = train.drop(columns='Reading')
X_test = test.drop(columns='Reading')

In [5]:
# Preview the first feature row to confirm 'Reading' is no longer among the columns.
X_train.head(1)

Unnamed: 0,hour_of_day,day_of_week,year
0,13,1,2023


In [6]:
# Candidate hyper-parameter values (number of trees, maximum tree depth) and a
# baseline estimator built with scikit-learn's defaults.
# NOTE(review): none of the cells visible here consume `param_grid`, and
# `model` is re-created inside the MLflow run cell -- confirm whether a grid
# search is still planned or this cell is left over.
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[None, 5, 10],
)
model = RandomForestRegressor()

In [12]:
# Train a Random Forest regressor inside an MLflow run, log its parameters,
# its test-set MSE and the fitted model, then register the logged model.

# The artifact path is named once so that log_model() and the runs:/ URI built
# from it cannot drift apart.
artifact_path = "random_forest_model"

with mlflow.start_run() as run:

    # Fixed hyper-parameters (no search is performed in this cell).
    # random_state pins the forest's internal randomness (e.g. bootstrap
    # sampling) so that re-running the cell reproduces the same model and MSE.
    model = RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42)
    model.fit(X_train, y_train)

    # Score on the held-out test split.
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)

    # Log every estimator parameter (defaults included) plus the metric.
    mlflow.log_params(model.get_params())
    mlflow.log_metric("mse", mse)

    # Store the fitted model as a run artifact and build the URI that points
    # at it; the URI is what the Model Registry call below needs.
    mlflow.sklearn.log_model(model, artifact_path)
    model_uri = f"runs:/{run.info.run_id}/{artifact_path}"

# Register the model trained above in the Model Registry. This is the single
# fixed configuration from this cell, not the winner of a search.
# NOTE(review): each execution of this cell is expected to add a new version
# under the same registered name -- confirm that is the intended workflow.
model_version = mlflow.register_model(model_uri, "Predictive Maintenance Model")

Successfully registered model 'Predictive Maintenance Model'.
Created version '1' of model 'Predictive Maintenance Model'.


In [11]:
# Ends the currently active MLflow run, if there is one.
# NOTE(review): the `with mlflow.start_run()` cell closes its own run on exit,
# and this cell's execution count (11) precedes that cell's (12) -- presumably
# it was used to clear a stale run from an earlier attempt; confirm it is
# still needed for a top-to-bottom run.
mlflow.end_run()