In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import mlflow
import mlflow.sklearn


mlflow.set_tracking_uri('http://0.0.0.0:5000/:5000')
mlflow.set_experiment('mlflow_dvc')

X = pd.read_csv('features.csv', usecols=['f1','f2','f3','f4'])
Y = pd.read_csv('label.csv', usecols=['label'])

with mlflow.start_run(run_name="experiment") as run: 
    # tracking run parameters
    mlflow.log_param("compute", 'local')
    mlflow.log_param("dataset", 'example')
    mlflow.log_param("dataset_version", '2.0')
    mlflow.log_param("dataset_path", 's3://score-journey-boluo/data/')
    mlflow.log_param("algo", 'random forest example')
    
    # tracking any additional hyperparameters for reproducibility
    n_estimators = 5
    mlflow.log_param("n_estimators", n_estimators)

    # train the model
    rf = RandomForestRegressor(n_estimators=n_estimators)
    rf.fit(X, Y)
    Y_pred = rf.predict(X)

    # automatically save the model artifact to the S3 bucket for later deployment
    mlflow.sklearn.log_model(rf, "rf-baseline-model")

    # log model performance using any metric
    mse = mean_squared_error(Y, Y_pred)
    mlflow.log_metric("mse", mse)
    
    mlflow.end_run()

MlflowException: API request to endpoint /api/2.0/mlflow/experiments/list failed with error code 404 != 200. Response body: '<!doctype html>
<html lang=en>
<title>404 Not Found</title>
<h1>Not Found</h1>
<p>The requested URL was not found on the server. If you entered the URL manually please check your spelling and try again.</p>
'

In [27]:
!pip install boto3

