In [1]:
import mlflow

# Point the MLflow client at the tracking server on the loopback address,
# replacing whatever tracking URI was configured before.
TRACKING_SERVER_URI = "http://127.0.0.1:5000"
mlflow.set_tracking_uri(TRACKING_SERVER_URI)

In [2]:
# Read the tracking URI back from MLflow and print it, to confirm which
# tracking server the client is currently configured to use.
import mlflow

tracking_uri = mlflow.get_tracking_uri()
print(f"MLflow Tracking URI: {tracking_uri}")

MLflow Tracking URI: http://127.0.0.1:5000


In [3]:
# Load the wine dataset from wine.csv and take a first look at it:
# column dtypes / non-null counts, then the first five rows.
import pandas as pd

wine = pd.read_csv('wine.csv')

# DataFrame.info() writes its summary to stdout itself and returns None,
# so it is called directly; wrapping it in print() emitted a stray "None"
# line after the summary.
wine.info()
print(wine.head())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1599 entries, 0 to 1598
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   fixed acidity         1599 non-null   float64
 1   volatile acidity      1599 non-null   float64
 2   citric acid           1599 non-null   float64
 3   residual sugar        1599 non-null   float64
 4   chlorides             1599 non-null   float64
 5   free sulfur dioxide   1599 non-null   float64
 6   total sulfur dioxide  1599 non-null   float64
 7   density               1599 non-null   float64
 8   pH                    1599 non-null   float64
 9   sulphates             1599 non-null   float64
 10  alcohol               1599 non-null   float64
 11  quality               1599 non-null   int64  
dtypes: float64(11), int64(1)
memory usage: 150.0 KB
None
   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         

In [4]:
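# NOTE(review): presumably kept for manual recovery — uncomment and run this
# to end an MLflow run that was left active (e.g. by an interrupted cell).
#mlflow.end_run()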

In [5]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import mlflow
import mlflow.sklearn
import pickle

# Train a linear-regression model that predicts `quality` from the other
# columns of wine.csv, evaluate it, then log / register it with MLflow and
# also save a pickle copy next to the notebook.

# Load the wine dataset
wine_data = pd.read_csv('wine.csv')

# Separate features and target variable
X = wine_data.drop(columns=['quality'])
y = wine_data['quality']

# Split the dataset into train and test sets (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Bundle standardization and regression into one estimator. The regressor is
# trained on standardized features, so the fitted scaler has to travel with
# it: logging only the bare LinearRegression would make the registered model
# silently mis-predict when it is given raw (unscaled) feature values.
model_pipeline = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("regressor", LinearRegression()),
])
model_pipeline.fit(X_train, y_train)

# Keep the names this cell has always defined, for any later cell that uses
# them: the fitted steps and the standardized feature matrices.
scaler = model_pipeline.named_steps["scaler"]
lr_model = model_pipeline.named_steps["regressor"]
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Evaluate the model (the pipeline applies the scaling internally, so these
# are the same numbers as predicting with lr_model on the scaled matrices)
train_mse = mean_squared_error(y_train, model_pipeline.predict(X_train))
test_mse = mean_squared_error(y_test, model_pipeline.predict(X_test))

# Start MLflow run
with mlflow.start_run():

    # Log hyperparameters
    mlflow.log_param("model", "linear_regression")

    # Log metrics
    mlflow.log_metric("train_mse", train_mse)
    mlflow.log_metric("test_mse", test_mse)

    # Log the full pipeline (scaler + regressor) to the Model Registry so the
    # registered model accepts raw feature values at inference time
    mlflow.sklearn.log_model(
        sk_model=model_pipeline,
        artifact_path="linear_regression_model",
        registered_model_name="wine_quality_model"
    )

    # Get the artifact URI under which the model was logged in this run
    model_uri = mlflow.get_artifact_uri("linear_regression_model")

    # Save the trained pipeline using pickle; it includes the scaler for the
    # same reason as the logged model above
    with open("linear_regression_model.pkl", "wb") as file:
        pickle.dump(model_pipeline, file)

print("Model URI:", model_uri)
print("done")


Registered model 'wine_quality_model' already exists. Creating a new version of this model...
2024/05/21 12:55:19 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: wine_quality_model, version 4


Model URI: file:///Users/alaynecross/mlruns/0/73a8d14a62374e03ac68e340990d76a7/artifacts/linear_regression_model
done


Created version '4' of model 'wine_quality_model'.
