In [None]:
# install sklearn 
!pip3 install scikit-learn

In [None]:
# for visualization install seaborn
!pip3 install seaborn

In [None]:
# !pip install pandas

In [None]:
import pandas as pd
import numpy as np

from sklearn.linear_model import ElasticNet
from sklearn.model_selection import train_test_split
from sklearn import metrics

import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Read the red-wine quality CSV; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="../data/winequality-red.csv")

In [None]:
# Preview the first rows of the loaded frame (head() shows 5 rows by default).
df.head()

In [None]:
# Print a summary of the frame: column names, non-null counts and dtypes.
df.info()

In [None]:
# Plot a histogram showing how the wine quality scores are distributed.
sns.histplot(data=df['quality'])

In [None]:
# Separate the target from the features: 'quality' is the value to predict,
# and every remaining column is a model input.
y = df['quality']
X = df.drop(columns='quality')

In [None]:
# Hold out a quarter of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=43,
    test_size=0.25,
)

In [None]:
# Dimensions (rows, columns) of the training feature matrix.
X_train.shape

In [None]:
# Dimensions of the full dataset, for comparison with the training split.
df.shape

In [None]:
# Display the training targets (the 'quality' values of the training rows).
y_train

In [None]:
# Training model

# Regularisation settings: `alpha` scales the penalty terms and `l1` is the
# L1/L2 mixing ratio passed to ElasticNet as `l1_ratio`.
alpha, l1 = 0.5, 0.5

lr = ElasticNet(l1_ratio=l1, alpha=alpha)

In [None]:
# Fit the ElasticNet model on the training split.
lr.fit(X_train, y_train)

In [None]:
def eval_metrics(ground_truth, pred):
    """Compute regression metrics for a set of predictions.

    Args:
        ground_truth: Observed target values.
        pred: Predicted values, aligned with ``ground_truth``.

    Returns:
        A ``(rmse, mae, r2)`` tuple: root mean squared error, mean absolute
        error and the R^2 score.
    """
    # Take the square root of the MSE explicitly instead of passing
    # ``squared=False``: that keyword was deprecated in scikit-learn 1.4 and
    # removed in 1.6, so it raises TypeError on current releases. For 1-D
    # targets, as used in this notebook, the value is the same.
    rmse = np.sqrt(metrics.mean_squared_error(ground_truth, pred))
    mae = metrics.mean_absolute_error(ground_truth, pred)
    r2 = metrics.r2_score(ground_truth, pred)

    return rmse, mae, r2

In [None]:
# Predict on the training features themselves, so metrics derived from y_pred are training-set metrics.
y_pred = lr.predict(X_train)

In [None]:
# Score the training predictions against the true training targets.
rmse, mae, r2 = eval_metrics(y_train, y_pred)

In [None]:
# Report the training-set metrics, one per line.
print(f"Training metrics: \nRMSE:{rmse} \nMAE:{mae} \nR2:{r2}")

In [None]:
### How to use MLflow logging

In [None]:
import mlflow
import pandas as pd
import numpy as np

from sklearn.linear_model import ElasticNet
from sklearn.model_selection import train_test_split
from sklearn import metrics

import matplotlib.pyplot as plt
import seaborn as sns


def eval_metrics(ground_truth, pred):
    """Compute regression metrics for a set of predictions.

    Args:
        ground_truth: Observed target values.
        pred: Predicted values, aligned with ``ground_truth``.

    Returns:
        A ``(rmse, mae, r2)`` tuple: root mean squared error, mean absolute
        error and the R^2 score.
    """
    # Take the square root of the MSE explicitly instead of passing
    # ``squared=False``: that keyword was deprecated in scikit-learn 1.4 and
    # removed in 1.6, so it raises TypeError on current releases. For 1-D
    # targets, as used in this notebook, the value is the same.
    rmse = np.sqrt(metrics.mean_squared_error(ground_truth, pred))
    mae = metrics.mean_absolute_error(ground_truth, pred)
    r2 = metrics.r2_score(ground_truth, pred)

    return rmse, mae, r2

# Load the red-wine quality dataset.
df = pd.read_csv("../data/winequality-red.csv")

# Split the data: 'quality' is the target, every other column is a feature.
X = df.drop('quality', axis = 1)
y = df['quality']

# Hold out 25% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=43)


############ mlflow training

# Everything logged inside this context is attached to a single MLflow run.
with mlflow.start_run():

    # Training model
    alpha = 0.5
    l1 = 0.5
    lr = ElasticNet(alpha=alpha, l1_ratio=l1)
    lr.fit(X_train, y_train)

    # Metrics are computed on the training split; X_test is not evaluated here.
    y_pred = lr.predict(X_train)

    rmse, mae, r2 = eval_metrics(y_train, y_pred)
    print(f"Training metrics: \nRMSE:{rmse} \nMAE:{mae} \nR2:{r2}")

    # Hyper-parameters of this run.
    mlflow.log_param("Alpha", alpha)
    mlflow.log_param("L1 ratio", l1)

    # Track every metric that is printed above, including R2, so runs can be
    # compared on all three in the MLflow UI.
    mlflow.log_metric("RMSE", rmse)
    mlflow.log_metric("MAE", mae)
    mlflow.log_metric("R2", r2)
    


In [None]:
### Setting experiments, tags, run names

In [None]:
import mlflow
import pandas as pd
import numpy as np

from sklearn.linear_model import ElasticNet
from sklearn.model_selection import train_test_split
from sklearn import metrics

import matplotlib.pyplot as plt
import seaborn as sns


def eval_metrics(ground_truth, pred):
    """Compute regression metrics for a set of predictions.

    Args:
        ground_truth: Observed target values.
        pred: Predicted values, aligned with ``ground_truth``.

    Returns:
        A ``(rmse, mae, r2)`` tuple: root mean squared error, mean absolute
        error and the R^2 score.
    """
    # Take the square root of the MSE explicitly instead of passing
    # ``squared=False``: that keyword was deprecated in scikit-learn 1.4 and
    # removed in 1.6, so it raises TypeError on current releases. For 1-D
    # targets, as used in this notebook, the value is the same.
    rmse = np.sqrt(metrics.mean_squared_error(ground_truth, pred))
    mae = metrics.mean_absolute_error(ground_truth, pred)
    r2 = metrics.r2_score(ground_truth, pred)

    return rmse, mae, r2

# Load the red-wine quality dataset.
df = pd.read_csv("../data/winequality-red.csv")

# Split the data: 'quality' is the target, every other column is a feature.
X = df.drop('quality', axis = 1)
y = df['quality']

# Hold out 25% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=43)


############ mlflow training

# Experiment the run is filed under, and the display name given to the run.
experiment_name = "Experiment-2"
current_run_name = "first custom run"

# Free-form key/value tags attached to the run.
tags = {
    "Demo":"True",
    "Created by": "User1"
}

# Make the named experiment the active one for the run started below.
mlflow.set_experiment(experiment_name)

with mlflow.start_run(run_name = current_run_name):

    # Training model
    alpha = 0.5
    l1 = 0.25
    lr = ElasticNet(alpha=alpha, l1_ratio=l1)
    lr.fit(X_train, y_train)

    # Metrics are computed on the training split; X_test is not evaluated here.
    y_pred = lr.predict(X_train)

    rmse, mae, r2 = eval_metrics(y_train, y_pred)
    print(f"Training metrics: \nRMSE:{rmse} \nMAE:{mae} \nR2:{r2}")

    # Hyper-parameters of this run.
    mlflow.log_param("Alpha", alpha)
    mlflow.log_param("L1 ratio", l1)

    # Track every metric that is printed above, including R2, so runs can be
    # compared on all three in the MLflow UI.
    mlflow.log_metric("RMSE", rmse)
    mlflow.log_metric("MAE", mae)
    mlflow.log_metric("R2", r2)

    mlflow.set_tags(tags)
    


In [None]:
### Adding artifact location

import mlflow
import pandas as pd
import numpy as np

from sklearn.linear_model import ElasticNet
from sklearn.model_selection import train_test_split
from sklearn import metrics

import matplotlib.pyplot as plt
import seaborn as sns


def eval_metrics(ground_truth, pred):
    """Compute regression metrics for a set of predictions.

    Args:
        ground_truth: Observed target values.
        pred: Predicted values, aligned with ``ground_truth``.

    Returns:
        A ``(rmse, mae, r2)`` tuple: root mean squared error, mean absolute
        error and the R^2 score.
    """
    # Take the square root of the MSE explicitly instead of passing
    # ``squared=False``: that keyword was deprecated in scikit-learn 1.4 and
    # removed in 1.6, so it raises TypeError on current releases. For 1-D
    # targets, as used in this notebook, the value is the same.
    rmse = np.sqrt(metrics.mean_squared_error(ground_truth, pred))
    mae = metrics.mean_absolute_error(ground_truth, pred)
    r2 = metrics.r2_score(ground_truth, pred)

    return rmse, mae, r2

# Load the red-wine quality dataset.
df = pd.read_csv("../data/winequality-red.csv")

# Split the data: 'quality' is the target, every other column is a feature.
X = df.drop('quality', axis = 1)
y = df['quality']

# Hold out 25% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=43)


############ mlflow training

# Experiment the run is filed under, and the display name given to the run.
experiment_name = "Experiment-3"
current_run_name = "second custom run"
# NOTE(review): artifact_location is assigned but not applied anywhere below;
# the create_experiment call that would consume it is commented out and
# set_experiment receives only the experiment name. It is also an absolute,
# machine-specific path, so adjust it before re-enabling that call.
artifact_location = "/Users/saumyagoyal/JupyterNotebook/MLCon/MLCon_23/1_mlflow/artifact_store"

# Free-form key/value tags attached to the run.
tags = {
    "Demo":"True",
    "Created by": "User1"
}

# mlflow.create_experiment(experiment_name,artifact_location)
mlflow.set_experiment(experiment_name)

with mlflow.start_run(run_name = current_run_name):

    # Training model
    alpha = 0.1
    l1 = 0.5
    lr = ElasticNet(alpha=alpha, l1_ratio=l1)
    lr.fit(X_train, y_train)

    # Metrics are computed on the training split; X_test is not evaluated here.
    y_pred = lr.predict(X_train)

    rmse, mae, r2 = eval_metrics(y_train, y_pred)
    print(f"Training metrics: \nRMSE:{rmse} \nMAE:{mae} \nR2:{r2}")

    # Hyper-parameters of this run.
    mlflow.log_param("Alpha", alpha)
    mlflow.log_param("L1 ratio", l1)

    # Track every metric that is printed above, including R2, so runs can be
    # compared on all three in the MLflow UI.
    mlflow.log_metric("RMSE", rmse)
    mlflow.log_metric("MAE", mae)
    mlflow.log_metric("R2", r2)

    mlflow.set_tags(tags)
    # Store the fitted estimator as a run artifact under the given artifact path.
    mlflow.sklearn.log_model(lr, "Linear regression model - 1")
    


In [None]:
### Logging model

### Adding artifact location

import mlflow
import pandas as pd
import numpy as np

from sklearn.linear_model import ElasticNet
from sklearn.model_selection import train_test_split
from sklearn import metrics

import matplotlib.pyplot as plt
import seaborn as sns


def eval_metrics(ground_truth, pred):
    """Compute regression metrics for a set of predictions.

    Args:
        ground_truth: Observed target values.
        pred: Predicted values, aligned with ``ground_truth``.

    Returns:
        A ``(rmse, mae, r2)`` tuple: root mean squared error, mean absolute
        error and the R^2 score.
    """
    # Take the square root of the MSE explicitly instead of passing
    # ``squared=False``: that keyword was deprecated in scikit-learn 1.4 and
    # removed in 1.6, so it raises TypeError on current releases. For 1-D
    # targets, as used in this notebook, the value is the same.
    rmse = np.sqrt(metrics.mean_squared_error(ground_truth, pred))
    mae = metrics.mean_absolute_error(ground_truth, pred)
    r2 = metrics.r2_score(ground_truth, pred)

    return rmse, mae, r2

# Load the red-wine quality dataset.
df = pd.read_csv("../data/winequality-red.csv")

# Split the data: 'quality' is the target, every other column is a feature.
X = df.drop('quality', axis = 1)
y = df['quality']

# Hold out 25% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=43)


############ mlflow training

# Experiment the run is filed under, and the display name given to the run.
experiment_name = "Experiment-2"
current_run_name = "log model v3"
# NOTE(review): artifact_location is assigned but not applied anywhere below;
# the create_experiment call that would consume it is commented out and
# set_experiment receives only the experiment name. It is also an absolute,
# machine-specific path, so adjust it before re-enabling that call.
artifact_location = "/Users/saumyagoyal/JupyterNotebook/MLCon/MLCon_23/1_mlflow/artifact_store2"

# Free-form key/value tags attached to the run.
tags = {
    "Demo":"True",
    "Created by": "User1"
}

# mlflow.create_experiment(experiment_name,artifact_location)
mlflow.set_experiment(experiment_name)

with mlflow.start_run(run_name = current_run_name):

    # Training model
    alpha = 0.1
    l1 = 0.5
    lr = ElasticNet(alpha=alpha, l1_ratio=l1)
    lr.fit(X_train, y_train)

    # Metrics are computed on the training split; X_test is not evaluated here.
    y_pred = lr.predict(X_train)

    rmse, mae, r2 = eval_metrics(y_train, y_pred)
    print(f"Training metrics: \nRMSE:{rmse} \nMAE:{mae} \nR2:{r2}")

    # Hyper-parameters of this run.
    mlflow.log_param("Alpha", alpha)
    mlflow.log_param("L1 ratio", l1)

    # Track every metric that is printed above, including R2, so runs can be
    # compared on all three in the MLflow UI.
    mlflow.log_metric("RMSE", rmse)
    mlflow.log_metric("MAE", mae)
    mlflow.log_metric("R2", r2)

    mlflow.set_tags(tags)

    # Store the fitted estimator as a run artifact under the given artifact path.
    mlflow.sklearn.log_model(lr, "Linear regression model - 1")


### END