In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from evidently.report import Report
from evidently.metric_preset import ClassificationPreset
import json
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression

import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient

In [4]:
df = pd.read_csv("datasets/student-dataset-v1.csv")

le = LabelEncoder()
df['Result'] = le.fit_transform(df['Result'])

X = df.drop(columns=['Result'])  
y = df['Result']  

# Encode categorical variables 
X = pd.get_dummies(X)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Logistic Regression model
lr_model = LogisticRegression()
lr_model.fit(X_train, y_train)

X_train['prediction'] = lr_model.predict_proba(X_train)[:, 1]
X_test['prediction'] = lr_model.predict_proba(X_test)[:, 1]

X_train['target'] = y_train
X_test['target'] = y_test

lr_class_report = Report(metrics=[ClassificationPreset()])
lr_class_report.run(reference_data=X_train, current_data=X_test)

lr_class_report.save("json_reports/lr_report_v1.json")

# Load JSON data
with open('json_reports/lr_report_v1.json', 'r') as file:
    data = json.load(file)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [7]:
lr_accuracy_v1 = None
metric_results = data['suite']['metric_results']

for result in metric_results:
    if 'current' in result:
        lr_accuracy_v1 = result['current'].get('accuracy')
        if lr_accuracy_v1 is not None:
            break

if lr_accuracy_v1 is not None:
    print("Logistic Regression v1 Accuracy:", lr_accuracy_v1)
else:
    print("Accuracy not found in the JSON data.")

Logistic Regression v1 Accuracy: 0.9655172413793104


In [None]:
#mlflow.set_tracking_uri("http://localhost:5000")  # o la IP donde esté el Docker

In [12]:
# Log into MLflow
client = MlflowClient()

# Set experiment
mlflow.set_experiment('Monitoring with EvidentlyAI')

# Start new run
with mlflow.start_run() as run: 
    # Log metrics
    mlflow.log_metric("accuracy", lr_accuracy_v1)
    
    # Log the model
    mlflow.sklearn.log_model(lr_model, "logistic_regression_model")

    # Print run info
    print(run.info)



<RunInfo: artifact_uri='file:///c:/Users/guill/OneDrive/Documentos/simplegit/ITBA/mlruns/869127321442836481/1d6465ef11984e5d8f7af5612a33e005/artifacts', end_time=None, experiment_id='869127321442836481', lifecycle_stage='active', run_id='1d6465ef11984e5d8f7af5612a33e005', run_name='spiffy-squid-391', run_uuid='1d6465ef11984e5d8f7af5612a33e005', start_time=1747358261816, status='RUNNING', user_id='guill'>
🏃 View run spiffy-squid-391 at: http://localhost:5000/#/experiments/869127321442836481/runs/1d6465ef11984e5d8f7af5612a33e005
🧪 View experiment at: http://localhost:5000/#/experiments/869127321442836481


In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from evidently.report import Report
from evidently.metric_preset import ClassificationPreset
import json
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient

# Load and preprocess data
df = pd.read_csv("datasets/student-dataset-v1.csv")

# Encode target variable
le = LabelEncoder()
df['Result'] = le.fit_transform(df['Result'])

# Features and target
X = df.drop(columns=['Result'])
y = df['Result']

# Encode categorical variables
X = pd.get_dummies(X)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Logistic Regression model
lr_model = LogisticRegression()
lr_model.fit(X_train, y_train)

# Add predictions and target for Evidently
X_train['prediction'] = lr_model.predict_proba(X_train)[:, 1]
X_test['prediction'] = lr_model.predict_proba(X_test)[:, 1]
X_train['target'] = y_train
X_test['target'] = y_test

# Generate Evidently report
lr_class_report = Report(metrics=[ClassificationPreset()])
lr_class_report.run(reference_data=X_train, current_data=X_test)

# Save report as JSON
lr_class_report.save("json_reports/lr_report_v1.json")

# Load JSON data
with open('json_reports/lr_report_v1.json', 'r') as file:
    data = json.load(file)

# Extract metrics from Evidently report
metrics = {}
metric_results = data['suite']['metric_results']

for result in metric_results:
    if 'current' in result:
        metrics['accuracy'] = result['current'].get('accuracy')
        metrics['precision'] = result['current'].get('precision')
        metrics['recall'] = result['current'].get('recall')
        metrics['f1_score'] = result['current'].get('f1_score')
        metrics['roc_auc'] = result['current'].get('roc_auc')
        metrics['log_loss'] = result['current'].get('log_loss')
        break

# Set MLflow tracking URI
#mlflow.set_tracking_uri("http://localhost:5000")  # Adjust if needed

# Set experiment
mlflow.set_experiment('Monitoring with EvidentlyAI')

# Start MLflow run
with mlflow.start_run() as run:
    # Log metrics to MLflow
    for metric_name, metric_value in metrics.items():
        if metric_value is not None:
            mlflow.log_metric(metric_name, metric_value)

    # Log the model
    mlflow.sklearn.log_model(lr_model, "logistic_regression_model")

    # Print run info
    print(f"Run ID: {run.info.run_id}")
    print("Metrics logged:", metrics)


lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression



Run ID: 529ba0760f984dbc969ccf505e337b12
Metrics logged: {'accuracy': 0.7241379310344828, 'precision': 0.7037037037037037, 'recall': 1.0, 'f1_score': None, 'roc_auc': 0.7105263157894737, 'log_loss': 0.6472448466998987}
🏃 View run glamorous-mole-584 at: http://localhost:5000/#/experiments/869127321442836481/runs/529ba0760f984dbc969ccf505e337b12
🧪 View experiment at: http://localhost:5000/#/experiments/869127321442836481


In [10]:
! pip install -U kaleido

Collecting kaleido
  Using cached kaleido-0.2.1-py2.py3-none-win_amd64.whl.metadata (15 kB)
Using cached kaleido-0.2.1-py2.py3-none-win_amd64.whl (65.9 MB)
Installing collected packages: kaleido
Successfully installed kaleido-0.2.1


In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from evidently.report import Report
from evidently.metric_preset import ClassificationPreset
import json
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os

# Create a directory for saving plots
os.makedirs("plots", exist_ok=True)

# Load and preprocess data
df = pd.read_csv("datasets/student-dataset-v1.csv")

# Encode target variable
le = LabelEncoder()
df['Result'] = le.fit_transform(df['Result'])

# Features and target
X = df.drop(columns=['Result'])
y = df['Result']

# Encode categorical variables
X = pd.get_dummies(X)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Logistic Regression model
lr_model = LogisticRegression()
lr_model.fit(X_train, y_train)

# Add predictions and target for Evidently
X_train['prediction'] = lr_model.predict_proba(X_train)[:, 1]
X_test['prediction'] = lr_model.predict_proba(X_test)[:, 1]
X_train['target'] = y_train
X_test['target'] = y_test

# Generate Evidently report
lr_class_report = Report(metrics=[ClassificationPreset()])
lr_class_report.run(reference_data=X_train, current_data=X_test)

# Save Evidently report as HTML (interactive visualizations)
html_report_path = "plots/lr_classification_report.html"
lr_class_report.save_html(html_report_path)

# Save report as JSON for metric extraction
json_report_path = "json_reports/lr_report_v1.json"
lr_class_report.save(json_report_path)

# Load JSON data to extract metrics
with open(json_report_path, 'r') as file:
    data = json.load(file)

# Extract metrics
metrics = {}
metric_results = data['suite']['metric_results']
for result in metric_results:
    if 'current' in result:
        metrics['accuracy'] = result['current'].get('accuracy')
        metrics['precision'] = result['current'].get('precision')
        metrics['recall'] = result['current'].get('recall')
        metrics['f1_score'] = result['current'].get('f1_score')
        metrics['roc_auc'] = result['current'].get('roc_auc')
        metrics['log_loss'] = result['current'].get('log_loss')
        break

# Create a Plotly bar plot for metrics
fig = go.Figure()

# Add bars for each metric
metric_names = list(metrics.keys())
metric_values = [v for v in metrics.values() if v is not None]
fig.add_trace(
    go.Bar(
        x=metric_names,
        y=metric_values,
        text=[f"{v:.3f}" for v in metric_values],
        textposition="auto",
        marker=dict(color=metric_values, colorscale="Viridis", showscale=True),
    )
)

# Update layout for a more appealing look
fig.update_layout(
    title="Classification Metrics for Logistic Regression",
    xaxis_title="Metric",
    yaxis_title="Value",
    template="plotly_dark",  # Dark theme for a modern look
    showlegend=False,
    height=500,
    width=800,
)

# Save the plot as HTML (interactive) and PNG (static)
plot_html_path = "plots/classification_metrics.html"
plot_png_path = "plots/classification_metrics.png"
fig.write_html(plot_html_path)
#fig.write_image(plot_png_path)

# Optional: Display the plot (e.g., in Jupyter)
fig.show()

# Set MLflow tracking URI
mlflow.set_tracking_uri("http://localhost:5000")  # Adjust if needed

# Set experiment
mlflow.set_experiment('Monitoring with EvidentlyAI')

# Start MLflow run
with mlflow.start_run() as run:
    # Log metrics to MLflow
    for metric_name, metric_value in metrics.items():
        if metric_value is not None:
            mlflow.log_metric(metric_name, metric_value)

    # Log the model
    mlflow.sklearn.log_model(lr_model, "logistic_regression_model")

    # Log Evidently HTML report and Plotly plots as artifacts
    mlflow.log_artifact(html_report_path)
    mlflow.log_artifact(plot_html_path)
    #mlflow.log_artifact(plot_png_path)

    # Print run info
    print(f"Run ID: {run.info.run_id}")
    print("Metrics logged:", metrics)
    print(f"Artifacts logged: {html_report_path}, {plot_html_path}, {plot_png_path}")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [39]:
import pandas as pd
from sklearn.model_selection import train_test_split
from evidently.report import Report
from evidently.metric_preset import ClassificationPreset
import json
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient

# Load and preprocess data
df = pd.read_csv("datasets/student-dataset-v1.csv")

le = LabelEncoder()
df['Result'] = le.fit_transform(df['Result'])

X = df.drop(columns=['Result'])
y = df['Result']

# Encode categorical variables
X = pd.get_dummies(X)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Logistic Regression model
lr_model = LogisticRegression()
lr_model.fit(X_train, y_train)

# Add predictions and target for Evidently
X_train['prediction'] = lr_model.predict_proba(X_train)[:, 1]
X_test['prediction'] = lr_model.predict_proba(X_test)[:, 1]
X_train['target'] = y_train
X_test['target'] = y_test

# Generate Evidently report
lr_class_report = Report(metrics=[ClassificationPreset()])
lr_class_report.run(reference_data=X_train, current_data=X_test)

# Save report as JSON
report_path = "json_reports/lr_report_v1.json"
lr_class_report.save(report_path)

# Load JSON data
with open(report_path, 'r') as file:
    data = json.load(file)

# Extract metrics from Evidently report
metric_results = data['suite']['metric_results']
metrics = {}

for result in metric_results:
    if 'current' in result:
        metrics.update(result['current'])  # Collect all metrics from 'current'

# Define metrics to log (ensure they exist in the report)
desired_metrics = ['accuracy', 'precision', 'recall', 'f1_score', 'roc_auc', 'log_loss']
logged_metrics = {key: metrics.get(key) for key in desired_metrics if key in metrics}

# Print extracted metrics
for metric_name, metric_value in logged_metrics.items():
    print(f"Logistic Regression v1 {metric_name}: {metric_value}")

# Set MLflow tracking URI
mlflow.set_tracking_uri("http://localhost:5000")

# Log into MLflow
client = MlflowClient()

# Set experiment
mlflow.set_experiment('Monitoring with EvidentlyAI')

# Start new run
with mlflow.start_run() as run:
    # Log all extracted metrics
    for metric_name, metric_value in logged_metrics.items():
        mlflow.log_metric(metric_name, metric_value)

    # Log the model
    mlflow.sklearn.log_model(lr_model, "logistic_regression_model")

    # Log the Evidently report as an artifact
    mlflow.log_artifact(report_path, "evidently_reports")

    # Print run info
    print(run.info)


lbfgs failed to converge (status=1):
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression



Logistic Regression v1 accuracy: 0.7241379310344828
Logistic Regression v1 precision: 0.7037037037037037
Logistic Regression v1 recall: 1.0
Logistic Regression v1 roc_auc: 0.7105263157894737
Logistic Regression v1 log_loss: 0.6482681117897063




<RunInfo: artifact_uri='/mlflow/mlruns/4/bbce7dff3a9d42e9ad59c65a07a1cbeb/artifacts', end_time=None, experiment_id='4', lifecycle_stage='active', run_id='bbce7dff3a9d42e9ad59c65a07a1cbeb', run_name='classy-slug-608', run_uuid='bbce7dff3a9d42e9ad59c65a07a1cbeb', start_time=1746591992683, status='RUNNING', user_id='guill'>
🏃 View run classy-slug-608 at: http://localhost:5000/#/experiments/4/runs/bbce7dff3a9d42e9ad59c65a07a1cbeb
🧪 View experiment at: http://localhost:5000/#/experiments/4
