In [1]:
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

In [2]:
import json
from pathlib import Path

import pandas as pd
from sklearn.model_selection import train_test_split
from evidently.report import Report
from evidently.metric_preset import ClassificationPreset
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression


import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient

In [3]:
# Load version 1 of the student dataset from the relative datasets/ folder.
df = pd.read_csv(filepath_or_buffer="datasets/student-dataset-v1.csv")

In [4]:
# Replace the 'Result' labels with the codes produced by a LabelEncoder.
# `le` keeps the fitted encoder, so the original labels remain recoverable.
le = LabelEncoder()
result_codes = le.fit_transform(df['Result'])
df['Result'] = result_codes

In [5]:
# Target: the 'Result' column.
y = df['Result']
# Features: every column except 'Result'.
X = df.drop('Result', axis=1)

In [6]:
# One-hot encode the categorical feature columns; pd.get_dummies carries
# the remaining columns through unchanged.
X = pd.get_dummies(data=X)

In [7]:
# Hold out 20% of the rows as the test set; the fixed random_state keeps
# the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Logistic Regression model
# Later cells add 'prediction' and 'target' columns to X_train in place.
# Dropping them here (when present) prevents a re-run of this cell from
# silently training on the label itself; on a fresh run neither column
# exists yet, so the training matrix is unchanged.
non_feature_columns = ['prediction', 'target']
lr_model = LogisticRegression()
lr_model.fit(X_train.drop(columns=non_feature_columns, errors='ignore'), y_train)

In [9]:
# Store, for each split, the predicted probability of the class at index 1
# of lr_model.classes_ in a 'prediction' column.
# X_train / X_test are extended in place (here and in the next cell), so
# score only the feature columns: this keeps the cell re-runnable after
# 'prediction' and 'target' have been added.
non_feature_columns = ['prediction', 'target']
X_train['prediction'] = lr_model.predict_proba(
    X_train.drop(columns=non_feature_columns, errors='ignore')
)[:, 1]
X_test['prediction'] = lr_model.predict_proba(
    X_test.drop(columns=non_feature_columns, errors='ignore')
)[:, 1]

In [10]:
# Attach the true labels to each split as a 'target' column
# (pandas aligns the label Series to the frame by index).
for frame, labels in ((X_train, y_train), (X_test, y_test)):
    frame['target'] = labels

In [11]:
# Build an Evidently report from the classification preset and run it with
# the training split as reference data and the test split as current data.
lr_class_report = Report(metrics=[ClassificationPreset()])
lr_class_report.run(
    current_data=X_test,
    reference_data=X_train,
)

In [12]:
# Write the report to json_reports/lr_report_v1.json.
# The output folder is created first (no-op if it already exists) so the
# write does not depend on the folder being present beforehand.
report_path = Path("json_reports/lr_report_v1.json")
report_path.parent.mkdir(parents=True, exist_ok=True)
lr_class_report.save(str(report_path))

In [13]:
# Read the saved report file back in as plain Python data.
with open('json_reports/lr_report_v1.json', mode='r') as report_file:
    data = json.loads(report_file.read())

In [14]:
# Default to None so the name exists even when nothing is found.
lr_accuracy_v1 = None
metric_results = data['suite']['metric_results']

# Scan the metric results in order and keep the first non-None 'accuracy'
# stored under an entry's 'current' section; the scan stops at that entry.
lr_accuracy_v1 = next(
    (
        accuracy
        for entry in metric_results
        if 'current' in entry
        for accuracy in (entry['current'].get('accuracy'),)
        if accuracy is not None
    ),
    None,
)

if lr_accuracy_v1 is None:
    print("Accuracy not found in the JSON data.")
else:
    print("Logistic Regression Accuracy:", lr_accuracy_v1)

Logistic Regression v1 Accuracy: 0.7241379310344828


In [15]:
# MLflow tracking client handle (not used further in this cell).
client = MlflowClient()

# Select the experiment that the run below is recorded under.
mlflow.set_experiment('Monitoring with EvidentlyAI')

# The accuracy lookup leaves lr_accuracy_v1 as None when no value is found.
# Stop here in that case, before a run is opened, so that no run is created
# for a metric that cannot be logged.
if lr_accuracy_v1 is None:
    raise ValueError(
        "lr_accuracy_v1 is None; the accuracy was not found in the report, "
        "so there is nothing to log to MLflow."
    )

# Start a new run; the context manager ends it when the block exits.
with mlflow.start_run() as run:

    # Log metrics
    mlflow.log_metric("accuracy", lr_accuracy_v1)

    # Store the fitted model as a run artifact.
    mlflow.sklearn.log_model(lr_model, "logistic_regression_model")

    print(run.info)

2024/05/19 19:16:50 INFO mlflow.tracking.fluent: Experiment with name 'Monitoring with EvidentlyAI' does not exist. Creating a new experiment.


<RunInfo: artifact_uri='file:///Users/sanjju/projects/evidently-ai-poc/mlruns/961957647787017758/d5752c1bac0b49b78e93c0cdc9c734f1/artifacts', end_time=None, experiment_id='961957647787017758', lifecycle_stage='active', run_id='d5752c1bac0b49b78e93c0cdc9c734f1', run_name='silent-donkey-719', run_uuid='d5752c1bac0b49b78e93c0cdc9c734f1', start_time=1716126410348, status='RUNNING', user_id='sanjju'>
