In [21]:
import pandas as pd
import numpy as np
import json

from sklearn import model_selection
from sklearn.ensemble import RandomForestClassifier

from evidently.dashboard import Dashboard
from evidently.model_profile import Profile
from evidently.pipeline.column_mapping import ColumnMapping
from evidently.dashboard.tabs import ProbClassificationPerformanceTab
from evidently.model_profile.sections import DataDriftProfileSection

from evidently.dashboard.tabs import (
    DataDriftTab,
    CatTargetDriftTab,
    NumTargetDriftTab,
)

import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient

data = pd.read_csv('WineQT.csv')

In [6]:
train_data, test_data = model_selection.train_test_split(data, random_state=0)

In [7]:
features = train_data.columns[:-2]
target = ["quality"]

In [9]:
train_data

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,Id
733,6.9,0.44,0.00,1.40,0.070,32.0,38.0,0.99438,3.32,0.58,11.4,6,1045
520,9.5,0.55,0.66,2.30,0.387,12.0,37.0,0.99820,3.17,0.67,9.6,5,730
1083,6.5,0.53,0.06,2.00,0.063,29.0,44.0,0.99489,3.38,0.83,10.3,6,1517
989,8.0,0.52,0.25,2.00,0.078,19.0,59.0,0.99612,3.30,0.48,10.2,5,1393
188,12.5,0.56,0.49,2.40,0.064,5.0,27.0,0.99990,3.08,0.87,10.9,5,264
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1033,6.6,0.58,0.02,2.00,0.062,37.0,53.0,0.99374,3.35,0.76,11.6,7,1452
763,8.7,0.42,0.45,2.40,0.072,32.0,59.0,0.99617,3.33,0.77,12.0,6,1083
835,9.8,0.39,0.43,1.65,0.068,5.0,11.0,0.99478,3.19,0.46,11.4,5,1181
559,7.1,0.52,0.03,2.60,0.076,21.0,92.0,0.99745,3.50,0.60,9.8,5,779


In [10]:
# modeling
from sklearn.metrics import classification_report
model = RandomForestClassifier(random_state=1)
model.fit(train_data[features], train_data[target])
train_probas = pd.DataFrame(model.predict_proba(train_data[features]))
test_probas = pd.DataFrame(model.predict_proba(test_data[features]))

  model.fit(train_data[features], train_data[target])


In [11]:
# Model Performance
train_probas_head = train_data['quality'].drop_duplicates().sort_values().reset_index()
train_probas.columns = train_probas_head['quality']
train_probas

quality,3,4,5,6,7,8
0,0.00,0.00,0.10,0.88,0.02,0.00
1,0.02,0.00,0.73,0.24,0.01,0.00
2,0.00,0.00,0.11,0.83,0.06,0.00
3,0.00,0.01,0.83,0.16,0.00,0.00
4,0.00,0.00,0.65,0.28,0.07,0.00
...,...,...,...,...,...,...
852,0.00,0.00,0.03,0.23,0.74,0.00
853,0.00,0.00,0.00,0.82,0.16,0.02
854,0.00,0.00,0.60,0.17,0.22,0.01
855,0.00,0.01,0.85,0.14,0.00,0.00


In [12]:
# Model Performance
test_probas_head = test_data['quality'].drop_duplicates().sort_values().reset_index()
test_probas.columns = test_probas_head['quality']
test_probas

quality,3,4,5,6,7,8
0,0.01,0.10,0.56,0.33,0.00,0.00
1,0.02,0.16,0.23,0.51,0.08,0.00
2,0.02,0.07,0.71,0.17,0.03,0.00
3,0.00,0.00,0.17,0.79,0.04,0.00
4,0.00,0.00,0.64,0.35,0.01,0.00
...,...,...,...,...,...,...
281,0.00,0.01,0.67,0.22,0.09,0.01
282,0.10,0.21,0.57,0.11,0.01,0.00
283,0.00,0.01,0.26,0.65,0.04,0.04
284,0.00,0.00,0.05,0.29,0.62,0.04


In [13]:
# Both
train_data = train_data.reset_index()
train_data = train_data.drop(columns = ['index', 'Id'])
test_data = test_data.reset_index()
test_data = test_data.drop(columns = ['index', 'Id'])
train_data.columns = ['target' if x == 'quality' else x for x in train_data.columns]
test_data.columns = ['target' if x == 'quality' else x for x in test_data.columns]

In [14]:
# Model Performance
merged_train_data = pd.concat([train_data, train_probas], axis = 1)
merged_test_data = pd.concat([test_data, test_probas], axis = 1)

In [22]:
# Data Drift
wine_quality_dashboard = Dashboard(tabs=[DataDriftTab(), NumTargetDriftTab()])
wine_quality_dashboard.calculate(train_data, test_data, column_mapping=None)
wine_quality_dashboard.save('wine_quality_dashboard.html')

In [12]:
# Model Performance
wine_quality_mapping = ColumnMapping()
wine_quality_mapping.target = 'target'
wine_quality_mapping.prediction = [3, 4, 5, 6, 7, 8]
wine_quality_mapping.numerical_features = features
wine_quality_classification_dashboard = Dashboard(tabs=[ProbClassificationPerformanceTab(verbose_level=1)])
wine_quality_classification_dashboard.calculate(merged_train_data, merged_test_data, 
                                             column_mapping = wine_quality_mapping)
wine_quality_classification_dashboard.save('wine_quality_classification_dashboard.html')


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

