# Data drift dashboard in a Jupyter notebook

## Imports

In [1]:

import os
from datetime import date
from pathlib import Path

import numpy as np
import pandas as pd
import requests
#from sklearn import datasets, ensemble

from evidently.metric_preset import DataDriftPreset
from evidently.metric_preset import DataQualityPreset
from evidently.metric_preset import TargetDriftPreset
from evidently.metrics import (
    ClassificationQualityMetric,
    ClassificationClassBalance,
    ClassificationConfusionMatrix,
    ClassificationQualityByClass,
    ClassificationClassSeparationPlot,
    ClassificationProbDistribution,
    ClassificationRocCurve,
    ClassificationPRCurve,
    ClassificationPRTable,
    ClassificationQualityByFeatureTable,

    ConflictTargetMetric,
    ConflictPredictionMetric,
    DatasetSummaryMetric,
    ColumnSummaryMetric,
    DatasetMissingValuesMetric,
    DatasetCorrelationsMetric
)
from evidently.options import ColorOptions
from evidently.pipeline.column_mapping import ColumnMapping
from evidently.report import Report

from  model_deployment import mood_prediction




  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()


## Spotify Data

### Load data

In [2]:
# Location of the preprocessed Spotify dataset (comma-separated values).
# NOTE(review): this is an absolute, machine-specific path; the notebook
# will only run as-is on a machine that has this exact folder layout.
csv = "C:/Users/willi/Python/Spotify_Project/Data/preprocess_data.csv"

# Read the whole file into a DataFrame; every later cell works on `data`.
data = pd.read_csv(filepath_or_buffer=csv, sep=",")

In [3]:
# Sanity check: show the type of the object returned by pd.read_csv.
type(data)

pandas.core.frame.DataFrame

In [4]:
# Print the column names, non-null counts, dtypes and memory usage of `data`.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1800 entries, 0 to 1799
Data columns (total 16 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   popularity        1800 non-null   int64  
 1   genres            1800 non-null   int64  
 2   sub-genres        1800 non-null   int64  
 3   explicit          1800 non-null   int64  
 4   followers         1800 non-null   int64  
 5   danceability      1800 non-null   float64
 6   energy            1800 non-null   float64
 7   key               1800 non-null   int64  
 8   loudness          1800 non-null   float64
 9   mode              1800 non-null   int64  
 10  instrumentalness  1800 non-null   int64  
 11  liveness          1800 non-null   int64  
 12  tempo             1800 non-null   float64
 13  duration_ms       1800 non-null   int64  
 14  time_signature    1800 non-null   int64  
 15  mood              1800 non-null   int64  
dtypes: float64(4), int64(12)
memory usage: 225

## Classification Model

### Config

In [26]:
# Run date; used below to name the per-day report folder.
today = date.today()

# Column roles that are later assigned to Evidently's ColumnMapping.
target = 'mood'
prediction = 'prediction'
numerical_features = ['popularity', 'followers', 'danceability', 'energy', 'loudness', 'tempo','duration_ms']
categorical_features = ['genres', 'sub-genres', 'explicit','liveness','instrumentalness','key','mode','time_signature']

# One sub-folder per run date under the project's reports directory.
# NOTE(review): absolute, machine-specific path — consider making the base
# directory configurable.
reports_dir = Path('C:/Users/willi/Python/Spotify_Project/reports') / f'{today}'
# parents=True also creates any missing parent folder (e.g. "reports" itself),
# so the first run does not fail with FileNotFoundError; exist_ok=True keeps
# the cell safe to re-run on the same day.
reports_dir.mkdir(parents=True, exist_ok=True)

### Model predictions

In [6]:
# Feature matrix: every column of `data` except the "mood" label.
X = data.drop(columns="mood")
# Label vector: the "mood" column on its own.
y = data.loc[:, "mood"]

In [7]:
# Score the feature matrix with the imported `mood_prediction` helper and
# store the result as a new 'prediction' column next to the true label.
# Note: this adds a column to `data` in place.
predicted_moods = mood_prediction(X)
data['prediction'] = np.array(predicted_moods)

 - packaging (current: 23.0, required: packaging==23.1)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


In [8]:
# Preview the first rows, which now include the added 'prediction' column.
data.head()

Unnamed: 0,popularity,genres,sub-genres,explicit,followers,danceability,energy,key,loudness,mode,instrumentalness,liveness,tempo,duration_ms,time_signature,mood,prediction
0,72,68,119,0,8975886,0.837,0.462,0,-7.89,1,0,0,139.986,201400,4,1,1
1,76,68,136,0,3139709,0.656,0.578,7,-8.97,0,0,0,94.514,256733,4,1,0
2,81,53,65,1,20055519,0.629,0.696,1,-5.572,0,0,0,93.034,207627,4,1,0
3,81,24,119,0,55100090,0.779,0.64,7,-8.415,1,0,0,99.019,246960,4,0,1
4,82,84,57,1,11116401,0.614,0.574,11,-7.961,1,0,0,125.173,209107,5,1,0


# Model Monitoring

In [27]:
# Tell Evidently which column is the label, which is the model output, and
# how the remaining feature columns are typed.
column_mapping = ColumnMapping(
    target=target,
    prediction=prediction,
    numerical_features=numerical_features,
    categorical_features=categorical_features,
)

In [28]:
# Preview the rows with index labels up to and including 1499 (.loc slices
# include the end label). The same slice is passed as `reference_data` to the
# classification report further down.
data.loc[:1499]

Unnamed: 0,popularity,genres,sub-genres,explicit,followers,danceability,energy,key,loudness,mode,instrumentalness,liveness,tempo,duration_ms,time_signature,mood,prediction
0,72,68,119,0,8975886,0.837,0.462,0,-7.890,1,0,0,139.986,201400,4,1,1
1,76,68,136,0,3139709,0.656,0.578,7,-8.970,0,0,0,94.514,256733,4,1,0
2,81,53,65,1,20055519,0.629,0.696,1,-5.572,0,0,0,93.034,207627,4,1,0
3,81,24,119,0,55100090,0.779,0.640,7,-8.415,1,0,0,99.019,246960,4,0,1
4,82,84,57,1,11116401,0.614,0.574,11,-7.961,1,0,0,125.173,209107,5,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1495,71,18,138,1,2754087,0.919,0.622,1,-7.384,1,0,0,140.022,205803,4,0,1
1496,70,113,167,0,1186828,0.902,0.839,11,-9.447,0,0,0,133.989,190537,4,1,1
1497,70,33,29,0,141921,0.847,0.678,9,-8.635,1,0,0,97.984,200594,4,1,1
1498,70,145,161,1,1226755,0.792,0.743,7,-2.806,1,0,0,150.024,195637,4,1,1


In [29]:

# Preview the rows with index labels from 1500 onward. The same slice is
# passed as `current_data` to the classification report further down.
data.loc[1500:]

Unnamed: 0,popularity,genres,sub-genres,explicit,followers,danceability,energy,key,loudness,mode,instrumentalness,liveness,tempo,duration_ms,time_signature,mood,prediction
1500,19,44,15,0,64750237,0.513,0.796,1,-4.075,1,0,0,171.017,201573,4,0,1
1501,83,145,138,1,9329274,0.896,0.586,10,-6.687,0,0,0,116.971,196653,4,1,1
1502,78,170,145,1,714458,0.621,0.601,6,-5.616,0,0,0,116.735,163636,5,0,0
1503,90,168,176,0,27475028,0.548,0.816,0,-4.209,1,0,0,95.390,174000,4,1,1
1504,54,168,176,0,74266491,0.613,0.581,0,-8.588,0,0,0,130.033,239560,4,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1795,79,63,65,1,23948655,0.775,0.736,1,-8.072,0,0,0,100.988,239027,4,0,1
1796,70,189,176,1,1034952,0.937,0.793,11,-4.431,0,0,0,106.022,172540,4,1,1
1797,86,144,176,1,3017901,0.911,0.573,6,-7.430,1,0,0,140.040,97393,4,1,1
1798,77,18,91,1,5247957,0.773,0.422,1,-4.572,0,0,0,77.502,136267,1,0,0


## Model performance

In [30]:
# Classification report for the binary label.
# NOTE(review): the recorded output of this cell (and of the cell that saves
# the HTML) shows `KeyError: 1`; the cause cannot be determined from the
# notebook alone — TODO investigate.
classification_metrics = [
    ClassificationQualityMetric(),
    ClassificationClassBalance(),
    ConflictTargetMetric(),
    ConflictPredictionMetric(),
    ClassificationConfusionMatrix(),
    ClassificationQualityByClass(),
    ClassificationQualityByFeatureTable(columns=['energy', 'loudness']),
]
classification_report = Report(metrics=classification_metrics)

# Reference = index labels up to 1499, current = index labels from 1500 on.
classification_report.run(
    reference_data=data.loc[:1499],
    current_data=data.loc[1500:],
    column_mapping=column_mapping,
)
# Bare last expression so the notebook renders the report inline.
classification_report

KeyError: 1

<evidently.report.report.Report at 0x27c0d29f6d0>

In [31]:
# Write the classification report as HTML into today's report folder.
model_performance_report_path = reports_dir.joinpath('model_performance.html')
classification_report.save_html(model_performance_report_path)

KeyError: 1

## Target drift

In [None]:
# Target drift report.
# The original cell referenced `reference`, `current`, `CUR_WEEK_START` and
# `CUR_WEEK_END`, none of which are defined in this notebook, so it would
# raise NameError. Use the same split as the classification report instead:
# index labels up to 1499 as reference, 1500 onward as current.
target_drift_report = Report(metrics=[TargetDriftPreset()])
target_drift_report.run(
    reference_data=data.loc[:1499],
    current_data=data.loc[1500:],
    column_mapping=column_mapping
)

In [None]:
# Write the target drift report as HTML into today's report folder.
target_drift_report_path = reports_dir.joinpath('target_drift.html')
target_drift_report.save_html(target_drift_report_path)

## Data drift

In [None]:
# Start from a fresh mapping that declares only the numerical features.
# Note: this rebinds `column_mapping`, replacing the earlier mapping that
# also set target, prediction and categorical features.
column_mapping = ColumnMapping(numerical_features=numerical_features)

In [None]:
# Data drift report.
# The original cell referenced `reference`, `current`, `CUR_WEEK_START` and
# `CUR_WEEK_END`, none of which are defined in this notebook, so it would
# raise NameError. Use the same split as the classification report instead:
# index labels up to 1499 as reference, 1500 onward as current.
data_drift_report = Report(metrics=[DataDriftPreset()])
data_drift_report.run(
    reference_data=data.loc[:1499],
    current_data=data.loc[1500:],
    column_mapping=column_mapping
)

In [None]:
# Write the data drift report as HTML into today's report folder.
data_drift_report_path = reports_dir.joinpath('data_drift.html')
data_drift_report.save_html(data_drift_report_path)

## Data quality

In [None]:
# Start from a fresh mapping that declares only the numerical features.
# Note: this rebinds `column_mapping` for the data quality report below.
column_mapping = ColumnMapping(numerical_features=numerical_features)

In [None]:
# Data quality report.
# The original cell referenced `reference`, `current`, `CUR_WEEK_START` and
# `CUR_WEEK_END`, none of which are defined in this notebook, so it would
# raise NameError. Use the same split as the classification report instead:
# index labels up to 1499 as reference, 1500 onward as current.
# `DataQualityPreset` must be imported from evidently.metric_preset in the
# notebook's import cell.
data_quality_report = Report(metrics=[DataQualityPreset()])
data_quality_report.run(
    reference_data=data.loc[:1499],
    current_data=data.loc[1500:],
    column_mapping=column_mapping
)

In [None]:
# Write the data quality report as HTML into today's report folder.
data_quality_report_path = reports_dir.joinpath('data_quality.html')
data_quality_report.save_html(data_quality_report_path)