# Classification Report

## Imports

In [1]:

import numpy as np
import pandas as pd
from pathlib import Path
from model_deployment import mood_prediction


from datetime import date
from sklearn.model_selection import train_test_split


from evidently.pipeline.column_mapping import ColumnMapping
from evidently.options import ColorOptions
from evidently.report import Report
from evidently.metric_preset import DataDriftPreset
from evidently.metric_preset import DataQualityPreset
from evidently.metric_preset import TargetDriftPreset
from evidently.metric_preset import ClassificationPreset

from evidently.metrics import (
    ClassificationQualityMetric,
    ClassificationClassBalance,
    ClassificationConfusionMatrix,
    ClassificationQualityByClass,
    ClassificationClassSeparationPlot,
    ClassificationProbDistribution,
    ClassificationRocCurve,
    ClassificationPRCurve,
    ClassificationPRTable,
    ClassificationQualityByFeatureTable,
    
    ConflictTargetMetric,
    ConflictPredictionMetric,
    DatasetSummaryMetric,
    ColumnSummaryMetric,
    DatasetMissingValuesMetric,
    DatasetCorrelationsMetric
)

  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()


## Spotify Data

### Load data

In [2]:
# Location of the preprocessed Spotify dataset.
# NOTE(review): absolute path tied to one machine — consider a configurable data directory.
csv = "C:/Users/willi/Python/Spotify_Project/Data/preprocess_data.csv"

# Parse the comma-separated file into a DataFrame.
data = pd.read_csv(filepath_or_buffer=csv, sep=",")

In [4]:
# Print a summary of the loaded frame: column names, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1800 entries, 0 to 1799
Data columns (total 16 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   popularity        1800 non-null   int64  
 1   genres            1800 non-null   int64  
 2   sub-genres        1800 non-null   int64  
 3   explicit          1800 non-null   int64  
 4   followers         1800 non-null   int64  
 5   danceability      1800 non-null   float64
 6   energy            1800 non-null   float64
 7   key               1800 non-null   int64  
 8   loudness          1800 non-null   float64
 9   mode              1800 non-null   int64  
 10  instrumentalness  1800 non-null   int64  
 11  liveness          1800 non-null   int64  
 12  tempo             1800 non-null   float64
 13  duration_ms       1800 non-null   int64  
 14  time_signature    1800 non-null   int64  
 15  mood              1800 non-null   int64  
dtypes: float64(4), int64(12)
memory usage: 225

## Classification Model

### Config

In [5]:
# --- Column roles -----------------------------------------------------------
# Ground-truth label column and the column that holds the predicted label.
target = 'mood'
prediction = 'prediction'

# Feature columns, grouped by the type they are declared as in the column mapping.
numerical_features = [
    'popularity', 'followers', 'danceability', 'energy', 'loudness', 'tempo', 'duration_ms',
]
categorical_features = [
    'genres', 'sub-genres', 'explicit', 'liveness', 'instrumentalness', 'key', 'mode', 'time_signature',
]

# --- Report output ----------------------------------------------------------
# Folder that receives the generated HTML reports. To keep one folder per run,
# append a dated sub-folder, e.g. `/ f'{date.today()}'`.
reports_dir = Path('C:/Users/willi/Python/Spotify_Project/reports')
# parents=True also creates missing intermediate folders, so a fresh machine
# (or a dated sub-folder) does not fail with FileNotFoundError.
reports_dir.mkdir(parents=True, exist_ok=True)

### Model predictions and train/test split

In [54]:
# Separate the label column from the feature columns.
y = data.loc[:, "mood"]
X = data.drop(columns=["mood"])

In [55]:
# Score every row with `mood_prediction` and store the result next to the
# features. A 'prediction' column left over from an earlier run of this cell is
# dropped from the model input first, so re-running the cell does not feed the
# model its own previous output as an extra feature.
model_input = X.drop(columns=['prediction'], errors='ignore')
X['prediction'] = np.array(mood_prediction(model_input))

 - packaging (current: 23.0, required: packaging==23.1)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


In [56]:
# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [57]:
# Re-attach the labels to each split and renumber the rows from 0.
df_train = pd.concat([X_train, y_train], axis="columns").reset_index(drop=True)
df_test = pd.concat([X_test, y_test], axis="columns").reset_index(drop=True)

In [59]:
# Cast the true and predicted labels of the training split to strings,
# then show the resulting column summary.
for label_column in ('mood', 'prediction'):
    df_train[label_column] = df_train[label_column].astype('str')
df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1440 entries, 0 to 1439
Data columns (total 17 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   popularity        1440 non-null   int64  
 1   genres            1440 non-null   int64  
 2   sub-genres        1440 non-null   int64  
 3   explicit          1440 non-null   int64  
 4   followers         1440 non-null   int64  
 5   danceability      1440 non-null   float64
 6   energy            1440 non-null   float64
 7   key               1440 non-null   int64  
 8   loudness          1440 non-null   float64
 9   mode              1440 non-null   int64  
 10  instrumentalness  1440 non-null   int64  
 11  liveness          1440 non-null   int64  
 12  tempo             1440 non-null   float64
 13  duration_ms       1440 non-null   int64  
 14  time_signature    1440 non-null   int64  
 15  prediction        1440 non-null   object 
 16  mood              1440 non-null   object 


In [60]:
# Cast the true and predicted labels of the test split to strings,
# then show the resulting column summary.
for label_column in ('mood', 'prediction'):
    df_test[label_column] = df_test[label_column].astype('str')
df_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 360 entries, 0 to 359
Data columns (total 17 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   popularity        360 non-null    int64  
 1   genres            360 non-null    int64  
 2   sub-genres        360 non-null    int64  
 3   explicit          360 non-null    int64  
 4   followers         360 non-null    int64  
 5   danceability      360 non-null    float64
 6   energy            360 non-null    float64
 7   key               360 non-null    int64  
 8   loudness          360 non-null    float64
 9   mode              360 non-null    int64  
 10  instrumentalness  360 non-null    int64  
 11  liveness          360 non-null    int64  
 12  tempo             360 non-null    float64
 13  duration_ms       360 non-null    int64  
 14  time_signature    360 non-null    int64  
 15  prediction        360 non-null    object 
 16  mood              360 non-null    object 
dt

# Model Monitoring

In [61]:
# Reference dataset for the reports: the training split (same object as df_train, not a copy).
reference_data = df_train

In [62]:
# Current dataset for the reports: the test split (same object as df_test, not a copy).
current_data = df_test

In [66]:
# Declare the role of every column for the reports: label, predicted label,
# feature types, and which label value counts as the positive class.
column_mapping = ColumnMapping(
    target=target,
    prediction=prediction,
    numerical_features=numerical_features,
    categorical_features=categorical_features,
    pos_label='1',
)

## Model performance

In [73]:
# Metrics for label-based binary classification.
classification_metrics = [
    ClassificationQualityMetric(),
    ClassificationClassBalance(),
    ConflictTargetMetric(),
    ConflictPredictionMetric(),
    ClassificationConfusionMatrix(),
    ClassificationQualityByClass(),
    # Quality breakdown restricted to the numerical feature columns.
    ClassificationQualityByFeatureTable(columns=numerical_features),
]

# Build the report and evaluate it on the current data against the reference data.
classification_report = Report(metrics=classification_metrics)
classification_report.run(
    current_data=current_data,
    reference_data=reference_data,
    column_mapping=column_mapping,
)

In [74]:
# Write the classification report to an HTML file inside reports_dir.
classification_report_path = reports_dir.joinpath('classification_report.html')
classification_report.save_html(classification_report_path)

## Target drift

In [None]:
# Run the TargetDriftPreset on the current data against the reference data.
target_drift_metrics = [TargetDriftPreset()]
target_drift_report = Report(metrics=target_drift_metrics)
target_drift_report.run(
    current_data=current_data,
    reference_data=reference_data,
    column_mapping=column_mapping,
)

In [None]:
# Write the target drift report to an HTML file inside reports_dir.
target_drift_report_path = reports_dir.joinpath('target_drift.html')
target_drift_report.save_html(target_drift_report_path)

## Data drift

In [None]:
# Column mapping for the data drift report.
# Every column role is declared explicitly. Declaring only the numerical
# features would leave the label, the prediction and the integer-encoded
# categorical columns to Evidently's defaults and automatic type inference.
# Because this rebinds the shared `column_mapping` name, it also keeps the
# mapping complete for any cell that is re-run afterwards.
column_mapping = ColumnMapping()
column_mapping.target = target
column_mapping.prediction = prediction
column_mapping.numerical_features = numerical_features
column_mapping.categorical_features = categorical_features
column_mapping.pos_label = '1'

In [None]:
# Run the DataDriftPreset on the current data against the reference data.
data_drift_metrics = [DataDriftPreset()]
data_drift_report = Report(metrics=data_drift_metrics)
data_drift_report.run(
    current_data=current_data,
    reference_data=reference_data,
    column_mapping=column_mapping,
)

In [None]:
# Write the data drift report to an HTML file inside reports_dir.
data_drift_report_path = reports_dir.joinpath('data_drift.html')
data_drift_report.save_html(data_drift_report_path)

## Data quality

In [None]:
# Column mapping for the data quality report.
# Every column role is declared explicitly. Declaring only the numerical
# features would leave the label, the prediction and the integer-encoded
# categorical columns to Evidently's defaults and automatic type inference.
# Because this rebinds the shared `column_mapping` name, it also keeps the
# mapping complete for any cell that is re-run afterwards.
column_mapping = ColumnMapping()
column_mapping.target = target
column_mapping.prediction = prediction
column_mapping.numerical_features = numerical_features
column_mapping.categorical_features = categorical_features
column_mapping.pos_label = '1'

In [None]:
# Run the DataQualityPreset on the current data against the reference data.
data_quality_metrics = [DataQualityPreset()]
data_quality_report = Report(metrics=data_quality_metrics)
data_quality_report.run(
    current_data=current_data,
    reference_data=reference_data,
    column_mapping=column_mapping,
)

In [None]:
# Write the data quality report to an HTML file inside reports_dir.
data_quality_report_path = reports_dir.joinpath('data_quality.html')
data_quality_report.save_html(data_quality_report_path)