# Classification Report

## Imports

In [1]:

import numpy as np
import pandas as pd
from pathlib import Path
from model_deployment import mood_prediction


from datetime import date
from sklearn.model_selection import train_test_split


from evidently.pipeline.column_mapping import ColumnMapping
from evidently.options import ColorOptions
from evidently.report import Report
from evidently.metric_preset import DataDriftPreset
from evidently.metric_preset import DataQualityPreset
from evidently.metric_preset import TargetDriftPreset
from evidently.metric_preset import ClassificationPreset

from evidently.metrics import (
    ClassificationQualityMetric,
    ClassificationClassBalance,
    ClassificationConfusionMatrix,
    ClassificationQualityByClass,
    ClassificationQualityByFeatureTable,
    ConflictTargetMetric,
    ConflictPredictionMetric 
)

  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()


## Spotify Data

### Load data

In [10]:
# Explicit dtype for every column of the preprocessed CSV files.
dtype = {
    # Columns parsed as 8-bit integers.
    **dict.fromkeys(
        [
            'popularity', 'genres', 'sub-genres', 'explicit', 'key', 'mode',
            'instrumentalness', 'liveness', 'time_signature', 'mood',
        ],
        'int8',
    ),
    # Columns parsed with the builtin int type.
    **dict.fromkeys(['followers', 'duration_ms'], int),
    # Columns parsed with the builtin float type.
    **dict.fromkeys(['danceability', 'energy', 'loudness', 'tempo'], float),
}

In [11]:
# Location of the preprocessed dataset that is later used as the reference.
csv = "C:/Users/willi/Python/Spotify_Project/Data/preprocess_data.csv"

# Parse the comma-separated file with the explicit column dtypes.
data = pd.read_csv(csv, dtype=dtype, sep=",")

In [12]:
# Location of the preprocessed new batch that is later used as current data.
csv = "C:/Users/willi/Python/Spotify_Project/Data/preprocess_new_data.csv"

# Parse the comma-separated file with the same explicit column dtypes.
new_data = pd.read_csv(csv, dtype=dtype, sep=",")

In [5]:
# Preview the first rows of the new batch.
new_data.head()

Unnamed: 0,popularity,genres,sub-genres,explicit,followers,danceability,energy,key,loudness,mode,instrumentalness,liveness,tempo,duration_ms,time_signature,mood
0,87,9,3,1,5560201,0.829,0.436,3,-8.205,1,0,0,143.031,223204,4,1
1,90,14,27,1,5904499,0.517,0.675,6,-5.382,1,0,0,203.853,163855,4,1
2,90,20,9,0,5482747,0.574,0.935,1,-2.783,1,0,0,166.008,213234,4,1
3,99,15,4,0,4018287,0.668,0.758,5,-5.176,0,0,0,147.989,165671,3,1
4,91,8,2,1,75347210,0.817,0.44,10,-8.482,0,0,0,142.024,272113,4,1


In [6]:
# Print the column names, non-null counts and dtypes of the new batch.
new_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 16 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   popularity        100 non-null    int8   
 1   genres            100 non-null    object 
 2   sub-genres        100 non-null    object 
 3   explicit          100 non-null    int8   
 4   followers         100 non-null    int32  
 5   danceability      100 non-null    float64
 6   energy            100 non-null    float64
 7   key               100 non-null    int8   
 8   loudness          100 non-null    float64
 9   mode              100 non-null    int8   
 10  instrumentalness  100 non-null    int8   
 11  liveness          100 non-null    int8   
 12  tempo             100 non-null    float64
 13  duration_ms       100 non-null    int32  
 14  time_signature    100 non-null    int8   
 15  mood              100 non-null    int8   
dtypes: float64(4), int32(2), int8(8), object(2)
m

## Classification Model

### Config

In [13]:
# Column roles shared by the Evidently column mappings further down.
target = 'mood'
prediction = 'prediction'
numerical_features = [
    'popularity', 'followers', 'danceability', 'energy', 'loudness', 'tempo', 'duration_ms',
]
categorical_features = [
    'genres', 'sub-genres', 'explicit', 'liveness', 'instrumentalness', 'key', 'mode', 'time_signature',
]

# Folder that receives the generated HTML reports.
reports_dir = Path('C:/Users/willi/Python/Spotify_Project/reports')
# parents=True also creates missing intermediate folders, so a fresh checkout
# (or a nested, e.g. per-date, sub-folder) does not raise FileNotFoundError.
reports_dir.mkdir(parents=True, exist_ok=True)

### Model predictions

In [14]:
# Model inputs: every column except the "mood" label.
X_data = data.drop(columns=["mood"])
X_new_data = new_data.drop(columns=["mood"])

In [15]:
# Store the output of mood_prediction in a 'prediction' column of each frame,
# next to the true 'mood' label.
# NOTE(review): mood_prediction is imported from the project-local
# model_deployment module; presumably it returns one label per input row — confirm.
data['prediction']= mood_prediction(X_data)
new_data['prediction'] = mood_prediction(X_new_data)

 - packaging (current: 23.0, required: packaging==23.1)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.
 - packaging (current: 23.0, required: packaging==23.1)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


In [16]:
# Cast the true and predicted labels to strings, then print the frame summary.
for label_col in ('mood', 'prediction'):
    data[label_col] = data[label_col].astype('str')
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1800 entries, 0 to 1799
Data columns (total 17 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   popularity        1800 non-null   int8   
 1   genres            1800 non-null   int8   
 2   sub-genres        1800 non-null   int8   
 3   explicit          1800 non-null   int8   
 4   followers         1800 non-null   int32  
 5   danceability      1800 non-null   float64
 6   energy            1800 non-null   float64
 7   key               1800 non-null   int8   
 8   loudness          1800 non-null   float64
 9   mode              1800 non-null   int8   
 10  instrumentalness  1800 non-null   int8   
 11  liveness          1800 non-null   int8   
 12  tempo             1800 non-null   float64
 13  duration_ms       1800 non-null   int32  
 14  time_signature    1800 non-null   int8   
 15  mood              1800 non-null   object 
 16  prediction        1800 non-null   object 


In [17]:
# Cast the true and predicted labels to strings, then print the frame summary.
for label_col in ('mood', 'prediction'):
    new_data[label_col] = new_data[label_col].astype('str')
new_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 17 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   popularity        100 non-null    int8   
 1   genres            100 non-null    int8   
 2   sub-genres        100 non-null    int8   
 3   explicit          100 non-null    int8   
 4   followers         100 non-null    int32  
 5   danceability      100 non-null    float64
 6   energy            100 non-null    float64
 7   key               100 non-null    int8   
 8   loudness          100 non-null    float64
 9   mode              100 non-null    int8   
 10  instrumentalness  100 non-null    int8   
 11  liveness          100 non-null    int8   
 12  tempo             100 non-null    float64
 13  duration_ms       100 non-null    int32  
 14  time_signature    100 non-null    int8   
 15  mood              100 non-null    object 
 16  prediction        100 non-null    object 
dty

# Model Monitoring

In [18]:
# Reference dataset for the reports. This binds the same DataFrame object as
# `data`; no copy is made.
reference_data = data

In [19]:
# Current dataset for the reports. This binds the same DataFrame object as
# `new_data`; no copy is made.
current_data = new_data

In [20]:
# Tell Evidently which columns hold the label, the prediction and each kind of
# feature. The labels were cast to strings, hence the string positive label.
column_mapping = ColumnMapping(
    target=target,
    prediction=prediction,
    numerical_features=numerical_features,
    categorical_features=categorical_features,
    pos_label='1',
)

## Model performance

In [21]:
# Metrics that make up the binary classification report.
classification_metrics = [
    ClassificationQualityMetric(),
    ClassificationClassBalance(),
    ConflictTargetMetric(),
    ConflictPredictionMetric(),
    ClassificationConfusionMatrix(),
    ClassificationQualityByClass(),
    ClassificationQualityByFeatureTable(columns=numerical_features),
]
classification_report = Report(metrics=classification_metrics)

# Compute the metrics on the reference and current datasets.
classification_report.run(
    reference_data=reference_data,
    current_data=current_data,
    column_mapping=column_mapping,
)

In [22]:
# Write the classification report into the reports folder as an HTML file.
classification_report_path = reports_dir.joinpath('classification_report.html')
classification_report.save_html(classification_report_path)

## Target drift

In [23]:
# Build the target drift report from Evidently's preset and compute it on the
# reference and current datasets.
target_drift_report = Report(
    metrics=[
        TargetDriftPreset(),
    ],
)
target_drift_report.run(column_mapping=column_mapping, current_data=current_data, reference_data=reference_data)

In [24]:
# Write the target drift report into the reports folder as an HTML file.
target_drift_report_path = reports_dir.joinpath('target_drift.html')
target_drift_report.save_html(target_drift_report_path)

## Data drift

In [25]:
# Column mapping for the data drift report.
# It is kept identical to the mapping used for the classification report, so
# that rebinding `column_mapping` here cannot break a later re-run of the
# earlier report cells.
column_mapping = ColumnMapping()
column_mapping.target = target
column_mapping.prediction = prediction
column_mapping.numerical_features = numerical_features
# The categorical columns are loaded as int8 codes, so they are declared
# explicitly instead of relying on their dtype.
# NOTE(review): confirm these columns should be part of the drift analysis.
column_mapping.categorical_features = categorical_features
column_mapping.pos_label = '1'

In [26]:
# Build the data drift report from Evidently's preset and compute it on the
# reference and current datasets.
data_drift_report = Report(
    metrics=[
        DataDriftPreset(),
    ],
)
data_drift_report.run(column_mapping=column_mapping, current_data=current_data, reference_data=reference_data)

In [27]:
# Write the data drift report into the reports folder as an HTML file.
data_drift_report_path = reports_dir.joinpath('data_drift.html')
data_drift_report.save_html(data_drift_report_path)

## Data quality

In [28]:
# Column mapping for the data quality report.
# It is kept identical to the mapping used for the classification report, so
# that rebinding `column_mapping` here cannot break a later re-run of the
# earlier report cells.
column_mapping = ColumnMapping()
column_mapping.target = target
column_mapping.prediction = prediction
column_mapping.numerical_features = numerical_features
# The categorical columns are loaded as int8 codes, so they are declared
# explicitly instead of relying on their dtype.
# NOTE(review): confirm these columns should be part of the quality analysis.
column_mapping.categorical_features = categorical_features
column_mapping.pos_label = '1'

In [29]:
# Build the data quality report from Evidently's preset and compute it on the
# reference and current datasets.
data_quality_report = Report(
    metrics=[
        DataQualityPreset(),
    ],
)
data_quality_report.run(column_mapping=column_mapping, current_data=current_data, reference_data=reference_data)

In [30]:
# Write the data quality report into the reports folder as an HTML file.
data_quality_report_path = reports_dir.joinpath('data_quality.html')
data_quality_report.save_html(data_quality_report_path)