In [1]:
# Re-import edited project modules automatically before each cell executes,
# so changes to local .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import mlflow
import mlflow.sklearn

In [3]:
import os
import librosa
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import (
    precision_score, 
    recall_score, 
    f1_score, 
    roc_auc_score, 
    accuracy_score, 
    classification_report, 
    confusion_matrix, 
    RocCurveDisplay
)
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier





import matplotlib.pyplot as plt
import seaborn as sns

from tqdm import tqdm
from joblib import Parallel, delayed



from utils import load_config
from models.baseline import preprocess_data

In [4]:
# Project configuration, as returned by the local `load_config` helper.
config = load_config()

# Directory holding the extracted features, and the combined feature table inside it.
features_dir = config['data_paths']['features']
features_csv = os.path.join(features_dir, 'all_features.csv')

### Load features

In [8]:
# Read the combined feature table (one row per audio clip / version) and preview it.
features_df = pd.read_csv(features_csv)
features_df.head(n=5)

Unnamed: 0,audio_id,real_or_fake,spectral_centroid_mean,spectral_centroid_std,spectral_centroid_var,spectral_centroid_min,spectral_centroid_max,spectral_centroid_range,spectral_centroid_25th_percentile,spectral_centroid_50th_percentile,...,shimmer_dda,hnr,voicedcount,npause,originaldur,intensity_duration,speakingrate,articulationrate,asd,totalpauseduration
0,LJ028-0143,WF1,1874.13394,1345.530923,1810453.0,526.534014,6285.029512,5758.495498,975.050304,1345.562364,...,0.099888,11.673236,32.0,2.0,7.360726,7.360726,4.347397,4.925558,0.203023,0.864
1,LJ028-0143,WF2,1896.024231,1357.51481,1842846.0,585.883222,6304.308256,5718.425035,985.413912,1390.383399,...,0.096005,12.024721,29.0,2.0,7.360726,7.360726,3.939829,4.463787,0.224025,0.864
2,LJ028-0143,WF3,1918.827899,1370.524166,1878336.0,550.249874,6265.837711,5715.587837,1007.631444,1414.627604,...,0.111386,11.691639,30.0,2.0,7.349116,7.349116,4.082124,4.614593,0.216704,0.848
3,LJ028-0143,WF4,1871.877706,1335.658077,1783982.0,515.918819,6277.746003,5761.827184,974.725797,1356.723998,...,0.088301,12.341544,33.0,2.0,7.360726,7.360726,4.483254,5.079482,0.19687,0.864
4,LJ028-0143,WF5,1939.824198,1358.239585,1844815.0,555.707767,6259.423582,5703.715815,1010.880341,1418.982549,...,0.109853,11.31168,32.0,2.0,7.360726,7.360726,4.347397,4.925558,0.203023,0.864


In [14]:
# Feature subset used for modelling; order is preserved because it determines
# the column order of the frame handed to the models.
selected_features = [
    'spectral_contrast_var', 'spectral_contrast_range', 'spectral_contrast_mean',
    'F3_mean', 'F2_stdev', 'F3_stdev', 'F1_stdev',
    'mfcc_13_std', 'F2_mean',
    'mfcc_6_75th_percentile', 'mfcc_12_75th_percentile', 'mfcc_9_75th_percentile',
    'mfcc_3_75th_percentile',
    'mfcc_12_50th_percentile', 'mfcc_9_50th_percentile', 'mfcc_2_50th_percentile',
    'mfcc_5_50th_percentile', 'mfcc_7_50th_percentile',
    'f0_skew', 'pause_std', 'asd', 'pause_75th_percentile',
    'chroma_11_50th_percentile', 'chroma_3_50th_percentile', 'chroma_6_50th_percentile',
    'spectral_flux_skew',
    'mfcc_12_25th_percentile', 'mfcc_6_25th_percentile', 'mfcc_2_25th_percentile',
    'spectral_bandwidth_min', 'zero_crossing_rate_skew',
    'chroma_1_range', 'speaking_rate', 'chroma_12_range', 'chroma_2_range',
    'chroma_3_range', 'chroma_5_range', 'chroma_10_range',
    'spectral_flatness_skew',
    'chroma_6_range', 'chroma_8_range', 'chroma_7_range', 'chroma_9_range',
    'f0_kurtosis', 'chroma_11_range', 'spectral_bandwidth_kurtosis',
    'chroma_6_max', 'chroma_10_max', 'chroma_2_max', 'chroma_12_max',
    'chroma_5_max', 'chroma_7_max', 'chroma_4_max', 'chroma_1_max', 'chroma_11_max',
    'chroma_4_std', 'chroma_6_std', 'chroma_7_std',
    'chroma_3_max',
    'chroma_12_std', 'chroma_11_std', 'chroma_2_std', 'chroma_10_std',
    'chroma_3_std', 'chroma_9_std', 'chroma_8_std', 'chroma_5_std', 'chroma_1_std',
    'zero_crossing_rate_range', 'mfcc_1_skew', 'spectral_rolloff_range',
    'f0_25th_percentile', 'pause_skew', 'chroma_9_min',
    'mfcc_13_mean', 'mfcc_11_mean',
    'zero_crossing_rate_min', 'spectral_bandwidth_max',
    'mfcc_10_max', 'f0_75th_percentile', 'mfcc_5_max', 'mfcc_6_mean', 'mfcc_3_max',
    'jitter_local', 'spectral_flux_25th_percentile', 'spectral_flatness_min',
    'energy_min', 'shimmer_local', 'spectral_flatness_range',
]

# Keep the clip identifier first and the class label last, with the selected
# features in between; .copy() detaches the result from the full table.
kept_columns = ['audio_id', *selected_features, 'real_or_fake']
features_df = features_df[kept_columns].copy()

In [9]:
def train_eval_model(model, model_name, X_train, X_test, y_train, y_test):
    """Fit ``model``, score it on the test split and record the run in MLflow.

    A single MLflow run named ``f"{model_name}_run"`` is opened. Inside it the
    model name is logged as a parameter, the model is fitted on the training
    split, and accuracy / precision / recall / F1 / ROC AUC computed on the
    test split are logged as metrics. The classification report, a confusion
    matrix heatmap and the ROC curve are attached as run artifacts. The report
    and a one-line metric summary are also printed.

    Parameters
    ----------
    model
        Estimator providing ``fit``, ``predict`` and ``predict_proba``.
    model_name : str
        Label logged as the ``model`` parameter and used to build the run name.
    X_train, X_test
        Feature matrices for fitting and evaluation.
    y_train, y_test
        Labels for fitting and evaluation. Column 1 of ``predict_proba`` is
        used as the positive-class score, so the task is treated as binary.

    Returns
    -------
    dict
        The logged metrics, keyed ``accuracy``, ``precision``, ``recall``,
        ``f1_score`` and ``roc_auc``.
    """
    run_name = f"{model_name}_run"
    with mlflow.start_run(run_name=run_name):
        # Log model name
        mlflow.log_param("model", model_name)

        # Train the model
        model.fit(X_train, y_train)

        # Hard predictions drive the threshold metrics; positive-class
        # probabilities drive ROC AUC and the ROC curve.
        predictions = model.predict(X_test)
        probabilities = model.predict_proba(X_test)[:, 1]

        # Calculate metrics
        accuracy = accuracy_score(y_test, predictions)
        precision = precision_score(y_test, predictions)
        recall = recall_score(y_test, predictions)
        f1 = f1_score(y_test, predictions)
        roc_auc = roc_auc_score(y_test, probabilities)

        metrics = {
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall,
            "f1_score": f1,
            "roc_auc": roc_auc,
        }
        mlflow.log_metrics(metrics)

        # Log the classification report straight from memory so that no
        # scratch file is left behind in the working directory.
        report = classification_report(y_test, predictions)
        print(report)
        mlflow.log_text(report, "classification_report.txt")

        # Confusion matrix heatmap; the figure is closed even if plotting or
        # logging fails so repeated calls do not accumulate open figures.
        cm = confusion_matrix(y_test, predictions)
        cm_fig, cm_ax = plt.subplots(figsize=(10, 7))
        try:
            sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=cm_ax)
            cm_ax.set_title('Confusion Matrix')
            cm_ax.set_ylabel('Actual label')
            cm_ax.set_xlabel('Predicted label')
            mlflow.log_figure(cm_fig, "confusion_matrix.png")
        finally:
            plt.close(cm_fig)

        # ROC curve
        roc_fig, roc_ax = plt.subplots()
        try:
            RocCurveDisplay.from_predictions(y_test, probabilities, ax=roc_ax)
            roc_ax.set_title('ROC Curve')
            mlflow.log_figure(roc_fig, "roc_curve.png")
        finally:
            plt.close(roc_fig)

        print(f"Run Name: {run_name} | Model: {model_name} | Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f} | F1 Score: {f1:.4f} | ROC AUC: {roc_auc:.4f}")

    return metrics


In [13]:
## CodecFake: one binary classifier per fake version vs. real (R) audio

In [17]:
# The MLflow destination is identical for every run, so configure it once
# instead of on every loop iteration.
mlflow.set_tracking_uri("http://mlflow:5001")
mlflow.set_experiment('model_training')

# One seed shared by the split and the stochastic model so reruns reproduce
# the same metrics.
RANDOM_STATE = 42

for fake_version in ['F01', 'F02', 'F03', 'F04', 'F05', 'F06', 'WF1', 'WF2', 'WF3', 'WF4', 'WF5', 'WF6', 'WF7']:
    # Binary task: real clips ('R') against a single fake version.
    f_df = features_df[features_df['real_or_fake'].isin(['R', fake_version])].copy()
    # 0 = real, 1 = fake (vectorised instead of a row-wise lambda).
    f_df['target'] = (f_df['real_or_fake'] != 'R').astype(int)

    ## Setup for training & evaluation
    features_columns = [col for col in f_df.columns if col not in ['audio_id', 'real_or_fake', 'target']]
    X = f_df[features_columns]
    # NOTE(review): preprocess_data runs on the full table before the split.
    # If it fits any statistics (e.g. scaling), test rows influence the
    # training features — confirm against models.baseline.
    X = preprocess_data(X)
    y = f_df['target']

    # Split the data into train and test sets
    # NOTE(review): the split is row-wise, so rows sharing an audio_id can end
    # up on both sides; consider a group-aware split — TODO confirm intent.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=RANDOM_STATE, stratify=y
    )

    # Logistic Regression
    log_reg = LogisticRegression(max_iter=1000)
    train_eval_model(log_reg, f"logistic_regression_{fake_version}", X_train, X_test, y_train, y_test)

    # Random Forest (seeded: bootstrap sampling and feature selection are random)
    random_forest = RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE)
    train_eval_model(random_forest, f"random_forest_{fake_version}", X_train, X_test, y_train, y_test)

2024/08/12 10:46:01 INFO mlflow.tracking._tracking_service.client: 🏃 View run logistic_regression_F01_run at: http://mlflow:5001/#/experiments/213449315161875335/runs/e16fdffeb0174b97b04219aa9c78af80.
2024/08/12 10:46:01 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5001/#/experiments/213449315161875335.


              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9651
           1       1.00      1.00      1.00      7031

    accuracy                           1.00     16682
   macro avg       1.00      1.00      1.00     16682
weighted avg       1.00      1.00      1.00     16682

Run Name: logistic_regression_F01_run | Model: logistic_regression_F01 | Accuracy: 1.0000 | Precision: 1.0000 | Recall: 1.0000 | F1 Score: 1.0000 | ROC AUC: 1.0000


2024/08/12 10:46:15 INFO mlflow.tracking._tracking_service.client: 🏃 View run random_forest_F01_run at: http://mlflow:5001/#/experiments/213449315161875335/runs/d64efda13fbf4b4b9aea83be7186354a.
2024/08/12 10:46:15 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5001/#/experiments/213449315161875335.


              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9651
           1       1.00      1.00      1.00      7031

    accuracy                           1.00     16682
   macro avg       1.00      1.00      1.00     16682
weighted avg       1.00      1.00      1.00     16682

Run Name: random_forest_F01_run | Model: random_forest_F01 | Accuracy: 1.0000 | Precision: 1.0000 | Recall: 1.0000 | F1 Score: 1.0000 | ROC AUC: 1.0000


2024/08/12 10:46:16 INFO mlflow.tracking._tracking_service.client: 🏃 View run logistic_regression_F02_run at: http://mlflow:5001/#/experiments/213449315161875335/runs/4c609c88cacd4f73851c94ea145fef78.
2024/08/12 10:46:16 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5001/#/experiments/213449315161875335.


              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9651
           1       1.00      1.00      1.00      7031

    accuracy                           1.00     16682
   macro avg       1.00      1.00      1.00     16682
weighted avg       1.00      1.00      1.00     16682

Run Name: logistic_regression_F02_run | Model: logistic_regression_F02 | Accuracy: 1.0000 | Precision: 1.0000 | Recall: 1.0000 | F1 Score: 1.0000 | ROC AUC: 1.0000


2024/08/12 10:46:29 INFO mlflow.tracking._tracking_service.client: 🏃 View run random_forest_F02_run at: http://mlflow:5001/#/experiments/213449315161875335/runs/08457127d1b94faabe4297b97a3baad2.
2024/08/12 10:46:29 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5001/#/experiments/213449315161875335.


              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9651
           1       1.00      1.00      1.00      7031

    accuracy                           1.00     16682
   macro avg       1.00      1.00      1.00     16682
weighted avg       1.00      1.00      1.00     16682

Run Name: random_forest_F02_run | Model: random_forest_F02 | Accuracy: 1.0000 | Precision: 1.0000 | Recall: 1.0000 | F1 Score: 1.0000 | ROC AUC: 1.0000


2024/08/12 10:46:29 INFO mlflow.tracking._tracking_service.client: 🏃 View run logistic_regression_F03_run at: http://mlflow:5001/#/experiments/213449315161875335/runs/bd91ee22dfcd40e18faddaefdd1a12fe.
2024/08/12 10:46:29 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5001/#/experiments/213449315161875335.


              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9651
           1       1.00      1.00      1.00      7031

    accuracy                           1.00     16682
   macro avg       1.00      1.00      1.00     16682
weighted avg       1.00      1.00      1.00     16682

Run Name: logistic_regression_F03_run | Model: logistic_regression_F03 | Accuracy: 1.0000 | Precision: 1.0000 | Recall: 1.0000 | F1 Score: 1.0000 | ROC AUC: 1.0000


2024/08/12 10:46:45 INFO mlflow.tracking._tracking_service.client: 🏃 View run random_forest_F03_run at: http://mlflow:5001/#/experiments/213449315161875335/runs/fda675424813481eacf6a6a8ef0604e5.
2024/08/12 10:46:45 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5001/#/experiments/213449315161875335.


              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9651
           1       1.00      1.00      1.00      7031

    accuracy                           1.00     16682
   macro avg       1.00      1.00      1.00     16682
weighted avg       1.00      1.00      1.00     16682

Run Name: random_forest_F03_run | Model: random_forest_F03 | Accuracy: 1.0000 | Precision: 1.0000 | Recall: 1.0000 | F1 Score: 1.0000 | ROC AUC: 1.0000


2024/08/12 10:46:46 INFO mlflow.tracking._tracking_service.client: 🏃 View run logistic_regression_F04_run at: http://mlflow:5001/#/experiments/213449315161875335/runs/fa0fbb7d1551487c8d631af1cf1c9fc7.
2024/08/12 10:46:46 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5001/#/experiments/213449315161875335.


              precision    recall  f1-score   support

           0       0.98      0.98      0.98      9651
           1       0.98      0.97      0.98      7031

    accuracy                           0.98     16682
   macro avg       0.98      0.98      0.98     16682
weighted avg       0.98      0.98      0.98     16682

Run Name: logistic_regression_F04_run | Model: logistic_regression_F04 | Accuracy: 0.9790 | Precision: 0.9762 | Recall: 0.9740 | F1 Score: 0.9751 | ROC AUC: 0.9977
              precision    recall  f1-score   support

           0       0.97      0.98      0.98      9651
           1       0.97      0.96      0.97      7031

    accuracy                           0.97     16682
   macro avg       0.97      0.97      0.97     16682
weighted avg       0.97      0.97      0.97     16682



2024/08/12 10:47:25 INFO mlflow.tracking._tracking_service.client: 🏃 View run random_forest_F04_run at: http://mlflow:5001/#/experiments/213449315161875335/runs/c5047096622f4ac08e6be598d33e05ed.
2024/08/12 10:47:25 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5001/#/experiments/213449315161875335.


Run Name: random_forest_F04_run | Model: random_forest_F04 | Accuracy: 0.9727 | Precision: 0.9738 | Recall: 0.9610 | F1 Score: 0.9674 | ROC AUC: 0.9969


2024/08/12 10:47:26 INFO mlflow.tracking._tracking_service.client: 🏃 View run logistic_regression_F05_run at: http://mlflow:5001/#/experiments/213449315161875335/runs/f4cecacd46e545cb8b6f82b4e18c5288.
2024/08/12 10:47:26 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5001/#/experiments/213449315161875335.


              precision    recall  f1-score   support

           0       0.87      0.85      0.86      9651
           1       0.80      0.82      0.81      7031

    accuracy                           0.84     16682
   macro avg       0.83      0.83      0.83     16682
weighted avg       0.84      0.84      0.84     16682

Run Name: logistic_regression_F05_run | Model: logistic_regression_F05 | Accuracy: 0.8364 | Precision: 0.7976 | Recall: 0.8197 | F1 Score: 0.8085 | ROC AUC: 0.9164


2024/08/12 10:48:04 INFO mlflow.tracking._tracking_service.client: 🏃 View run random_forest_F05_run at: http://mlflow:5001/#/experiments/213449315161875335/runs/987bbb92d9bc4706b16c7ca24c4887ff.
2024/08/12 10:48:04 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5001/#/experiments/213449315161875335.


              precision    recall  f1-score   support

           0       0.85      0.83      0.84      9651
           1       0.77      0.80      0.78      7031

    accuracy                           0.81     16682
   macro avg       0.81      0.81      0.81     16682
weighted avg       0.82      0.81      0.81     16682

Run Name: random_forest_F05_run | Model: random_forest_F05 | Accuracy: 0.8139 | Precision: 0.7688 | Recall: 0.7985 | F1 Score: 0.7834 | ROC AUC: 0.8962


2024/08/12 10:48:05 INFO mlflow.tracking._tracking_service.client: 🏃 View run logistic_regression_F06_run at: http://mlflow:5001/#/experiments/213449315161875335/runs/76b004393ede4f9bb5045ed4af8f0405.
2024/08/12 10:48:05 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5001/#/experiments/213449315161875335.


              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9651
           1       1.00      1.00      1.00      7031

    accuracy                           1.00     16682
   macro avg       1.00      1.00      1.00     16682
weighted avg       1.00      1.00      1.00     16682

Run Name: logistic_regression_F06_run | Model: logistic_regression_F06 | Accuracy: 1.0000 | Precision: 1.0000 | Recall: 1.0000 | F1 Score: 1.0000 | ROC AUC: 1.0000


2024/08/12 10:48:21 INFO mlflow.tracking._tracking_service.client: 🏃 View run random_forest_F06_run at: http://mlflow:5001/#/experiments/213449315161875335/runs/e5a8bca377ea404e8f9024b1f544f1c9.
2024/08/12 10:48:21 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5001/#/experiments/213449315161875335.


              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9651
           1       1.00      1.00      1.00      7031

    accuracy                           1.00     16682
   macro avg       1.00      1.00      1.00     16682
weighted avg       1.00      1.00      1.00     16682

Run Name: random_forest_F06_run | Model: random_forest_F06 | Accuracy: 1.0000 | Precision: 1.0000 | Recall: 1.0000 | F1 Score: 1.0000 | ROC AUC: 1.0000


2024/08/12 10:48:22 INFO mlflow.tracking._tracking_service.client: 🏃 View run logistic_regression_WF1_run at: http://mlflow:5001/#/experiments/213449315161875335/runs/f28e80fbc45b44dea21f265795c366ba.
2024/08/12 10:48:22 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5001/#/experiments/213449315161875335.


              precision    recall  f1-score   support

           0       0.96      0.96      0.96      9651
           1       0.86      0.87      0.86      2620

    accuracy                           0.94     12271
   macro avg       0.91      0.91      0.91     12271
weighted avg       0.94      0.94      0.94     12271

Run Name: logistic_regression_WF1_run | Model: logistic_regression_WF1 | Accuracy: 0.9422 | Precision: 0.8634 | Recall: 0.8664 | F1 Score: 0.8649 | ROC AUC: 0.9825


2024/08/12 10:48:39 INFO mlflow.tracking._tracking_service.client: 🏃 View run random_forest_WF1_run at: http://mlflow:5001/#/experiments/213449315161875335/runs/7ea89012ed9344119fb6499dfbe50426.
2024/08/12 10:48:39 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5001/#/experiments/213449315161875335.


              precision    recall  f1-score   support

           0       0.96      0.96      0.96      9651
           1       0.85      0.85      0.85      2620

    accuracy                           0.94     12271
   macro avg       0.90      0.90      0.90     12271
weighted avg       0.94      0.94      0.94     12271

Run Name: random_forest_WF1_run | Model: random_forest_WF1 | Accuracy: 0.9351 | Precision: 0.8484 | Recall: 0.8477 | F1 Score: 0.8480 | ROC AUC: 0.9804


2024/08/12 10:48:40 INFO mlflow.tracking._tracking_service.client: 🏃 View run logistic_regression_WF2_run at: http://mlflow:5001/#/experiments/213449315161875335/runs/872c7cfab5db47e3b41a23b26c83f706.
2024/08/12 10:48:40 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5001/#/experiments/213449315161875335.


              precision    recall  f1-score   support

           0       0.96      0.95      0.95      9651
           1       0.83      0.84      0.83      2620

    accuracy                           0.93     12271
   macro avg       0.89      0.89      0.89     12271
weighted avg       0.93      0.93      0.93     12271

Run Name: logistic_regression_WF2_run | Model: logistic_regression_WF2 | Accuracy: 0.9277 | Precision: 0.8269 | Recall: 0.8366 | F1 Score: 0.8317 | ROC AUC: 0.9742


2024/08/12 10:48:56 INFO mlflow.tracking._tracking_service.client: 🏃 View run random_forest_WF2_run at: http://mlflow:5001/#/experiments/213449315161875335/runs/18482cd7bcd746c9adeb788e98e0b8a3.
2024/08/12 10:48:56 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5001/#/experiments/213449315161875335.


              precision    recall  f1-score   support

           0       0.94      0.94      0.94      9651
           1       0.78      0.76      0.77      2620

    accuracy                           0.90     12271
   macro avg       0.86      0.85      0.85     12271
weighted avg       0.90      0.90      0.90     12271

Run Name: random_forest_WF2_run | Model: random_forest_WF2 | Accuracy: 0.9035 | Precision: 0.7820 | Recall: 0.7599 | F1 Score: 0.7708 | ROC AUC: 0.9603
              precision    recall  f1-score   support

           0       0.95      0.95      0.95      9651
           1       0.81      0.83      0.82      2620

    accuracy                           0.92     12271
   macro avg       0.88      0.89      0.89     12271
weighted avg       0.92      0.92      0.92     12271



2024/08/12 10:48:57 INFO mlflow.tracking._tracking_service.client: 🏃 View run logistic_regression_WF3_run at: http://mlflow:5001/#/experiments/213449315161875335/runs/7ac8d93f32d748d68d9d4cf0ae7ad1de.
2024/08/12 10:48:57 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5001/#/experiments/213449315161875335.


Run Name: logistic_regression_WF3_run | Model: logistic_regression_WF3 | Accuracy: 0.9232 | Precision: 0.8144 | Recall: 0.8290 | F1 Score: 0.8216 | ROC AUC: 0.9726


2024/08/12 10:49:14 INFO mlflow.tracking._tracking_service.client: 🏃 View run random_forest_WF3_run at: http://mlflow:5001/#/experiments/213449315161875335/runs/2b952683ccd0483c8ec42ed2104058f8.
2024/08/12 10:49:14 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5001/#/experiments/213449315161875335.


              precision    recall  f1-score   support

           0       0.94      0.94      0.94      9651
           1       0.79      0.78      0.79      2620

    accuracy                           0.91     12271
   macro avg       0.87      0.86      0.87     12271
weighted avg       0.91      0.91      0.91     12271

Run Name: random_forest_WF3_run | Model: random_forest_WF3 | Accuracy: 0.9108 | Precision: 0.7947 | Recall: 0.7847 | F1 Score: 0.7897 | ROC AUC: 0.9645


2024/08/12 10:49:15 INFO mlflow.tracking._tracking_service.client: 🏃 View run logistic_regression_WF4_run at: http://mlflow:5001/#/experiments/213449315161875335/runs/9e3b9b1953f8426ba9c3c78d6ba94c68.
2024/08/12 10:49:15 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5001/#/experiments/213449315161875335.


              precision    recall  f1-score   support

           0       0.97      0.96      0.96      9651
           1       0.86      0.88      0.87      2620

    accuracy                           0.94     12271
   macro avg       0.91      0.92      0.91     12271
weighted avg       0.94      0.94      0.94     12271

Run Name: logistic_regression_WF4_run | Model: logistic_regression_WF4 | Accuracy: 0.9418 | Precision: 0.8551 | Recall: 0.8760 | F1 Score: 0.8654 | ROC AUC: 0.9825


2024/08/12 10:49:31 INFO mlflow.tracking._tracking_service.client: 🏃 View run random_forest_WF4_run at: http://mlflow:5001/#/experiments/213449315161875335/runs/093c4fa3d84a411caa74a84af2c33e60.
2024/08/12 10:49:31 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5001/#/experiments/213449315161875335.


              precision    recall  f1-score   support

           0       0.96      0.96      0.96      9651
           1       0.85      0.86      0.86      2620

    accuracy                           0.94     12271
   macro avg       0.91      0.91      0.91     12271
weighted avg       0.94      0.94      0.94     12271

Run Name: random_forest_WF4_run | Model: random_forest_WF4 | Accuracy: 0.9380 | Precision: 0.8533 | Recall: 0.8569 | F1 Score: 0.8551 | ROC AUC: 0.9813


2024/08/12 10:49:32 INFO mlflow.tracking._tracking_service.client: 🏃 View run logistic_regression_WF5_run at: http://mlflow:5001/#/experiments/213449315161875335/runs/0c03d9c271a54078b34eea75649ddf99.
2024/08/12 10:49:32 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5001/#/experiments/213449315161875335.


              precision    recall  f1-score   support

           0       0.98      0.97      0.98      9651
           1       0.91      0.92      0.91      2620

    accuracy                           0.96     12271
   macro avg       0.94      0.95      0.95     12271
weighted avg       0.96      0.96      0.96     12271

Run Name: logistic_regression_WF5_run | Model: logistic_regression_WF5 | Accuracy: 0.9629 | Precision: 0.9083 | Recall: 0.9191 | F1 Score: 0.9137 | ROC AUC: 0.9912


2024/08/12 10:49:50 INFO mlflow.tracking._tracking_service.client: 🏃 View run random_forest_WF5_run at: http://mlflow:5001/#/experiments/213449315161875335/runs/254dff748d6a40f0bb2d85ed1d6843fc.
2024/08/12 10:49:50 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5001/#/experiments/213449315161875335.


              precision    recall  f1-score   support

           0       0.97      0.96      0.96      9651
           1       0.87      0.88      0.87      2620

    accuracy                           0.94     12271
   macro avg       0.92      0.92      0.92     12271
weighted avg       0.95      0.94      0.95     12271

Run Name: random_forest_WF5_run | Model: random_forest_WF5 | Accuracy: 0.9449 | Precision: 0.8676 | Recall: 0.8756 | F1 Score: 0.8716 | ROC AUC: 0.9850


2024/08/12 10:49:50 INFO mlflow.tracking._tracking_service.client: 🏃 View run logistic_regression_WF6_run at: http://mlflow:5001/#/experiments/213449315161875335/runs/6b66d1733a2b4522a2b5623d4e2ae723.
2024/08/12 10:49:50 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5001/#/experiments/213449315161875335.


              precision    recall  f1-score   support

           0       0.97      0.97      0.97      9651
           1       0.89      0.90      0.90      2620

    accuracy                           0.96     12271
   macro avg       0.93      0.94      0.94     12271
weighted avg       0.96      0.96      0.96     12271

Run Name: logistic_regression_WF6_run | Model: logistic_regression_WF6 | Accuracy: 0.9562 | Precision: 0.8944 | Recall: 0.9015 | F1 Score: 0.8979 | ROC AUC: 0.9888


2024/08/12 10:50:06 INFO mlflow.tracking._tracking_service.client: 🏃 View run random_forest_WF6_run at: http://mlflow:5001/#/experiments/213449315161875335/runs/8ab69bdb164b4814bd6a0cc7088dd172.
2024/08/12 10:50:06 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5001/#/experiments/213449315161875335.


              precision    recall  f1-score   support

           0       0.96      0.97      0.97      9651
           1       0.88      0.87      0.87      2620

    accuracy                           0.95     12271
   macro avg       0.92      0.92      0.92     12271
weighted avg       0.95      0.95      0.95     12271

Run Name: random_forest_WF6_run | Model: random_forest_WF6 | Accuracy: 0.9457 | Precision: 0.8787 | Recall: 0.8653 | F1 Score: 0.8719 | ROC AUC: 0.9850


2024/08/12 10:50:07 INFO mlflow.tracking._tracking_service.client: 🏃 View run logistic_regression_WF7_run at: http://mlflow:5001/#/experiments/213449315161875335/runs/7e53b55d1cea42868a2d713760bfb414.
2024/08/12 10:50:07 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5001/#/experiments/213449315161875335.


              precision    recall  f1-score   support

           0       0.99      0.99      0.99      9651
           1       0.95      0.96      0.96      2620

    accuracy                           0.98     12271
   macro avg       0.97      0.97      0.97     12271
weighted avg       0.98      0.98      0.98     12271

Run Name: logistic_regression_WF7_run | Model: logistic_regression_WF7 | Accuracy: 0.9818 | Precision: 0.9545 | Recall: 0.9607 | F1 Score: 0.9576 | ROC AUC: 0.9970


2024/08/12 10:50:22 INFO mlflow.tracking._tracking_service.client: 🏃 View run random_forest_WF7_run at: http://mlflow:5001/#/experiments/213449315161875335/runs/f2f27a70fb7843a784f17b30bbac1994.
2024/08/12 10:50:22 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5001/#/experiments/213449315161875335.


              precision    recall  f1-score   support

           0       0.98      0.98      0.98      9651
           1       0.93      0.93      0.93      2620

    accuracy                           0.97     12271
   macro avg       0.96      0.96      0.96     12271
weighted avg       0.97      0.97      0.97     12271

Run Name: random_forest_WF7_run | Model: random_forest_WF7 | Accuracy: 0.9703 | Precision: 0.9299 | Recall: 0.9313 | F1 Score: 0.9306 | ROC AUC: 0.9942
