In [1]:
# !pip install tqdm

In [2]:
# !pip install pycaret

In [3]:
# !pip install pycaret[all]

In [4]:
import pandas as pd
import numpy as np
import pickle

from tqdm import tqdm
tqdm.pandas()

import os
from pprint import pprint
import re
from collections import defaultdict

In [5]:
DATA_ADDRESS = "./data"
os.listdir(DATA_ADDRESS)

['.gitkeep',
 'correlation_df_mean_no_clean.csv',
 'correlation_df_std_no_clean.csv',
 'df_5s.pkl',
 'df_5s_source.pkl',
 'df_merged.pkl',
 'df_merged_pivot.xlsx',
 'df_processed_descriptive_statistics.pickle',
 'df_processed_descriptive_statistics.pkl',
 'df_processed_descriptive_statistics_mel40_mfcc20.pkl',
 'df_processed_simple.pkl',
 'df_processed_stat_no_trim_cleaned_fft2048_mel128_mfcc20.pkl',
 'df_processed_stat_no_trim_cleaned_fft2048_mel40_mfcc17.pkl',
 'df_processed_stat_no_trim_cleaned_fft512_mel64_mfcc17.pkl',
 'df_processed_stat_no_trim_no_clean_fft2048_mel128_mfcc20.pkl',
 'df_processed_stat_no_trim_no_clean_fft512_mel128_mfcc17.pkl',
 'preprocessed',
 'spearman_correlation_df_mean_cleaned.csv',
 'spearman_correlation_df_mean_no_clean.csv',
 'spearman_correlation_df_std_cleaned.csv',
 'spearman_correlation_df_std_no_clean.csv',
 'VOiCES_devkit']

In [6]:
TRAIN_DIR = os.path.join(DATA_ADDRESS,'preprocessed','train')
os.listdir(TRAIN_DIR)

['mel_len15_fft2048_mels128_mfcc17_0-499.pkl',
 'mel_len15_fft2048_mels128_mfcc17_1000-1499.pkl',
 'mel_len15_fft2048_mels128_mfcc17_10000-10499.pkl',
 'mel_len15_fft2048_mels128_mfcc17_10500-10999.pkl',
 'mel_len15_fft2048_mels128_mfcc17_11000-11499.pkl',
 'mel_len15_fft2048_mels128_mfcc17_11500-11999.pkl',
 'mel_len15_fft2048_mels128_mfcc17_12000-12499.pkl',
 'mel_len15_fft2048_mels128_mfcc17_12500-12799.pkl',
 'mel_len15_fft2048_mels128_mfcc17_1500-1999.pkl',
 'mel_len15_fft2048_mels128_mfcc17_2000-2499.pkl',
 'mel_len15_fft2048_mels128_mfcc17_2500-2999.pkl',
 'mel_len15_fft2048_mels128_mfcc17_3000-3499.pkl',
 'mel_len15_fft2048_mels128_mfcc17_3500-3999.pkl',
 'mel_len15_fft2048_mels128_mfcc17_4000-4499.pkl',
 'mel_len15_fft2048_mels128_mfcc17_4500-4999.pkl',
 'mel_len15_fft2048_mels128_mfcc17_500-999.pkl',
 'mel_len15_fft2048_mels128_mfcc17_5000-5499.pkl',
 'mel_len15_fft2048_mels128_mfcc17_5500-5999.pkl',
 'mel_len15_fft2048_mels128_mfcc17_6000-6499.pkl',
 'mel_len15_fft2048_mels1

## Load data

In [7]:
def load_pickle(filename):
    with open(filename, 'rb') as f:
        obj = pickle.load(f)
    return obj
def load_train(train_files):
    ls_X_trian = []
    for k,v in tqdm(train_files.items()):  
        train_file = v['file_name']
        file_dir = os.path.join(TRAIN_DIR, train_file)
        ls_X_trian.append(load_pickle(file_dir))
    return np.concatenate(ls_X_trian, axis=0)

### X_train

In [8]:

file_prefix = 'source_mfcc_len5_fft2048_mels128_mfcc17_'

# List all files in the directory
files = os.listdir(TRAIN_DIR)
pprint(files)



['mel_len15_fft2048_mels128_mfcc17_0-499.pkl',
 'mel_len15_fft2048_mels128_mfcc17_1000-1499.pkl',
 'mel_len15_fft2048_mels128_mfcc17_10000-10499.pkl',
 'mel_len15_fft2048_mels128_mfcc17_10500-10999.pkl',
 'mel_len15_fft2048_mels128_mfcc17_11000-11499.pkl',
 'mel_len15_fft2048_mels128_mfcc17_11500-11999.pkl',
 'mel_len15_fft2048_mels128_mfcc17_12000-12499.pkl',
 'mel_len15_fft2048_mels128_mfcc17_12500-12799.pkl',
 'mel_len15_fft2048_mels128_mfcc17_1500-1999.pkl',
 'mel_len15_fft2048_mels128_mfcc17_2000-2499.pkl',
 'mel_len15_fft2048_mels128_mfcc17_2500-2999.pkl',
 'mel_len15_fft2048_mels128_mfcc17_3000-3499.pkl',
 'mel_len15_fft2048_mels128_mfcc17_3500-3999.pkl',
 'mel_len15_fft2048_mels128_mfcc17_4000-4499.pkl',
 'mel_len15_fft2048_mels128_mfcc17_4500-4999.pkl',
 'mel_len15_fft2048_mels128_mfcc17_500-999.pkl',
 'mel_len15_fft2048_mels128_mfcc17_5000-5499.pkl',
 'mel_len15_fft2048_mels128_mfcc17_5500-5999.pkl',
 'mel_len15_fft2048_mels128_mfcc17_6000-6499.pkl',
 'mel_len15_fft2048_mels1

In [9]:
# Filter files that start with the specified prefix
train_files_mfcc = [file for file in files if file.startswith(file_prefix)]
pprint(train_files_mfcc)

['source_mfcc_len5_fft2048_mels128_mfcc17_0-499.pkl',
 'source_mfcc_len5_fft2048_mels128_mfcc17_1000-1499.pkl',
 'source_mfcc_len5_fft2048_mels128_mfcc17_1500-1551.pkl',
 'source_mfcc_len5_fft2048_mels128_mfcc17_500-999.pkl']


In [10]:
# Regular expression to match two numbers at the end of the file name
pattern = re.compile(r'_(\d+)-(\d+)\.pkl$')
n_interval = 500

# Extract the numbers
dir_files = defaultdict(dict)
for file_name in train_files_mfcc:
    match = pattern.search(file_name)
    n1 = int(match.group(1))
    n2 = int(match.group(2))
    file_num = n1/n_interval
    dir_files[file_num] = {
        'begin':n1,
        'end':n2,
        'file_name' : file_name,
    }
sorted_dir_files = {k: dir_files[k] for k in sorted(dir_files)}
pprint(sorted_dir_files)

{0.0: {'begin': 0,
       'end': 499,
       'file_name': 'source_mfcc_len5_fft2048_mels128_mfcc17_0-499.pkl'},
 1.0: {'begin': 500,
       'end': 999,
       'file_name': 'source_mfcc_len5_fft2048_mels128_mfcc17_500-999.pkl'},
 2.0: {'begin': 1000,
       'end': 1499,
       'file_name': 'source_mfcc_len5_fft2048_mels128_mfcc17_1000-1499.pkl'},
 3.0: {'begin': 1500,
       'end': 1551,
       'file_name': 'source_mfcc_len5_fft2048_mels128_mfcc17_1500-1551.pkl'}}


In [11]:
X = load_train(sorted_dir_files)
X.shape

100%|██████████| 4/4 [00:00<00:00, 592.86it/s]


(1552, 17, 216)

### y_train

In [12]:
df_raw = pd.read_pickle(os.path.join(DATA_ADDRESS,'df_5s_source.pkl'))
df_raw

Unnamed: 0,origin_folder,speaker,category,source
0,source-16k/train/sp0242,0242,train,5seconds-16k-source/train/sp0242/Lab41-SRI-VOi...
1,source-16k/train/sp0242,0242,train,5seconds-16k-source/train/sp0242/Lab41-SRI-VOi...
2,source-16k/train/sp0242,0242,train,5seconds-16k-source/train/sp0242/Lab41-SRI-VOi...
3,source-16k/train/sp0242,0242,train,5seconds-16k-source/train/sp0242/Lab41-SRI-VOi...
4,source-16k/train/sp0242,0242,train,5seconds-16k-source/train/sp0242/Lab41-SRI-VOi...
...,...,...,...,...
2322,source-16k/train/sp5456,5456,train,5seconds-16k-source/train/sp5456/Lab41-SRI-VOi...
2323,source-16k/train/sp5456,5456,train,5seconds-16k-source/train/sp5456/Lab41-SRI-VOi...
2324,source-16k/train/sp5456,5456,train,5seconds-16k-source/train/sp5456/Lab41-SRI-VOi...
2325,source-16k/train/sp5456,5456,train,5seconds-16k-source/train/sp5456/Lab41-SRI-VOi...


In [13]:
df_raw[df_raw['category'] == 'train']

Unnamed: 0,origin_folder,speaker,category,source
0,source-16k/train/sp0242,0242,train,5seconds-16k-source/train/sp0242/Lab41-SRI-VOi...
1,source-16k/train/sp0242,0242,train,5seconds-16k-source/train/sp0242/Lab41-SRI-VOi...
2,source-16k/train/sp0242,0242,train,5seconds-16k-source/train/sp0242/Lab41-SRI-VOi...
3,source-16k/train/sp0242,0242,train,5seconds-16k-source/train/sp0242/Lab41-SRI-VOi...
4,source-16k/train/sp0242,0242,train,5seconds-16k-source/train/sp0242/Lab41-SRI-VOi...
...,...,...,...,...
2322,source-16k/train/sp5456,5456,train,5seconds-16k-source/train/sp5456/Lab41-SRI-VOi...
2323,source-16k/train/sp5456,5456,train,5seconds-16k-source/train/sp5456/Lab41-SRI-VOi...
2324,source-16k/train/sp5456,5456,train,5seconds-16k-source/train/sp5456/Lab41-SRI-VOi...
2325,source-16k/train/sp5456,5456,train,5seconds-16k-source/train/sp5456/Lab41-SRI-VOi...


In [14]:
y = np.array(df_raw[df_raw['category']=='train']['speaker']).astype('float32')
print(y.shape)
y

(1552,)


array([ 242.,  242.,  242., ..., 5456., 5456., 5456.], dtype=float32)

## Modeling

### pycaret

In [15]:
# Flatten X
X = X.reshape(X.shape[0], -1)
X.shape

(1552, 3672)

In [16]:
# Convert the flattened_array to a pandas DataFrame
X_df = pd.DataFrame(X)

# Convert the 1D array y to a pandas Series
y_series = pd.Series(y, name='target')

# Concatenate the features and target into a single DataFrame
data = pd.concat([X_df, y_series], axis=1)

print(data.shape)
# Check the first few rows of the DataFrame
print(data.head())

(1552, 3673)
           0          1          2         3          4          5          6  \
0 -54.000553 -52.922588 -55.730331 -54.19994 -45.331898 -40.756733 -43.434921   
1 -54.000553 -52.922588 -55.730331 -54.19994 -45.331898 -40.756733 -43.434921   
2 -54.000553 -52.922588 -55.730331 -54.19994 -45.331898 -40.756733 -43.434921   
3 -54.000553 -52.922588 -55.730331 -54.19994 -45.331898 -40.756733 -43.434921   
4 -54.000553 -52.922588 -55.730331 -54.19994 -45.331898 -40.756733 -43.434921   

           7          8          9  ...      3663      3664      3665  \
0 -49.534546 -53.188679 -52.039848  ...  0.023779  0.023779  0.023779   
1 -49.534546 -53.188679 -52.039848  ...  0.023779  0.023779  0.023779   
2 -49.534546 -53.188679 -52.039848  ...  0.023779  0.023779  0.023779   
3 -49.534546 -53.188679 -52.039848  ...  0.023779  0.023779  0.023779   
4 -49.534546 -53.188679 -52.039848  ...  0.023779  0.023779  0.023779   

       3666      3667      3668      3669      3670      3671

In [17]:
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3663,3664,3665,3666,3667,3668,3669,3670,3671,target
0,-54.000553,-52.922588,-55.730331,-54.199940,-45.331898,-40.756733,-43.434921,-49.534546,-53.188679,-52.039848,...,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,242.0
1,-54.000553,-52.922588,-55.730331,-54.199940,-45.331898,-40.756733,-43.434921,-49.534546,-53.188679,-52.039848,...,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,242.0
2,-54.000553,-52.922588,-55.730331,-54.199940,-45.331898,-40.756733,-43.434921,-49.534546,-53.188679,-52.039848,...,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,242.0
3,-54.000553,-52.922588,-55.730331,-54.199940,-45.331898,-40.756733,-43.434921,-49.534546,-53.188679,-52.039848,...,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,242.0
4,-54.000553,-52.922588,-55.730331,-54.199940,-45.331898,-40.756733,-43.434921,-49.534546,-53.188679,-52.039848,...,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,242.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1547,-84.051529,-84.051529,-84.051529,-84.051529,-84.051529,-84.051529,-84.051529,-84.051529,-83.888466,-63.411808,...,0.023779,0.024211,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,5456.0
1548,-84.051529,-84.051529,-84.051529,-84.051529,-84.051529,-84.051529,-84.051529,-84.051529,-83.888466,-63.411808,...,0.023779,0.024211,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,5456.0
1549,-84.051529,-84.051529,-84.051529,-84.051529,-84.051529,-84.051529,-84.051529,-84.051529,-83.888466,-63.411808,...,0.023779,0.024211,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,5456.0
1550,-84.051529,-84.051529,-84.051529,-84.051529,-84.051529,-84.051529,-84.051529,-84.051529,-83.888466,-63.411808,...,0.023779,0.024211,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,0.023779,5456.0


In [18]:
from pycaret.classification import *
# Initialize the setup

In [19]:
# from pycaret.datasets import get_data
# data = get_data('diabetes')

In [20]:
clf_setup = setup(data=data, target='target', session_id=123, use_gpu = True,system_log=True,fold=5)


[LightGBM] [Info] Number of positive: 1, number of negative: 1
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 3060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 16 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 1, number of negative: 1
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 3060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 16 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> i

Unnamed: 0,Description,Value
0,Session id,123
1,Target,target
2,Target type,Multiclass
3,Target mapping,"32.0: 0, 83.0: 1, 93.0: 2, 112.0: 3, 122.0: 4, 150.0: 5, 159.0: 6, 174.0: 7, 188.0: 8, 196.0: 9, 198.0: 10, 204.0: 11, 205.0: 12, 208.0: 13, 209.0: 14, 224.0: 15, 226.0: 16, 240.0: 17, 242.0: 18, 248.0: 19, 250.0: 20, 254.0: 21, 288.0: 22, 296.0: 23, 307.0: 24, 403.0: 25, 459.0: 26, 472.0: 27, 479.0: 28, 480.0: 29, 492.0: 30, 510.0: 31, 597.0: 32, 636.0: 33, 637.0: 34, 652.0: 35, 770.0: 36, 868.0: 37, 882.0: 38, 887.0: 39, 948.0: 40, 949.0: 41, 1050.0: 42, 1052.0: 43, 1066.0: 44, 1112.0: 45, 1116.0: 46, 1121.0: 47, 1160.0: 48, 1182.0: 49, 1212.0: 50, 1235.0: 51, 1246.0: 52, 1259.0: 53, 1271.0: 54, 1272.0: 55, 1335.0: 56, 1383.0: 57, 1392.0: 58, 1417.0: 59, 1425.0: 60, 1472.0: 61, 1536.0: 62, 1607.0: 63, 1737.0: 64, 1841.0: 65, 1851.0: 66, 1867.0: 67, 1874.0: 68, 1926.0: 69, 1961.0: 70, 1963.0: 71, 1970.0: 72, 2012.0: 73, 2060.0: 74, 2074.0: 75, 2093.0: 76, 2110.0: 77, 2149.0: 78, 2156.0: 79, 2162.0: 80, 2269.0: 81, 2285.0: 82, 2289.0: 83, 2294.0: 84, 2412.0: 85, 2481.0: 86, 2532.0: 87, 2573.0: 88, 2673.0: 89, 2691.0: 90, 2758.0: 91, 2764.0: 92, 2803.0: 93, 2911.0: 94, 3235.0: 95, 3368.0: 96, 3446.0: 97, 3483.0: 98, 3521.0: 99, 3549.0: 100, 3645.0: 101, 3835.0: 102, 3923.0: 103, 3972.0: 104, 3989.0: 105, 3994.0: 106, 4010.0: 107, 4014.0: 108, 4057.0: 109, 4064.0: 110, 4110.0: 111, 4116.0: 112, 4145.0: 113, 4160.0: 114, 4331.0: 115, 4427.0: 116, 4438.0: 117, 4441.0: 118, 4535.0: 119, 4586.0: 120, 4590.0: 121, 4744.0: 122, 4839.0: 123, 4848.0: 124, 4859.0: 125, 4957.0: 126, 4967.0: 127, 5126.0: 128, 5154.0: 129, 5157.0: 130, 5189.0: 131, 5319.0: 132, 5338.0: 133, 5386.0: 134, 5400.0: 135, 5401.0: 136, 5456.0: 137, 5583.0: 138, 5635.0: 139, 5678.0: 140, 5717.0: 141, 5740.0: 142, 5789.0: 143, 5802.0: 144, 5868.0: 145, 5935.0: 146, 5968.0: 147, 6099.0: 148, 6147.0: 149, 6241.0: 150, 6319.0: 151, 6385.0: 152, 6395.0: 153, 6415.0: 154, 6454.0: 155, 6519.0: 156, 6544.0: 157, 6574.0: 158, 6696.0: 159, 6788.0: 160, 6848.0: 161, 6895.0: 162, 6965.0: 163, 7000.0: 164, 7095.0: 165, 7148.0: 166, 7247.0: 167, 7264.0: 168, 7276.0: 169, 7278.0: 170, 7445.0: 171, 7498.0: 172, 7517.0: 173, 7540.0: 174, 7688.0: 175, 7704.0: 176, 7850.0: 177, 7867.0: 178, 7868.0: 179, 7881.0: 180, 7910.0: 181, 7932.0: 182, 7976.0: 183, 7981.0: 184, 7995.0: 185, 8051.0: 186, 8057.0: 187, 8108.0: 188, 8118.0: 189, 8152.0: 190, 8222.0: 191, 8225.0: 192, 8266.0: 193, 8425.0: 194, 8575.0: 195, 8605.0: 196, 8635.0: 197, 8677.0: 198, 8713.0: 199"
4,Original data shape,"(1552, 3673)"
5,Transformed data shape,"(1552, 3673)"
6,Transformed train set shape,"(1086, 3673)"
7,Transformed test set shape,"(466, 3673)"
8,Numeric features,3672
9,Preprocess,True


[LightGBM] [Info] Number of positive: 1, number of negative: 1
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 3060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 16 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 1, number of negative: 1
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 3060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 16 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> i

In [21]:
# This function trains and evaluates different models using cross-validation and ranks them
best_model = compare_models(n_select = 3)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,1.0,0.0,1.0,1.0,1.0,1.0,1.0,65.31
nb,Naive Bayes,1.0,0.0,1.0,1.0,1.0,1.0,1.0,2.3
dt,Decision Tree Classifier,1.0,0.0,1.0,1.0,1.0,1.0,1.0,7.78
ridge,Ridge Classifier,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.24
rf,Random Forest Classifier,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.416
gbc,Gradient Boosting Classifier,1.0,0.0,1.0,1.0,1.0,1.0,1.0,128.356
et,Extra Trees Classifier,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.432
xgboost,Extreme Gradient Boosting,1.0,0.0,1.0,1.0,1.0,1.0,1.0,32.692
lightgbm,Light Gradient Boosting Machine,1.0,0.0,1.0,1.0,1.0,1.0,1.0,162.056
catboost,CatBoost Classifier,1.0,0.0,1.0,1.0,1.0,1.0,1.0,563.198


In [None]:
for model in best_model:
    print(model)

[LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=1000,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=123, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False), GaussianNB(priors=None, var_smoothing=1e-09), DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_samples_leaf=1,
                       min_samples_split=2, min_weight_fraction_leaf=0.0,
                       random_state=123, splitter='best')]


In [27]:
len(best_model)

3

In [32]:
evaluate_model(best_model[0])

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

In [29]:
evaluate_model(best_model[1])

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

In [30]:
evaluate_model(best_model[2])

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

In [31]:
save_model(best_model[0], 'best_model_0')
save_model(best_model[1], 'best_model_1')
save_model(best_model[2], 'best_model_2')


Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=Memory(location=None),
          steps=[('label_encoding',
                  TransformerWrapperWithInverse(exclude=None, include=None,
                                                transformer=LabelEncoder())),
                 ('numerical_imputer',
                  TransformerWrapper(exclude=None,
                                     include=['0', '1', '2', '3', '4', '5', '6',
                                              '7', '8', '9', '10', '11', '12',
                                              '13', '14', '15', '16', '17', '18',
                                              '19', '20', '21', '22', '23', '24',
                                              '25', '26', '27', '28', '29',...
                                                               missing_values=nan,
                                                               strategy='most_frequent',
                                                               verbose='deprecated'))),
                 

In [None]:
print("Done")

Done
