# **Import Required Libraries**

In [19]:
# Standard-library helpers needed to locate the project before importing from it.
import sys
from pathlib import Path 

# Parent of the resolved current working directory. Presumably the notebook is
# started from a folder sitting directly under the project root -- TODO confirm.
project_root = Path().resolve().parent 
sys.path.insert(0, str(project_root)) # put that folder first on sys.path so the `src` package below can be imported

# Project helpers (`src`) plus the third-party libraries used by the preprocessing steps.
from src import utils
from copy import deepcopy
from sklearn.preprocessing import OneHotEncoder
from src.utils import serialize_data
import numpy as np
import pandas as pd
from typing import Any

# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation / feature-name warnings raised by pandas and scikit-learn.
import warnings
warnings.filterwarnings('ignore')

# **Load Dataset**

In [20]:
# all interim splits are stored side by side in <project_root>/data/interim
interim_dir = project_root / 'data' / 'interim'

# pickle location of every feature / target split
X_train_path = interim_dir / 'X_train.pkl'
y_train_path = interim_dir / 'y_train.pkl'
X_valid_path = interim_dir / 'X_valid.pkl'
y_valid_path = interim_dir / 'y_valid.pkl'
X_test_path = interim_dir / 'X_test.pkl'
y_test_path = interim_dir / 'y_test.pkl'

# load the splits back into memory (features, then target, for train / valid / test)
X_train = utils.deserialize_data(path=X_train_path)
y_train = utils.deserialize_data(path=y_train_path)
X_valid = utils.deserialize_data(path=X_valid_path)
y_valid = utils.deserialize_data(path=y_valid_path)
X_test = utils.deserialize_data(path=X_test_path)
y_test = utils.deserialize_data(path=y_test_path)

Load object. . .
/home/bagaskoroah/ml_process/BAGAS_MLPROCESS/data/interim/X_train.pkl has been successfully loaded!.
Load object. . .
/home/bagaskoroah/ml_process/BAGAS_MLPROCESS/data/interim/y_train.pkl has been successfully loaded!.
Load object. . .
/home/bagaskoroah/ml_process/BAGAS_MLPROCESS/data/interim/X_valid.pkl has been successfully loaded!.
Load object. . .
/home/bagaskoroah/ml_process/BAGAS_MLPROCESS/data/interim/y_valid.pkl has been successfully loaded!.
Load object. . .
/home/bagaskoroah/ml_process/BAGAS_MLPROCESS/data/interim/X_test.pkl has been successfully loaded!.
Load object. . .
/home/bagaskoroah/ml_process/BAGAS_MLPROCESS/data/interim/y_test.pkl has been successfully loaded!.


In [21]:
# report the dimensions of the raw training features, then preview the first rows
print(f'X train shape origin: {X_train.shape}')
X_train.head()

X train shape origin: (26064, 11)


Unnamed: 0,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_grade,loan_amnt,loan_int_rate,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length
15884,25,241875,MORTGAGE,4.0,EDUCATION,A,16000,7.05,0.07,N,4
15138,21,18000,RENT,5.0,PERSONAL,B,1500,12.18,0.08,N,4
7474,25,53000,MORTGAGE,10.0,MEDICAL,B,16000,12.53,0.3,N,2
18212,28,16800,OWN,,MEDICAL,C,5000,13.98,0.3,N,8
6493,25,50000,MORTGAGE,2.0,VENTURE,A,10000,7.9,0.2,N,2


# **Drop Data Duplicates**

In [22]:
# helper that removes duplicated feature rows and keeps the target aligned with what is left
def drop_duplicate_data(X: pd.DataFrame, y: pd.Series):
    
    """
    Remove duplicate rows from a feature dataset while keeping the target labels aligned.

    Duplicates are detected on the feature columns only; the first occurrence of
    each row is kept. The inputs are never modified.

    Parameters
    ----------
    X : pd.DataFrame
        Feature dataset (train/validation/test) from which duplicate rows
        will be removed.

    y : pd.Series
        Target labels corresponding to the rows in X. When both indexes hold
        unique labels, y is selected by the labels that survive in X. Otherwise
        y is filtered by row position, which requires len(y) == len(X).

    Returns
    -------
    X : pd.DataFrame
        Cleaned feature dataset with duplicate rows removed.

    y : pd.Series
        Target series aligned with the cleaned feature dataset.

    Raises
    ------
    TypeError
        If X is not a DataFrame or y is not a Series.
    KeyError
        If the indexes are unique but a surviving label of X is missing from y.
    ValueError
        If an index holds repeated labels and X and y differ in length, so the
        rows cannot be matched either by label or by position.
    """

    # validation input data
    if not isinstance(X, pd.DataFrame):
        raise TypeError('Parameter X should be in the dataframe type.')
    elif not isinstance(y, pd.Series):
        raise TypeError('Parameter y should be in the series type.')
    else:
        print('Function drop_duplicate_data: parameter has been successfully validated.')

    # check initial X data shape
    print(f'Function drop_duplicate_data: data shape before dropping duplicate: {X.shape}')

    # flag duplicated rows once and reuse the mask for reporting and filtering
    duplicate_filter = X.duplicated().to_numpy()
    n_drop = int(duplicate_filter.sum())
    print(f'Function drop_duplicate_data: duplicated data shape is {(n_drop, X.shape[1])}')

    # predict data shape after dropping duplicates
    expected_shape = (X.shape[0] - n_drop, X.shape[1])
    print(f'Function drop_duplicate_data: dataset shape after dropping duplicates should be {expected_shape}')

    # drop duplicates without touching the caller's objects
    keep_mask = ~duplicate_filter
    X_clean = X.loc[keep_mask].copy()

    if X.index.is_unique and y.index.is_unique:
        # unique labels: label-based selection is unambiguous
        y_clean = y.loc[X_clean.index].copy()
    elif len(y) == len(X):
        # repeated labels: `.loc` would return every row sharing a label and
        # break the alignment, so filter by row position instead
        y_clean = y.iloc[keep_mask].copy()
    else:
        raise ValueError(
            'Function drop_duplicate_data: X and y cannot be aligned because the index '
            'contains repeated labels and X and y have different lengths.'
        )

    # shape after dropping
    print(f'\nFunction drop_duplicate_data: dataset shape after dropping duplicate is {X_clean.shape}')

    return X_clean, y_clean

In [23]:
# remove duplicated rows from the training split; the helper returns the matching labels as well
X_train, y_train = drop_duplicate_data(X_train, y_train)

Function drop_duplicate_data: parameter has been successfully validated.
Function drop_duplicate_data: data shape before dropping duplicate: (26064, 11)
Function drop_duplicate_data: duplicated data shape is (96, 11)
Function drop_duplicate_data: dataset shape after dropping duplicates should be (25968, 11)

Function drop_duplicate_data: dataset shape after dropping duplicate is (25968, 11)


# **Numerical Imputation**

In [24]:
def median_imputation(data: pd.DataFrame, subset_data: Any, fit: bool) -> Any:
    
    """
    Fit or apply median imputation, depending on `fit`.

    Parameters
    ----------
    data : pd.DataFrame
        Dataset (train/test/valid) to read medians from or to impute.
    subset_data : list or dict
        - fit=True  -> list of column names whose medians are computed.
        - fit=False -> dict mapping column name to the value used to fill
          missing entries of that column.
    fit : bool
        - True  -> fitting stage: compute the medians.
        - False -> imputation stage: fill missing values with `subset_data`.

    Returns
    -------
    dict or pd.DataFrame
        - fit=True  -> dict of {column name: median}.
        - fit=False -> imputed copy of `data`.

    Raises
    ------
    RuntimeError
        If `data` is not a DataFrame, if `fit` compares equal to neither True
        nor False, or if `subset_data` has the wrong type for the chosen mode.
    """

    # the dataframe check comes first, exactly like the other argument checks below
    if not isinstance(data, pd.DataFrame):
        raise RuntimeError('Function median_imputation: Data parameter should be in the dataframe type.')

    # pick the container type (and the matching complaint) required by the chosen mode
    if fit == True:
        required_type = list
        type_complaint = 'Function median_imputation: When the fit parameter is set to True, subset_data must be of type list and should contain a list of column names for which the median values will be calculated and used as imputation values for those columns.'
    elif fit == False:
        required_type = dict
        type_complaint = 'Function median_imputation: When the fit parameter is set to False, subset_data must be of type dictionary and should contain column names as the dict key and median values related to the column as the dict value.'
    else:
        raise RuntimeError('Function median_imputation: Fit parameter should be in bool data type with the True or False value.')

    if not isinstance(subset_data, required_type):
        raise RuntimeError(type_complaint)

    print('Function median_imputation: Parameter has been succesfully validated.')

    # work on private copies so the caller's objects are left untouched
    frame = data.copy()
    spec = deepcopy(subset_data)

    # fit stage: one median per requested column
    if fit == True:
        medians = {column: frame[column].median() for column in spec}
        print(f'Function median_imputation: Fitting process has been done, here is the result: {medians}')
        return medians

    # imputation stage: report NA counts, fill, report again
    print('Function median_imputation: Count NA information before imputation process:')
    print(frame.isna().sum())
    print('')

    frame.fillna(spec, inplace=True)

    print('Function median_imputation: Count NA information after imputation process:')
    print(frame.isna().sum())
    print('')
    return frame

In [25]:
# fit median imputation: compute the medians of the listed columns on X_train
impute_columns = ['person_emp_length', 'loan_int_rate']
subset_data = median_imputation(data=X_train, subset_data=impute_columns, fit=True)

Function median_imputation: Parameter has been succesfully validated.
Function median_imputation: Fitting process has been done, here is the result: {'person_emp_length': np.float64(4.0), 'loan_int_rate': np.float64(10.99)}


In [26]:
# fill the missing values of X_train with the values stored in subset_data
X_train = median_imputation(data=X_train, subset_data=subset_data, fit=False)

Function median_imputation: Parameter has been succesfully validated.
Function median_imputation: Count NA information before imputation process:
person_age                       0
person_income                    0
person_home_ownership            0
person_emp_length              734
loan_intent                      0
loan_grade                       0
loan_amnt                        0
loan_int_rate                 2491
loan_percent_income              0
cb_person_default_on_file        0
cb_person_cred_hist_length       0
dtype: int64

Function median_imputation: Count NA information after imputation process:
person_age                    0
person_income                 0
person_home_ownership         0
person_emp_length             0
loan_intent                   0
loan_grade                    0
loan_amnt                     0
loan_int_rate                 0
loan_percent_income           0
cb_person_default_on_file     0
cb_person_cred_hist_length    0
dtype: int64



In [27]:
# fill the missing values of X_valid with the values stored in subset_data
X_valid = median_imputation(data=X_valid, subset_data=subset_data, fit=False)

Function median_imputation: Parameter has been succesfully validated.
Function median_imputation: Count NA information before imputation process:
person_age                      0
person_income                   0
person_home_ownership           0
person_emp_length              80
loan_intent                     0
loan_grade                      0
loan_amnt                       0
loan_int_rate                 312
loan_percent_income             0
cb_person_default_on_file       0
cb_person_cred_hist_length      0
dtype: int64

Function median_imputation: Count NA information after imputation process:
person_age                    0
person_income                 0
person_home_ownership         0
person_emp_length             0
loan_intent                   0
loan_grade                    0
loan_amnt                     0
loan_int_rate                 0
loan_percent_income           0
cb_person_default_on_file     0
cb_person_cred_hist_length    0
dtype: int64



In [28]:
# fill the missing values of X_test with the values stored in subset_data
X_test = median_imputation(data=X_test, subset_data=subset_data, fit=False)

Function median_imputation: Parameter has been succesfully validated.
Function median_imputation: Count NA information before imputation process:
person_age                      0
person_income                   0
person_home_ownership           0
person_emp_length              77
loan_intent                     0
loan_grade                      0
loan_amnt                       0
loan_int_rate                 303
loan_percent_income             0
cb_person_default_on_file       0
cb_person_cred_hist_length      0
dtype: int64

Function median_imputation: Count NA information after imputation process:
person_age                    0
person_income                 0
person_home_ownership         0
person_emp_length             0
loan_intent                   0
loan_grade                    0
loan_amnt                     0
loan_int_rate                 0
loan_percent_income           0
cb_person_default_on_file     0
cb_person_cred_hist_length    0
dtype: int64



# **Categorical Encoding**

In [29]:
# build, fit and persist a one-hot encoder for a single categorical feature
def create_onehot_encoder(categories: list, path: "str | Path") -> OneHotEncoder:
    """
    Create, fit, and save a OneHotEncoder object based on predefined categories.

    Parameters
    ----------
    categories : list
        A list of categorical values that will be used to fit the encoder.
        These categories represent all possible values that should be encoded.

    path : str or pathlib.Path
        The file path where the trained OneHotEncoder object will be saved
        (e.g., "../models/ohe_home_ownership.pkl"). It is forwarded unchanged
        to `serialize_data`.

    Returns
    -------
    OneHotEncoder
        A fitted OneHotEncoder instance that can be reused later for
        transforming categorical data consistently.

    Raises
    ------
    RuntimeError
        If `categories` is not a list or `path` is neither a str nor a Path.
    """

    # validate parameters; Path objects are accepted because the rest of the
    # notebook builds its locations with pathlib and hands them to serialize_data
    if not isinstance(categories, list):
        raise RuntimeError('Function create_onehot_encoder: Categories parameter must be in the list data type, containing the categories to encode.')
    elif not isinstance(path, (str, Path)):
        raise RuntimeError('Function create_onehot_encoder: Path parameter must be in the str or pathlib.Path data type, containing the saved encoder directory.')
    print('Function create_onehot_encoder: Parameters have been successfully validated.')

    # fit encoder on a single-column 2D array: one row per category value
    ohe = OneHotEncoder()
    ohe.fit(np.array(categories).reshape(-1,1))

    # save encoder
    serialize_data(data=ohe, path=path)

    # print fitted categories
    print(f'Categories have been fitted: {ohe.categories_[0].tolist()}\n')
    return ohe

In [30]:
# Encoders are stored under <project_root>/models, anchored on project_root like
# the data paths instead of on a '../' string relative to the working directory.
# The locations are passed as str because create_onehot_encoder validates the type.
models_dir = project_root / 'models'

# one encoder per categorical feature, fitted on the values observed in X_train
person_home_ownership = X_train['person_home_ownership'].unique().tolist()
ohe_home_ownership = create_onehot_encoder(categories=person_home_ownership, path=str(models_dir / 'ohe_home_ownership.pkl'))

loan_intent = X_train['loan_intent'].unique().tolist()
ohe_loan_intent = create_onehot_encoder(categories=loan_intent, path=str(models_dir / 'ohe_loan_intent.pkl'))

loan_grade = X_train['loan_grade'].unique().tolist()
ohe_loan_grade = create_onehot_encoder(categories=loan_grade, path=str(models_dir / 'ohe_loan_grade.pkl'))

cb_person_default_on_file = X_train['cb_person_default_on_file'].unique().tolist()
ohe_default_on_file = create_onehot_encoder(categories=cb_person_default_on_file, path=str(models_dir / 'ohe_default_on_file.pkl'))

Function create_onehot_encoder: Parameters have been successfully validated.
Saving object. . .
Your object has been successfully saved and stored into: ../models/ohe_home_ownership.pkl

Categories have been fitted: ['MORTGAGE', 'OTHER', 'OWN', 'RENT']

Function create_onehot_encoder: Parameters have been successfully validated.
Saving object. . .
Your object has been successfully saved and stored into: ../models/ohe_loan_intent.pkl

Categories have been fitted: ['DEBTCONSOLIDATION', 'EDUCATION', 'HOMEIMPROVEMENT', 'MEDICAL', 'PERSONAL', 'VENTURE']

Function create_onehot_encoder: Parameters have been successfully validated.
Saving object. . .
Your object has been successfully saved and stored into: ../models/ohe_loan_grade.pkl

Categories have been fitted: ['A', 'B', 'C', 'D', 'E', 'F', 'G']

Function create_onehot_encoder: Parameters have been successfully validated.
Saving object. . .
Your object has been successfully saved and stored into: ../models/ohe_default_on_file.pkl

Categor

In [31]:
# replace one categorical column by its one-hot columns, using an already fitted encoder
def ohe_transform(dataset: pd.DataFrame, subset: str, prefix: str, ohe: OneHotEncoder):
    
    """
    Transform a categorical column into one-hot encoded features using a fitted OneHotEncoder.

    Parameters
    ----------
    dataset : pd.DataFrame
        The input dataset containing the categorical column to be encoded.
        It is not modified; a new DataFrame is returned.

    subset : str
        The name of the categorical column in `dataset` that will be transformed.

    prefix : str
        The prefix that will be added to the generated encoded column names.
        Example: prefix="loan_grade" will produce columns such as
        "loan_grade_A", "loan_grade_B", etc.

    ohe : OneHotEncoder
        A fitted sklearn.preprocessing.OneHotEncoder object.
        The encoder must already be trained on the corresponding categorical feature.

    Returns
    -------
    pd.DataFrame
        A new DataFrame where the selected categorical column has been replaced
        by its one-hot encoded representation (encoded columns are appended at
        the end, one per entry of `ohe.categories_[0]`).

    Raises
    ------
    RuntimeError
        If an argument has the wrong type or `subset` is not a column of `dataset`.
    """

    # validation
    if not isinstance(dataset, pd.DataFrame):
        raise RuntimeError('Function ohe_transform: Dataset parameter must be in the dataframe type.')
    elif not isinstance(ohe, OneHotEncoder):
        raise RuntimeError('Function ohe_transform: OHE parameter must be in the OneHotEncoder type.')
    elif not isinstance(prefix, str):
        raise RuntimeError('Function ohe_transform: Prefix parameter must be in the str type.')
    elif not isinstance(subset, str):
        raise RuntimeError('Function ohe_transform: Subset parameter must be in the str type.')

    # check subset exists (explicit membership test instead of a bare except)
    if subset not in dataset.columns.tolist():
        raise RuntimeError('Function ohe_transform: Subset parameter is string, but it could not be found in the column list on dataset.')
    
    print('Function ohe_transform: Parameters have been successfully validated.')

    # columns before encoding
    print(f'Function ohe_transform: List of data columns before encoding: {dataset.columns.tolist()}')
    
    # create encoded column names, in the encoder's own category order
    col_names = [
        f'{prefix}_{col_name}' for col_name in ohe.categories_[0]
    ]

    # transform; the encoder returns a sparse matrix by default, but a dense
    # array (or a DataFrame) is possible depending on how it was configured
    transformed = ohe.transform(dataset[[subset]])
    if hasattr(transformed, 'toarray'):
        transformed = transformed.toarray()
    encoded = pd.DataFrame(data=np.asarray(transformed),
                           columns=col_names,
                           index=dataset.index)
    
    # append the encoded columns, then drop the original column;
    # both steps build new objects, so the caller's frame stays untouched
    dataset = pd.concat([dataset, encoded], axis=1).drop(columns=[subset])

    # columns after encoding
    print(f'Function ohe_transform: Column that have been successfully encoded: {dataset.columns.tolist()}')

    return dataset

In [32]:
# encode X_train: apply each fitted encoder to its categorical column, one after another
for _column, _prefix, _encoder in (
    ("person_home_ownership", "home_ownership", ohe_home_ownership),
    ("loan_intent", "loan_intent", ohe_loan_intent),
    ("loan_grade", "loan_grade", ohe_loan_grade),
    ("cb_person_default_on_file", "default_onfile", ohe_default_on_file),
):
    X_train = ohe_transform(dataset=X_train, subset=_column, prefix=_prefix, ohe=_encoder)

Function ohe_transform: Parameters have been successfully validated.
Function ohe_transform: List of data columns before encoding: ['person_age', 'person_income', 'person_home_ownership', 'person_emp_length', 'loan_intent', 'loan_grade', 'loan_amnt', 'loan_int_rate', 'loan_percent_income', 'cb_person_default_on_file', 'cb_person_cred_hist_length']
Function ohe_transform: Column that have been successfully encoded: ['person_age', 'person_income', 'person_emp_length', 'loan_intent', 'loan_grade', 'loan_amnt', 'loan_int_rate', 'loan_percent_income', 'cb_person_default_on_file', 'cb_person_cred_hist_length', 'home_ownership_MORTGAGE', 'home_ownership_OTHER', 'home_ownership_OWN', 'home_ownership_RENT']
Function ohe_transform: Parameters have been successfully validated.
Function ohe_transform: List of data columns before encoding: ['person_age', 'person_income', 'person_emp_length', 'loan_intent', 'loan_grade', 'loan_amnt', 'loan_int_rate', 'loan_percent_income', 'cb_person_default_on_file

In [33]:
# encode X_valid: apply each fitted encoder to its categorical column, one after another
for _column, _prefix, _encoder in (
    ("person_home_ownership", "home_ownership", ohe_home_ownership),
    ("loan_intent", "loan_intent", ohe_loan_intent),
    ("loan_grade", "loan_grade", ohe_loan_grade),
    ("cb_person_default_on_file", "default_onfile", ohe_default_on_file),
):
    X_valid = ohe_transform(dataset=X_valid, subset=_column, prefix=_prefix, ohe=_encoder)

Function ohe_transform: Parameters have been successfully validated.
Function ohe_transform: List of data columns before encoding: ['person_age', 'person_income', 'person_home_ownership', 'person_emp_length', 'loan_intent', 'loan_grade', 'loan_amnt', 'loan_int_rate', 'loan_percent_income', 'cb_person_default_on_file', 'cb_person_cred_hist_length']
Function ohe_transform: Column that have been successfully encoded: ['person_age', 'person_income', 'person_emp_length', 'loan_intent', 'loan_grade', 'loan_amnt', 'loan_int_rate', 'loan_percent_income', 'cb_person_default_on_file', 'cb_person_cred_hist_length', 'home_ownership_MORTGAGE', 'home_ownership_OTHER', 'home_ownership_OWN', 'home_ownership_RENT']
Function ohe_transform: Parameters have been successfully validated.
Function ohe_transform: List of data columns before encoding: ['person_age', 'person_income', 'person_emp_length', 'loan_intent', 'loan_grade', 'loan_amnt', 'loan_int_rate', 'loan_percent_income', 'cb_person_default_on_file

Function ohe_transform: Column that have been successfully encoded: ['person_age', 'person_income', 'person_emp_length', 'loan_amnt', 'loan_int_rate', 'loan_percent_income', 'cb_person_default_on_file', 'cb_person_cred_hist_length', 'home_ownership_MORTGAGE', 'home_ownership_OTHER', 'home_ownership_OWN', 'home_ownership_RENT', 'loan_intent_DEBTCONSOLIDATION', 'loan_intent_EDUCATION', 'loan_intent_HOMEIMPROVEMENT', 'loan_intent_MEDICAL', 'loan_intent_PERSONAL', 'loan_intent_VENTURE', 'loan_grade_A', 'loan_grade_B', 'loan_grade_C', 'loan_grade_D', 'loan_grade_E', 'loan_grade_F', 'loan_grade_G']
Function ohe_transform: Parameters have been successfully validated.
Function ohe_transform: List of data columns before encoding: ['person_age', 'person_income', 'person_emp_length', 'loan_amnt', 'loan_int_rate', 'loan_percent_income', 'cb_person_default_on_file', 'cb_person_cred_hist_length', 'home_ownership_MORTGAGE', 'home_ownership_OTHER', 'home_ownership_OWN', 'home_ownership_RENT', 'loan_in

In [34]:
# encode X_test: apply each fitted encoder to its categorical column, one after another
for _column, _prefix, _encoder in (
    ("person_home_ownership", "home_ownership", ohe_home_ownership),
    ("loan_intent", "loan_intent", ohe_loan_intent),
    ("loan_grade", "loan_grade", ohe_loan_grade),
    ("cb_person_default_on_file", "default_onfile", ohe_default_on_file),
):
    X_test = ohe_transform(dataset=X_test, subset=_column, prefix=_prefix, ohe=_encoder)

Function ohe_transform: Parameters have been successfully validated.
Function ohe_transform: List of data columns before encoding: ['person_age', 'person_income', 'person_home_ownership', 'person_emp_length', 'loan_intent', 'loan_grade', 'loan_amnt', 'loan_int_rate', 'loan_percent_income', 'cb_person_default_on_file', 'cb_person_cred_hist_length']
Function ohe_transform: Column that have been successfully encoded: ['person_age', 'person_income', 'person_emp_length', 'loan_intent', 'loan_grade', 'loan_amnt', 'loan_int_rate', 'loan_percent_income', 'cb_person_default_on_file', 'cb_person_cred_hist_length', 'home_ownership_MORTGAGE', 'home_ownership_OTHER', 'home_ownership_OWN', 'home_ownership_RENT']
Function ohe_transform: Parameters have been successfully validated.
Function ohe_transform: List of data columns before encoding: ['person_age', 'person_income', 'person_emp_length', 'loan_intent', 'loan_grade', 'loan_amnt', 'loan_int_rate', 'loan_percent_income', 'cb_person_default_on_file

In [35]:
# sanity check: dimensions of X_train once every categorical column is encoded, then a preview
print(f'X_train after encoding process: {X_train.shape}')
X_train.head()

X_train after encoding process: (25968, 26)


Unnamed: 0,person_age,person_income,person_emp_length,loan_amnt,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,home_ownership_MORTGAGE,home_ownership_OTHER,home_ownership_OWN,...,loan_intent_VENTURE,loan_grade_A,loan_grade_B,loan_grade_C,loan_grade_D,loan_grade_E,loan_grade_F,loan_grade_G,default_onfile_N,default_onfile_Y
15884,25,241875,4.0,16000,7.05,0.07,4,1.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
15138,21,18000,5.0,1500,12.18,0.08,4,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
7474,25,53000,10.0,16000,12.53,0.3,2,1.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
18212,28,16800,4.0,5000,13.98,0.3,8,0.0,0.0,1.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
6493,25,50000,2.0,10000,7.9,0.2,2,1.0,0.0,0.0,...,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


# **Serialize Encoded Data**

In [36]:
# write the preprocessed splits to <project_root>/data/processed as "<name>_prep.pkl"
# NOTE(review): of the targets only y_train is written here (it is the only one changed
# in this notebook, by the duplicate drop); presumably y_valid / y_test are read from
# data/interim downstream -- confirm.
processed_dir = project_root / 'data' / 'processed'

for _name, _payload in (
    ('X_train', X_train),
    ('X_valid', X_valid),
    ('X_test', X_test),
    ('y_train', y_train),
):
    serialize_data(data=_payload, path=processed_dir / f'{_name}_prep.pkl')

Saving object. . .
Your object has been successfully saved and stored into: /home/bagaskoroah/ml_process/BAGAS_MLPROCESS/data/processed/X_train_prep.pkl

Saving object. . .
Your object has been successfully saved and stored into: /home/bagaskoroah/ml_process/BAGAS_MLPROCESS/data/processed/X_valid_prep.pkl

Saving object. . .
Your object has been successfully saved and stored into: /home/bagaskoroah/ml_process/BAGAS_MLPROCESS/data/processed/X_test_prep.pkl

Saving object. . .
Your object has been successfully saved and stored into: /home/bagaskoroah/ml_process/BAGAS_MLPROCESS/data/processed/y_train_prep.pkl

