# 模型融合(Ensemble Learning)技术

<center><img src="http://ml2022.oss-cn-hangzhou.aliyuncs.com/img/image-20221101183239483.png" alt="image-20221101183239483" style="zoom:50%;" /></center>

In [1]:
import joblib
import pathlib
import warnings
import time

import pandas as pd
import numpy as np

# Notebook-wide setup: silence library warnings and remove pandas' display
# truncation so wide/long DataFrames render in full in cell output.
warnings.filterwarnings("ignore")
pd.options.display.max_columns = None
pd.options.display.max_rows = None

# Project directories, relative to the notebook's working directory.
PROCESSED_DATA_DIR = pathlib.Path("../dataset/processed")  # processed data artifacts
BASEMODEL_DIR = pathlib.Path("../app/models/base-models")  # persisted base models
EXTERNAL_DIR = pathlib.Path("../dataset/external")

## 加载数据

### 加载数据集

In [2]:
from sklearn.model_selection import train_test_split

# Load the persisted feature table and label vector.
inputs = joblib.load(PROCESSED_DATA_DIR / "inputs.joblib")
target = joblib.load(PROCESSED_DATA_DIR / "target.joblib")

# Hold out 25% of the rows for testing; stratifying on the label keeps the
# class ratio the same in both splits, and the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    inputs,
    target,
    test_size=0.25,
    random_state=42,
    stratify=target,
)

# Displayed as cell output: class counts of the training labels.
y_train.value_counts()

fraudulent
0    12632
1     8452
Name: count, dtype: int64

### 加载预处理器

In [3]:
# Load the serialized preprocessing object; the bare name on the last line
# displays it as the cell output.
# NOTE: joblib.load unpickles arbitrary objects, so only load artifacts that
# come from a trusted source.
preprocessor = joblib.load(PROCESSED_DATA_DIR / "preprocessor.joblib")
preprocessor

### 加载基础模型

In [4]:
# Load the three persisted base models; they serve as the first-level
# estimators of every ensemble built below.
# NOTE(review): the [Pipeline] training logs further down name the final step
# of the `lr` pipeline "SGDClassifier", not LogisticRegression — confirm that
# the file name matches the estimator actually stored in it.
tree = joblib.load(BASEMODEL_DIR / "DecisionTreeClassifier.joblib")
lr = joblib.load(BASEMODEL_DIR / "LogisticRegression.joblib")
rf = joblib.load(BASEMODEL_DIR / "RandomForestClassifier.joblib")

## 投票法(Voting)

首先如果模型最终输出的是类别判别结果，则可以通过投票法进行模型融合，投票法会根据**少数服从多数**的规则进行结果输出，例如现有A、B、C、D、E五个模型对现有数据进行预测，结果如下：

<table>
  <tr>
    <th rowspan="2">样本</th>
    <th colspan="5">单模预测结果</th>
    <th colspan="2">投票结果</th>
    <th>最终预测结果</th>
  </tr>
  <tr>
    <th>模型A</th>
    <th>模型B</th>
    <th>模型C</th>
    <th>模型D</th>
    <th>模型E</th>
    <th>预测为0</th>
    <th>预测为1</th>
    <th>规则：少数服从多数</th>
  </tr>
  <tr>
    <td>1</td>
    <td>1</td>
    <td>1</td>
    <td>1</td>
    <td>1</td>
    <td>0</td>
    <td>1</td>
    <td>4</td>
    <td>1</td>
  </tr>
  <tr>
    <td>2</td>
    <td>1</td>
    <td>1</td>
    <td>1</td>
    <td>0</td>
    <td>0</td>
    <td>2</td>
    <td>3</td>
    <td>1</td>
  </tr>
  <tr>
    <td>3</td>
    <td>1</td>
    <td>1</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>3</td>
    <td>2</td>
    <td>0</td>
  </tr>
  <tr>
    <td>4</td>
    <td>1</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>4</td>
    <td>1</td>
    <td>0</td>
  </tr>
</table>

&emsp;&emsp;而这样的一个投票集成的过程，到底能带来多少性能上的提升呢？从理论上来说，根据 Narasimhamurthy (2003) 的研究，在多样性构建得比较好的情况下，投票融合性能边界如下：

<center><img src="https://s2.loli.net/2022/05/20/uVrzS79LaBsJgKp.png" alt="image-20220520122231347" style="zoom:33%;" /></center>

能够看出，在单体分类器准确率为80%左右（较为普遍的情况）时，模型投票融合能有平均约15%的准确率提升。当然，该理论实际上是基于分类器相互独立的假设推导而来，而在大多数真实场景下，该假设并不成立，因此该理论的结论可以视作一个理论上限，并不能代表一般情况。

### 硬投票(Hard Voting)

In [5]:
from sklearn.ensemble import VotingClassifier

In [6]:
# (name, model) pairs shared by all VotingClassifier / StackingClassifier
# instances below. The order matters: the weight lists used later are
# positional (tree, lr, rf).
estimators = [
    ("tree", tree),
    ("lr", lr),
    ("rf", rf),
]

In [7]:
# Hard voting: every base model predicts a class label and the majority
# label is the ensemble prediction.
hard_vc = VotingClassifier(estimators, voting="hard")

# Time the fit with perf_counter: it is a monotonic, high-resolution clock,
# so the measured interval is not distorted by system clock adjustments
# (time.time() is a wall clock and can jump).
start_time = time.perf_counter()
hard_vc.fit(X_train, y_train)
end_time = time.perf_counter()
print(f"HardVotingClassifier训练花费的时间为: {(end_time-start_time):.4f} 秒")

[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   5.6s
[Pipeline]  (step 2 of 2) Processing DecisionTreeClassifier, total=   0.1s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   4.9s
[Pipeline] ..... (step 2 of 2) Processing SGDClassifier, total=   0.3s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   4.7s
[Pipeline]  (step 2 of 2) Processing RandomForestClassifier, total=   3.8s
HardVotingClassifier训练花费的时间为: 19.3394 秒


In [8]:
from sklearn.metrics import cohen_kappa_score, recall_score, roc_auc_score

# Per-metric accumulators: each evaluation cell below appends one value per
# ensemble, and the three lists become the columns of the final results table.
# Re-running an evaluation cell appends a duplicate entry, so re-run this cell
# first when re-evaluating.
cohen_kappa_score_list = []
recall_score_list = []
roc_auc_score_list = []

In [9]:
# Predict once and reuse the labels for all three metrics: every predict()
# call re-runs the preprocessing step of each base pipeline, so calling it
# three times tripled the inference cost for identical output.
hard_vc_pred = hard_vc.predict(X_test)

cohen_kappa_score_list.append(cohen_kappa_score(y_test, hard_vc_pred))
recall_score_list.append(recall_score(y_test, hard_vc_pred))
# AUC is computed from hard class labels (hard voting exposes no class
# probabilities), so for this binary task it equals balanced accuracy.
roc_auc_score_list.append(roc_auc_score(y_test, hard_vc_pred))

### 软投票(Soft Voting)

In [10]:
# Soft voting: the ensemble averages the base models' predicted class
# probabilities and picks the class with the highest mean probability.
soft_vc = VotingClassifier(estimators, voting="soft")

# perf_counter is monotonic and high-resolution, which makes it the right
# clock for measuring elapsed time (time.time() can jump with the wall clock).
start_time = time.perf_counter()
soft_vc.fit(X_train, y_train)
end_time = time.perf_counter()
print(f"SoftVotingClassifier 训练花费的时间为: {(end_time-start_time):.4f} 秒")

[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   4.3s
[Pipeline]  (step 2 of 2) Processing DecisionTreeClassifier, total=   0.1s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   4.3s
[Pipeline] ..... (step 2 of 2) Processing SGDClassifier, total=   0.2s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   4.0s
[Pipeline]  (step 2 of 2) Processing RandomForestClassifier, total=   3.8s
SoftVotingClassifier 训练花费的时间为: 16.7130 秒


In [11]:
# Predict once and reuse the labels; each predict() call re-runs every base
# pipeline, so the original three calls did the same work three times.
soft_vc_pred = soft_vc.predict(X_test)

cohen_kappa_score_list.append(cohen_kappa_score(y_test, soft_vc_pred))
recall_score_list.append(recall_score(y_test, soft_vc_pred))
# NOTE(review): AUC is computed from predicted labels rather than
# predict_proba scores, which keeps it comparable with the hard-voting row of
# the results table — confirm this is intended.
roc_auc_score_list.append(roc_auc_score(y_test, soft_vc_pred))

## 均值法

### 权重设计策略

#### 平均为主，博采众长

In [12]:
# Weight each base model by its accuracy on the training set, in the same
# order as `estimators` (tree, lr, rf).
# NOTE(review): these are training-set scores (the random forest reaches 1.0
# in the output below), so they reflect fit to the training data rather than
# generalization — a held-out or cross-validated score may be a better weight.
weight1, weight2, weight3 = (
    base_model.score(X_train, y_train) for base_model in (tree, lr, rf)
)
weights = [weight1, weight2, weight3]

# Displayed as cell output.
weights

[0.9611079491557579, 0.9718744071333713, 1.0]

In [13]:
# Weighted soft voting: class probabilities are averaged with the per-model
# weights computed in the previous cell.
soft_vc_weight = VotingClassifier(
    estimators=estimators,
    voting="soft",
    weights=weights,
)

# perf_counter is monotonic and high-resolution, which makes it the right
# clock for measuring elapsed time (time.time() can jump with the wall clock).
start_time = time.perf_counter()
soft_vc_weight.fit(X_train, y_train)
end_time = time.perf_counter()
print(f"SoftVotingClassifierWithWeight训练花费的时间为: {(end_time-start_time):.4f} 秒")

[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   4.0s
[Pipeline]  (step 2 of 2) Processing DecisionTreeClassifier, total=   0.1s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   4.0s
[Pipeline] ..... (step 2 of 2) Processing SGDClassifier, total=   0.2s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   4.4s
[Pipeline]  (step 2 of 2) Processing RandomForestClassifier, total=   3.9s
SoftVotingClassifierWithWeight训练花费的时间为: 16.6367 秒


In [14]:
# Predict once and reuse the labels; each predict() call re-runs every base
# pipeline, so the original three calls did the same work three times.
soft_vc_weight_pred = soft_vc_weight.predict(X_test)

cohen_kappa_score_list.append(cohen_kappa_score(y_test, soft_vc_weight_pred))
recall_score_list.append(recall_score(y_test, soft_vc_weight_pred))
# NOTE(review): AUC is computed from predicted labels rather than
# predict_proba scores, consistent with the other rows of the results table —
# confirm this is intended.
roc_auc_score_list.append(roc_auc_score(y_test, soft_vc_weight_pred))

#### 设计核心评估器与辅助评估器

In [15]:
# The random forest performed best among the base classifiers, so it is used
# as the core estimator and the other two act as auxiliary estimators:
# the core estimator gets weight 100, each auxiliary estimator gets weight 1.
weight1 = 1
weight2 = 1
weight3 = 100

# Positional weights, matching the order of `estimators` (tree, lr, rf).
weights = [weight1, weight2, weight3]
soft_vc_core_weight = VotingClassifier(
    estimators=estimators,
    voting="soft",
    weights=weights,
)

# perf_counter is monotonic and high-resolution, which makes it the right
# clock for measuring elapsed time (time.time() can jump with the wall clock).
start_time = time.perf_counter()
soft_vc_core_weight.fit(X_train, y_train)
end_time = time.perf_counter()
print(f"SoftVotingClassifierWithCoreWeight训练花费的时间为: {(end_time-start_time):.4f} 秒")

[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   4.1s
[Pipeline]  (step 2 of 2) Processing DecisionTreeClassifier, total=   0.1s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   4.2s
[Pipeline] ..... (step 2 of 2) Processing SGDClassifier, total=   0.2s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   4.2s
[Pipeline]  (step 2 of 2) Processing RandomForestClassifier, total=   3.7s
SoftVotingClassifierWithCoreWeight训练花费的时间为: 16.6103 秒


In [16]:
# Predict once and reuse the labels; each predict() call re-runs every base
# pipeline, so the original three calls did the same work three times.
soft_vc_core_weight_pred = soft_vc_core_weight.predict(X_test)

cohen_kappa_score_list.append(cohen_kappa_score(y_test, soft_vc_core_weight_pred))
recall_score_list.append(recall_score(y_test, soft_vc_core_weight_pred))
# NOTE(review): AUC is computed from predicted labels rather than
# predict_proba scores, consistent with the other rows of the results table —
# confirm this is intended.
roc_auc_score_list.append(roc_auc_score(y_test, soft_vc_core_weight_pred))

## Stacking法

#### 手动实现StackingClassifier

构造二级学习器的训练数据

In [17]:
# Build the meta-learner's inputs: one column per base model, holding that
# model's predicted class label for every sample (shape: n_samples x 3).
# NOTE(review): the training meta-features are predictions on X_train itself;
# if the loaded base models were fitted on this same data these are in-sample
# predictions and the meta-learner may overfit to them — confirm, or use
# out-of-fold predictions as StackingClassifier does.
train_pre1, train_pre2, train_pre3 = (
    base_model.predict(X_train) for _, base_model in estimators
)
train_stack = np.column_stack([train_pre1, train_pre2, train_pre3])

# Same layout for the test set.
test_pre1, test_pre2, test_pre3 = (
    base_model.predict(X_test) for _, base_model in estimators
)
test_stack = np.column_stack([test_pre1, test_pre2, test_pre3])

In [18]:
from sklearn.linear_model import LogisticRegression

# Meta-learner of the hand-built stack: a logistic regression trained on the
# base models' predicted labels (train_stack), not on the raw features.
stacking_model = LogisticRegression(random_state=42)

# perf_counter is monotonic and high-resolution, which matters for a fit this
# short (time.time() is a wall clock and can jump).
start_time = time.perf_counter()
stacking_model.fit(train_stack, y_train)
end_time = time.perf_counter()
print(f"ManualStackingClassifier训练花费的时间为: {(end_time-start_time):.4f} 秒")

ManualStackingClassifier训练花费的时间为: 0.0143 秒


In [19]:
# Predict once on the stacked test features and reuse the labels for all
# three metrics instead of calling predict() three times.
stacking_pred = stacking_model.predict(test_stack)

cohen_kappa_score_list.append(cohen_kappa_score(y_test, stacking_pred))
recall_score_list.append(recall_score(y_test, stacking_pred))
# NOTE(review): AUC is computed from predicted labels rather than
# predict_proba scores, consistent with the other rows of the results table —
# confirm this is intended.
roc_auc_score_list.append(roc_auc_score(y_test, stacking_pred))

#### sklearn实现

StackingClassifier的核心参数解释如下：

|参数|解释|
|:--:|:--:|
|estimators|一级评估器|
|final_estimator|二级评估器，默认是逻辑回归|
|cv|一级评估器交叉训练折数|
|stack_method|选择概率结果还是类别结果进行元学习器的训练|
|passthrough|是否额外带入原始数据特征进行元学习器的训练|

* `estimators`参数:参数结构和投票法评估器结构一致，都是需要创建一个由（模型名称、模型）所组成的一个列表。
* `final_estimator`参数：元学习器，只需要实例化一个`sklearn`中的评估器即可；
* `cv`：一级评估器交叉训练的折数，默认是五折。
* `stack_method`：可以选择元学习器的训练数据类型，可选`auto`、`predict_proba`、`decision_function`、`predict`四个不同取值。
    * `predict_proba`: 即带入样本类别概率进行训练，
    * `decision_function`：是SVM特殊的一种模型输出结果，代表样本到分割超平面的（置信）距离，同样也可以充当类似概率的作用，距离越短，则模型判断越不肯定（相当于概率越趋近于0.5）
    * `predict`：则是样本类别结果，相当于是Stacking“硬投票”
    * `auto`：默认参数取值，即根据不同模型，按照`predict_proba`>`decision_function`>`predict`的优先级进行参数选择。当然，对于逻辑回归、决策树和随机森林来说，参数输入`auto`时就是根据预测概率训练元学习器。
* `passthrough`：是否额外带入原始数据集特征进行元学习器训练，
    * 默认参数是`False`：只带入一级学习器的预测结果训练元学习器。
    * `True`：拼凑一个由一级学习器输出结果和原始特征共同拼接而成的数据集，用于元学习器的训练。该操作本质上其实是一种特征增强方法，常常用于层级堆叠结构的模型训练过程，包括某些`Boosting`、深度森林的级联训练等，都有可能用到特征增强技术

In [20]:
from sklearn.ensemble import StackingClassifier

##### StackingClassifier硬投票

In [21]:
# "Hard" stacking: the meta-learner is trained on the base models' predicted
# class labels (stack_method="predict"), produced out-of-fold with 5-fold CV.
hard_vsc = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(random_state=42),
    stack_method="predict",
    cv=5,
)

# perf_counter is monotonic and high-resolution, which makes it the right
# clock for measuring elapsed time (time.time() can jump with the wall clock).
start_time = time.perf_counter()
hard_vsc.fit(X_train, y_train)
end_time = time.perf_counter()
print(f"HardStackingClassifier训练花费的时间为: {(end_time-start_time):.4f} 秒")

[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   4.3s
[Pipeline]  (step 2 of 2) Processing DecisionTreeClassifier, total=   0.1s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   4.1s
[Pipeline] ..... (step 2 of 2) Processing SGDClassifier, total=   0.2s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   4.2s
[Pipeline]  (step 2 of 2) Processing RandomForestClassifier, total=   3.8s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   3.5s
[Pipeline]  (step 2 of 2) Processing DecisionTreeClassifier, total=   0.1s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   3.4s
[Pipeline]  (step 2 of 2) Processing DecisionTreeClassifier, total=   0.1s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   3.5s
[Pipeline]  (step 2 of 2) Processing DecisionTreeClassifier, total=   0.1s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   3.4s
[Pipeline]  (step 2 of 2) Processing DecisionTreeClassifi

In [22]:
# Predict once and reuse the labels; each predict() call re-runs every base
# pipeline, so the original three calls did the same work three times.
hard_vsc_pred = hard_vsc.predict(X_test)

cohen_kappa_score_list.append(cohen_kappa_score(y_test, hard_vsc_pred))
recall_score_list.append(recall_score(y_test, hard_vsc_pred))
# NOTE(review): AUC is computed from predicted labels rather than
# predict_proba scores, consistent with the other rows of the results table —
# confirm this is intended.
roc_auc_score_list.append(roc_auc_score(y_test, hard_vsc_pred))

##### StackingClassifier软投票

In [23]:
# "Soft" stacking: the meta-learner is trained on the base models' predicted
# class probabilities (stack_method="predict_proba"), produced out-of-fold
# with 5-fold CV.
soft_vsc = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(random_state=42),
    cv=5,
    stack_method="predict_proba",
)

# perf_counter is monotonic and high-resolution, which makes it the right
# clock for measuring elapsed time (time.time() can jump with the wall clock).
start_time = time.perf_counter()
soft_vsc.fit(X_train, y_train)
end_time = time.perf_counter()
print(f"SoftStackingClassifier训练花费的时间为: {(end_time-start_time):.4f} 秒")

[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   4.1s
[Pipeline]  (step 2 of 2) Processing DecisionTreeClassifier, total=   0.1s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   4.2s
[Pipeline] ..... (step 2 of 2) Processing SGDClassifier, total=   0.2s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   4.2s
[Pipeline]  (step 2 of 2) Processing RandomForestClassifier, total=   3.8s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   3.4s
[Pipeline]  (step 2 of 2) Processing DecisionTreeClassifier, total=   0.1s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   3.4s
[Pipeline]  (step 2 of 2) Processing DecisionTreeClassifier, total=   0.1s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   3.5s
[Pipeline]  (step 2 of 2) Processing DecisionTreeClassifier, total=   0.1s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   3.3s
[Pipeline]  (step 2 of 2) Processing DecisionTreeClassifi

In [24]:
# Predict once and reuse the labels; each predict() call re-runs every base
# pipeline, so the original three calls did the same work three times.
soft_vsc_pred = soft_vsc.predict(X_test)

cohen_kappa_score_list.append(cohen_kappa_score(y_test, soft_vsc_pred))
recall_score_list.append(recall_score(y_test, soft_vsc_pred))
# NOTE(review): AUC is computed from predicted labels rather than
# predict_proba scores, consistent with the other rows of the results table —
# confirm this is intended.
roc_auc_score_list.append(roc_auc_score(y_test, soft_vsc_pred))

#### 多层Stacking

其实从此前的介绍不难看出，Stacking的本质就是围绕上一层模型输出结果进行学习，借此提升最终预测效果。而在这个过程中，元学习器本身也是可以输出概率预测结果的，也就是说，某个元学习器之后还可以再堆叠一层元学习器。而如果两层的堆叠能够提升单模效果，那么更多层的堆叠则有可能进一步提升学习能力，从而进一步提升模型效果。当然，伴随着Stacking结构更加复杂，融合的过拟合风险也会更高。

In [25]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

# Two-level stack. The meta-learner of the outer stack is itself a
# StackingClassifier (`final_layer`) that combines a logistic regression and
# a random forest through an L1-penalized logistic regression.
log_final = LogisticRegression(random_state=42)
rf_final = RandomForestClassifier(random_state=42)
final_layer = StackingClassifier(
    estimators=[("log_final", log_final), ("RF_final", rf_final)],
    # The saga solver shuffles the data, so it is seeded like every other
    # estimator in this notebook to keep the run reproducible.
    final_estimator=LogisticRegression(
        penalty="l1", solver="saga", random_state=42
    ),
)

# Outer stack: the three base pipelines feed `final_layer`, with 10-fold
# cross-validation used to build the meta-learner's training data.
multi_layer = StackingClassifier(
    estimators=estimators, final_estimator=final_layer, cv=10
)

# perf_counter is monotonic and high-resolution, which makes it the right
# clock for measuring elapsed time (time.time() can jump with the wall clock).
start_time = time.perf_counter()
multi_layer.fit(X_train, y_train)
end_time = time.perf_counter()
print(f"MultiLayerStackingClassifier 训练花费的时间为: {(end_time-start_time):.4f} 秒")

[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   4.3s
[Pipeline]  (step 2 of 2) Processing DecisionTreeClassifier, total=   0.1s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   4.3s
[Pipeline] ..... (step 2 of 2) Processing SGDClassifier, total=   0.3s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   4.3s
[Pipeline]  (step 2 of 2) Processing RandomForestClassifier, total=   3.8s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   4.0s
[Pipeline]  (step 2 of 2) Processing DecisionTreeClassifier, total=   0.1s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   3.7s
[Pipeline]  (step 2 of 2) Processing DecisionTreeClassifier, total=   0.1s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   3.8s
[Pipeline]  (step 2 of 2) Processing DecisionTreeClassifier, total=   0.1s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   3.7s
[Pipeline]  (step 2 of 2) Processing DecisionTreeClassifi

In [26]:
# Predict once and reuse the labels; each predict() call re-runs every base
# pipeline plus the nested stack, so the original three calls did the same
# work three times.
multi_layer_pred = multi_layer.predict(X_test)

cohen_kappa_score_list.append(cohen_kappa_score(y_test, multi_layer_pred))
recall_score_list.append(recall_score(y_test, multi_layer_pred))
# NOTE(review): AUC is computed from predicted labels rather than
# predict_proba scores, consistent with the other rows of the results table —
# confirm this is intended.
roc_auc_score_list.append(roc_auc_score(y_test, multi_layer_pred))

#### Stacking模型融合优化策略

`Stacking`模型融合和投票法与均值法类似，原理不难，但要获得一个稳定的优化效果却并没有那么简单。并且相比投票法与均值法，`Stacking`过程采用模型来学习一级评估器的输出结果和标签之间的关系，过拟合的倾向会更加明显。当然，关于`Stacking`容易过拟合的另一个理解的角度是：由于第一层学习器就已经提取了和标签更有关联度的特征，因此元学习器的学习难度偏弱，元学习器更容易过拟合。因此主要的融合优化策略有以下几种：
* 一、二级学习器优化：包括一级学习器训练方法优化与元学习器优化。这是最基础同时也是最核心的优化策略，其核心目的在于平衡Stacking融合的学习效果与过拟合倾向之间的关系
* 多层`Stacking`，通过叠加更多层来提高`Stacking`的学习效果，当然在大多数情况下解决单层`Stacking`融合的过拟合问题已属实不易，要用好多层`Stacking`则更是难上加难
* 特征增强：一级学习器和元学习器带入不同的特征（或衍生特征的）组合，来提高模型多样性，并最终提升Stacking融合效果

## Blending法

> 该模型的训练还未完成，后续进一步完善！

Blending融合的基本过程和Stacking融合较为类似，都是两层模型的基本架构，即都是一级学习器进行训练，然后一级学习器的训练结果带入元学习器进行学习和训练。而和Stacking有所不同的是，为了避免一组数据重复训练导致的过拟合，Blending会在训练集中进一步划分训练集和留出集，一般比例为5：5到9：1不等。其中训练集用于一级学习器的训练，然后一级学习器围绕留出集进行预测，预测结果拼接成类似Stacking中的oof数据集，再将其带入元学习器进行模型训练。至此，即完成了两层模型的训练，其基本过程如下：

### Blending模型的实现

#### 划分训练集和留出集

首先，是训练集和留出集的划分，这里我们按照8：2的比例进行划分，并将训练集命名为`trainOE`、留出集命名为`leave`

In [27]:
# X_trainOE, X_leave, y_trainOE, y_leave = train_test_split(X_train, y_train,  test_size=0.2, random_state=42)
# X_trainOE.sample(n=1, random_state=42)

#### 一级学习器训练

##### 决策树模型

In [28]:
# from sklearn.pipeline import Pipeline

# tree = Pipeline(
#     steps=[
#         ("preprocessor", preprocessor),
#         (
#             "DecisionTreeClassifier",
#             DecisionTreeClassifier(random_state=42)
#         ),
#     ],
#     memory=None,
#     verbose=True,
# )

# tree.fit(X_trainOE, y_trainOE)

##### 逻辑回归模型

In [29]:
# lr = Pipeline(
#     steps=[
#         ("preprocessor", preprocessor),
#         (
#             "LogisticRegression",
#             LogisticRegression(random_state=42)
#         ),
#     ],
#     memory=None,
#     verbose=True,
# )
# lr.fit(X_trainOE, y_trainOE)

##### 随机森林模型

In [30]:
# rf = Pipeline(
#     steps=[
#         ("preprocessor", preprocessor),
#         (
#             "RandomForestClassifier",
#             RandomForestClassifier(random_state=42)
#         ),
#     ],
#     memory=None,
#     verbose=True,
# )
# rf.fit(X_trainOE, y_trainOE)

#### 元学习器训练和测试数据创建

In [31]:
# train_blending = pd.DataFrame(
#     {
#         "lr_blending": lr.predict_proba(X_leave)[:, 1],
#         "rf_blending": rf.predict_proba(X_leave)[:, 1],
#         "tree_blending": tree.predict_proba(X_leave)[:, 1],
#     }
# )
# train_blending.sample(5, random_state=42)

In [32]:
# test_blending = pd.DataFrame(
#     {
#         "lr_blending": lr.predict_proba(X_test)[:, 1],
#         "rf_blending": rf.predict_proba(X_test)[:, 1],
#         "tree_blending": tree.predict_proba(X_test)[:, 1],
#     }
# )
# test_blending.sample(5, random_state=42)

#### 元学习器训练测试

In [33]:
# lr = LogisticRegression(random_state=42)
# lr.fit(train_blending, y_leave)

## 保存数据

In [34]:
# Collect the metrics into one table: one row per ensemble, in the order the
# evaluation cells were run, one column per metric.
results = pd.DataFrame(
    dict(
        Kappa=cohen_kappa_score_list,
        Recall=recall_score_list,
        AUC=roc_auc_score_list,
    )
)

# Displayed as cell output.
results

Unnamed: 0,Kappa,Recall,AUC
0,0.937327,0.939674,0.965087
1,0.938426,0.93577,0.964917
2,0.939333,0.936835,0.965449
3,0.949926,0.951029,0.971952
4,0.961898,0.96203,0.978521
5,0.947598,0.952803,0.971296
6,0.957506,0.964514,0.977151
7,0.959683,0.973031,0.979391


In [35]:
# Persist every fitted ensemble as a compressed joblib file, one per model.
# The target directory is built with pathlib (like the other project paths)
# and created if missing, so the dump does not fail on a fresh checkout.
FUSION_MODEL_DIR = pathlib.Path("../app/models/fusion-models")
FUSION_MODEL_DIR.mkdir(parents=True, exist_ok=True)

# Keys m1..m8 follow the row order of the results table.
# Note that m5 (stacking_model) is only the meta-learner of the hand-built
# stack: it was fitted on the stacked base-model predictions, not on raw
# features, so it must be fed the same kind of input at inference time.
models = {
    "m1": hard_vc,
    "m2": soft_vc,
    "m3": soft_vc_weight,
    "m4": soft_vc_core_weight,
    "m5": stacking_model,
    "m6": hard_vsc,
    "m7": soft_vsc,
    "m8": multi_layer,
}

for model_name, model in models.items():
    joblib.dump(
        value=model,
        filename=FUSION_MODEL_DIR / f"{model_name}.joblib",
        compress=5,
    )