
# 不平衡樣本測試及評估

In [3]:
!pip install -U imbalanced-learn



In [1]:
from sklearn import datasets
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from imblearn import pipeline as pl

# 生成不平衡樣本
RANDOM_STATE = 42
X, y = datasets.make_classification(n_classes=2, class_sep=2,
                                    weights=[0.1, 0.9], n_informative=10,
                                    n_redundant=1, flip_y=0, n_features=20,
                                    n_clusters_per_class=4, n_samples=5000,
                                    random_state=RANDOM_STATE)

# 採用 SVM 
pipeline = pl.make_pipeline(LinearSVC(random_state=RANDOM_STATE))

# 資料切割
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    random_state=RANDOM_STATE)

# 訓練
pipeline.fit(X_train, y_train)

# 評估
y_pred_bal = pipeline.predict(X_test)
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred_bal))

              precision    recall  f1-score   support

           0       0.82      0.63      0.71       123
           1       0.96      0.98      0.97      1127

    accuracy                           0.95      1250
   macro avg       0.89      0.81      0.84      1250
weighted avg       0.95      0.95      0.95      1250





## imbalanced-learn分類報表

In [8]:
# 如發生錯誤：AttributeError: 'NoneType' object has no attribute 'split'
!pip install -U threadpoolctl
# Kernel restart

Collecting threadpoolctl
  Downloading threadpoolctl-3.1.0-py3-none-any.whl (14 kB)
Installing collected packages: threadpoolctl
  Attempting uninstall: threadpoolctl
    Found existing installation: threadpoolctl 2.2.0
    Uninstalling threadpoolctl-2.2.0:
      Successfully uninstalled threadpoolctl-2.2.0
Successfully installed threadpoolctl-3.1.0


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
fiftyone 0.19.1 requires opencv-python-headless, which is not installed.
fiftyone 0.19.1 requires starlette==0.20.4, but you have starlette 0.26.1 which is incompatible.
animated-drawings 0.0.0 requires numpy==1.23.3, but you have numpy 1.23.5 which is incompatible.
animated-drawings 0.0.0 requires Pillow==9.3.0, but you have pillow 9.0.0 which is incompatible.
animated-drawings 0.0.0 requires scikit-learn==1.1.2, but you have scikit-learn 1.2.2 which is incompatible.


In [2]:
from sklearn import datasets
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split

from imblearn.over_sampling import SMOTE
from imblearn import pipeline as pl
from imblearn.metrics import classification_report_imbalanced

# SMOTE
X_res, y_res = SMOTE().fit_resample(X, y)

X_res.shape, y_res.shape

((9000, 20), (9000,))

In [3]:
# 資料切割
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    random_state=RANDOM_STATE)

# 訓練
pipeline.fit(X_train, y_train)

# 評估
y_pred_bal = pipeline.predict(X_test)

# Show the classification report
print(classification_report_imbalanced(y_test, y_pred_bal))

                   pre       rec       spe        f1       geo       iba       sup

          0       0.82      0.63      0.98      0.71      0.79      0.59       123
          1       0.96      0.98      0.63      0.97      0.79      0.64      1127

avg / total       0.95      0.95      0.66      0.95      0.79      0.63      1250





In [4]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred_bal))

              precision    recall  f1-score   support

           0       0.82      0.63      0.71       123
           1       0.96      0.98      0.97      1127

    accuracy                           0.95      1250
   macro avg       0.89      0.81      0.84      1250
weighted avg       0.95      0.95      0.95      1250

