# KampVoter

## Import Library

### Basic Library

In [83]:
from kamp.preprocess import KampDataLoader
from kamp.models import KampVoter

from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import f1_score, classification_report, confusion_matrix
import pandas as pd

### Models

In [84]:
# Forest Models
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier

# Boosting Models
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier

# Linear Models
from sklearn.linear_model import LogisticRegression

# Discriminant Models
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis

# Distance Based Models
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

# NN Models
from sklearn.neural_network import MLPClassifier

## Data Load

In [85]:
# Location of the raw dataset CSV consumed by the loader.
DATA_PATH = './data/경진대회용 주조 공정최적화 데이터셋.csv'

# Configure the preprocessing pipeline; every option is passed by keyword.
data_loader = KampDataLoader(
    path=DATA_PATH,
    # Original note (translated): leaving this processing off worked better --
    # without it: 0.944, with it: at most 0.922.
    # NOTE(review): the metric behind these numbers is not stated; confirm.
    do_count_trend=False, drop_count=False,
    get_useful_p_data=True, p_threshold=0.05,
    outlier_method='iso', iso_outlier_rate=0.0075,
    # Resampling is switched off; its related options are kept for reference:
    #   downsampled_pass_rate=1.0,
    #   upsampled_fail_rate_about_pass=1.0,
    #   upsample_method='adasyn',
    do_resample=False,
    scale_include_cat=False,
)

# Run the pipeline, then fetch the resulting splits.
data_loader.process()
data = data_loader.load()

# Unpack the train/test features and labels from the loader's result.
x_train, y_train = data['train_data'], data['train_label']
x_test, y_test = data['test_data'], data['test_label']


[Process Log] Loading Raw Data...
[Process Log] Done

[Process Log] Processing Nan Value...
[Process Log] Done

[Process Log] Encoding Categorical Features...
[Process Log] Done

[Process Log] Removing Outliers (IsoForest)...
[Outlier-Remover Log] With Outliers Shape : (89753, 23)
[Outlier-Remover Log] Without Outliers Shape : (89079, 23)
[Process Log] Done

[Process Log] T-Testing...
[Process Log] Done

[Process Log] Data Scaling (MinMaxScaler)...
[Process Log] Done

[Process Log] Train Test Spliting...
[Process Log] Done



## Modeling

[Best Version]

```python
voting_models = {
    'catboost' : CatBoostClassifier(random_state=42, verbose=0),
    'lgbm' : LGBMClassifier(random_state=42, verbose=0),
    'xgb' : XGBClassifier(random_state=42),
}

model_weights = [1,1.5,1]

kamp_voter = KampVoter(voting_models=voting_models,
                       model_weights=model_weights, 
                       voting_method='soft')
```

```text
"-----------------------------------------------------------"
f1_score : 0.9598765432098766

confusion matrix : 
[[17142    18]
 [   34   622]]

classification report : 
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00     17160
         1.0       0.97      0.95      0.96       656

    accuracy                           1.00     17816
   macro avg       0.98      0.97      0.98     17816
weighted avg       1.00      1.00      1.00     17816
```

In [None]:
# Three boosting classifiers, each seeded with 42, to be combined by KampVoter.
voting_models = dict(
    catboost=CatBoostClassifier(random_state=42, verbose=0),
    lgbm=LGBMClassifier(random_state=42, verbose=0),
    xgb=XGBClassifier(random_state=42),
)

# One weight per model, written in the same order as the entries above
# (1.5 for lgbm, 1 for the other two).
# NOTE(review): presumably matched to the models by position -- confirm in KampVoter.
model_weights = [1, 1.5, 1]

kamp_voter = KampVoter(
    voting_models=voting_models,
    model_weights=model_weights,
    voting_method='soft',
)

In [119]:
# Fit the voter on the training split returned by the data loader.
kamp_voter.fit(x_train, y_train)

[Voting] ................. (1 of 3) Processing catboost, total=   7.6s
[Voting] ..................... (2 of 3) Processing lgbm, total=   0.2s
[Voting] ...................... (3 of 3) Processing xgb, total=   0.2s


## Evaluation

In [120]:
# Score the fitted voter on the training split (the same data it was fit on).
kamp_voter.evaluate(x_train, y_train)

f1_score : 0.9952134788435765

confusion matrix : 
[[68639     1]
 [   24  2599]]

classification report : 
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00     68640
         1.0       1.00      0.99      1.00      2623

    accuracy                           1.00     71263
   macro avg       1.00      1.00      1.00     71263
weighted avg       1.00      1.00      1.00     71263




In [121]:
# Score the fitted voter on the test split returned by the data loader.
kamp_voter.evaluate(x_test, y_test)

f1_score : 0.9583333333333334

confusion matrix : 
[[17141    19]
 [   35   621]]

classification report : 
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00     17160
         1.0       0.97      0.95      0.96       656

    accuracy                           1.00     17816
   macro avg       0.98      0.97      0.98     17816
weighted avg       1.00      1.00      1.00     17816


