In [None]:
import os
import pandas as pd
import random
import numpy as np
from autogluon.tabular import TabularDataset, TabularPredictor
import autogluon.core as ag

import warnings
warnings.filterwarnings("ignore")

def seed_everything(seed):
    """Seed the global random number generators used by this notebook.

    Seeds Python's ``random`` module and NumPy's legacy global generator, and
    stores ``str(seed)`` in the ``PYTHONHASHSEED`` environment variable.

    Args:
        seed: Value handed to ``random.seed`` and ``np.random.seed``.
    """
    random.seed(seed)
    # NOTE: CPython reads PYTHONHASHSEED at interpreter startup, so this
    # assignment does not change hashing in the already-running kernel; it is
    # only visible to processes that inherit this environment.
    hash_seed = str(seed)
    os.environ['PYTHONHASHSEED'] = hash_seed
    np.random.seed(seed)

# Fix the global RNG state before any data is loaded.
seed_everything(69)

# Read the raw training and test tables from the working directory
# (train first, then test).
train_data, test_data = map(pd.read_csv, ('./train.csv', './test.csv'))

In [None]:
# Wrap the raw frames in AutoGluon's TabularDataset container.
train_data = TabularDataset(train_data)
test_data = TabularDataset(test_data)

# Remove the 'ID' column from both frames before training and prediction.
# A non-mutating drop with errors='ignore' keeps this cell re-runnable: the
# previous in-place drop raised KeyError on a second execution because 'ID'
# had already been removed from the rebound frames.
train_data = train_data.drop(columns='ID', errors='ignore')
test_data = test_data.drop(columns='ID', errors='ignore')

# Training configuration consumed by the TabularPredictor cell.
label = '전화해지여부'  # target column (Korean; roughly "phone service cancelled or not")
eval_metric = 'f1_macro'
time_limit = 6 * 60 * 60  # 21600; presumably seconds (i.e. six hours) — confirm against TabularPredictor.fit

In [None]:
# Configure a binary classifier scored with `eval_metric` on the `label` column.
predictor = TabularPredictor(
    label=label,
    problem_type='binary',
    eval_metric=eval_metric,
)

# Train with the 'best_quality' preset, three stacking levels, the configured
# time budget and one GPU. `fit` returns the predictor, so rebinding keeps the
# same object.
# To leave a model family out, pass e.g. excluded_model_types=['CAT'] to fit().
predictor = predictor.fit(
    train_data,
    presets='best_quality',
    num_stack_levels=3,
    time_limit=time_limit,
    num_gpus=1,
)

In [6]:
# Show the per-model leaderboard. Ending the cell with the DataFrame itself
# uses the notebook's rich table display; print() wrapped the wide frame
# across several text blocks and truncated it.
predictor.leaderboard(silent=True)

                      model  score_val  pred_time_val      fit_time  \
0       WeightedEnsemble_L5   0.879520      70.110687  18687.059829   
1       WeightedEnsemble_L4   0.879049      55.920799  11078.315858   
2    NeuralNetFastAI_BAG_L3   0.878717      55.345263  10912.223183   
3    NeuralNetFastAI_BAG_L4   0.878160      66.741636  17575.823117   
4     NeuralNetTorch_BAG_L4   0.876145      66.087039  18053.954360   
5       WeightedEnsemble_L3   0.875950      49.149645   5316.622108   
6            XGBoost_BAG_L3   0.874981      53.040211  10149.001335   
7      LightGBMLarge_BAG_L2   0.874959      44.957846   5084.666288   
8           CatBoost_BAG_L3   0.874506      52.676873  13516.771563   
9     ExtraTreesEntr_BAG_L3   0.874433      53.215357   9995.237875   
10          CatBoost_BAG_L4   0.874216      64.871957  18480.201667   
11        LightGBMXT_BAG_L4   0.874034      65.150200  17020.806683   
12           XGBoost_BAG_L4   0.874033      65.128175  17063.605708   
13    

In [None]:
# Feature importances of the fitted predictor, computed on the training frame
# (the earlier "#original" note presumably refers to the original input
# features — confirm).
# NOTE(review): AutoGluon's documentation cautions that importances computed
# on the training data can be biased; prefer labelled held-out data if
# available — confirm.
predictor.feature_importance(data=train_data)

In [None]:
# Predict the test set with the model the predictor reports as its best.
model_to_use = predictor.get_model_best()
model_pred = predictor.predict(test_data, model=model_to_use)

submission = pd.read_csv('./sample_submission.csv')

# Assigning a Series to a column aligns on index labels, so a row-count or
# index mismatch between the predictions and the sample file would silently
# write NaN into the submission. Fail loudly on a size mismatch and assign by
# position instead.
assert len(submission) == len(model_pred), (
    f"sample_submission has {len(submission)} rows but "
    f"{len(model_pred)} predictions were produced"
)
submission["전화해지여부"] = model_pred.to_numpy()
submission.to_csv('./Gluon_submission.csv', index=False, encoding="utf-8")