In [1]:
import pandas as pd 
import numpy as np 
import matplotlib as mpl
import matplotlib.pyplot as plt
# Plot styling: use the Malgun Gothic font family for all text and render minus
# signs with the ASCII hyphen instead of the Unicode minus glyph.
plt.rcParams.update({
    'font.family': 'Malgun Gothic',
    'axes.unicode_minus': False,
})
import seaborn as sns
import scipy.stats as stats

In [4]:
from sklearn.model_selection import train_test_split

from sklearn.naive_bayes import GaussianNB

from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [3]:
# Load the bank data set.
df_raw = pd.read_csv('data/bank.csv')

# Missing JOB values become an explicit 'Other' category. The result is assigned
# back to the column instead of calling fillna(inplace=True) on the column
# selection, which may operate on a temporary copy and leave df_raw unchanged
# when pandas Copy-on-Write is active.
df_raw['JOB'] = df_raw['JOB'].fillna('Other')

# Fill the remaining gaps with each numeric column's mean. numeric_only=True
# restricts mean() to numeric columns; without it pandas warns about (older
# versions) or fails on (pandas >= 2.0) the non-numeric columns.
df_raw = df_raw.fillna(df_raw.mean(numeric_only=True))

# One-hot encode the categorical columns.
df_raw_dummy = pd.get_dummies(df_raw)

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible.
df_train, df_test = train_test_split(df_raw_dummy, test_size=0.2, random_state=1234)
print('학습용 데이터의 크기: {}'.format(df_train.shape))
print('평가용 데이터의 크기: {}'.format(df_test.shape))

# Separate the target column (BAD) from the feature columns for each split.
df_train_y = df_train['BAD']
df_train_x = df_train.drop(columns='BAD')
df_test_y = df_test['BAD']
df_test_x = df_test.drop(columns='BAD')

학습용 데이터의 크기: (2998, 19)
평가용 데이터의 크기: (750, 19)


  df_raw.fillna(df_raw.mean(), inplace = True)


In [5]:
# Build the model: a Gaussian naive Bayes classifier created with its default
# arguments and fitted on the training split.
NB_uncustomized = GaussianNB()
NB_model = NB_uncustomized.fit(df_train_x, df_train_y)

# Score the fitted model on both splits, then report each with three decimals.
train_accuracy = NB_model.score(df_train_x, df_train_y)
test_accuracy = NB_model.score(df_test_x, df_test_y)
print('학습용 데이터의 정확도: {:.3f}'.format(train_accuracy))
print('평가용 데이터의 정확도: {:.3f}'.format(test_accuracy))

학습용 데이터의 정확도: 0.917
평가용 데이터의 정확도: 0.924


In [6]:
# Display the fitted estimator's parameter settings (rendered as the cell output).
NB_model.get_params()

{'priors': None, 'var_smoothing': 1e-09}

In [7]:
# Evaluation: predict labels for the held-out split, then print the confusion
# matrix followed by the per-class precision / recall / f1 report.
y_pred = NB_model.predict(df_test_x)

print(
    confusion_matrix(df_test_y, y_pred),
    classification_report(df_test_y, y_pred),
    sep='\n',
)

[[680   4]
 [ 53  13]]
              precision    recall  f1-score   support

           0       0.93      0.99      0.96       684
           1       0.76      0.20      0.31        66

    accuracy                           0.92       750
   macro avg       0.85      0.60      0.64       750
weighted avg       0.91      0.92      0.90       750



# 실습

In [8]:
# Load the telecom customer data set and preview its first five rows.
df_tel = pd.read_csv(filepath_or_buffer='data/tele_customer.csv')
df_tel.head(n=5)

Unnamed: 0,CHURN,CUSTOMER_ID,GENDER,AGE,CHARGE_TYPE,HANDSET,USAGE_BAND,SERVICE_DURATION,DROPPED_CALLS,PEAK_CALLS_NO,PEAK_CALLS_TIME,WEEKEND_CALLS_NO,WEEKEND_CALLS_TIME,TOTAL_CALLS_NO,TOTAL_CALLS_TIME
0,Active,K102990,F,31,CAT 100,ASAD170,Med,33.33,6,218,379.8,9,21.6,366,632.4
1,Active,K103280,M,27,CAT 100,S50,Med,30.87,10,373,656.4,11,28.0,411,810.4
2,Active,K103660,M,27,CAT 100,ASAD170,Med,49.4,11,260,582.0,0,0.0,265,614.1
3,Active,K103730,M,61,CAT 100,ASAD170,Med,48.6,1,294,661.8,14,101.5,326,844.0
4,Active,K104560,F,52,CAT 100,S50,Med,56.03,0,221,555.6,7,25.0,274,673.0


In [9]:
# Encode the target as integers: 'Active' -> 0, 'Churned' -> 1.
# The result is assigned back to the column rather than calling
# replace(inplace=True) on the column selection: with pandas Copy-on-Write the
# selection is a temporary object, so the in-place form would leave df_tel
# unchanged. Re-running this cell is harmless because already-encoded values
# do not match either key.
df_tel['CHURN'] = df_tel['CHURN'].replace({'Active': 0, 'Churned': 1})

In [10]:
# Remove the CUSTOMER_ID column. Rebinding the result (instead of an in-place
# drop) together with errors='ignore' keeps this cell safe to re-run: once the
# column is gone, a second execution is a no-op rather than a KeyError.
df_tel = df_tel.drop(columns='CUSTOMER_ID', errors='ignore')

In [11]:
# Preview the first rows again to check the CHURN encoding and the removal of CUSTOMER_ID.
df_tel.head()

Unnamed: 0,CHURN,GENDER,AGE,CHARGE_TYPE,HANDSET,USAGE_BAND,SERVICE_DURATION,DROPPED_CALLS,PEAK_CALLS_NO,PEAK_CALLS_TIME,WEEKEND_CALLS_NO,WEEKEND_CALLS_TIME,TOTAL_CALLS_NO,TOTAL_CALLS_TIME
0,0,F,31,CAT 100,ASAD170,Med,33.33,6,218,379.8,9,21.6,366,632.4
1,0,M,27,CAT 100,S50,Med,30.87,10,373,656.4,11,28.0,411,810.4
2,0,M,27,CAT 100,ASAD170,Med,49.4,11,260,582.0,0,0.0,265,614.1
3,0,M,61,CAT 100,ASAD170,Med,48.6,1,294,661.8,14,101.5,326,844.0
4,0,F,52,CAT 100,S50,Med,56.03,0,221,555.6,7,25.0,274,673.0


In [12]:
# Separate the frame into the feature columns (everything except CHURN) and
# the target column (CHURN).
df_tel_x = df_tel.drop('CHURN', axis=1)
df_tel_y = df_tel['CHURN']

In [None]:
# Split the data: hold out 20% of the rows for evaluation; the fixed
# random_state makes the split reproducible.
# The training split is bound to df_train, the name the size report below
# reads. Binding it to df_train_x instead would make the report show the shape
# of a df_train left over from an earlier cell and would overwrite the feature
# matrix used by the earlier model.
df_train, df_test = train_test_split(df_tel, test_size=0.2, random_state=1234)
print('학습용 데이터 크기: {}'.format(df_train.shape))
print('평가용 데이터 크기: {}'.format(df_test.shape))

In [13]:
pip list

Package                            Version
---------------------------------- --------------------
absl-py                            1.0.0
alabaster                          0.7.12
anaconda-client                    1.9.0
anaconda-navigator                 2.1.1
anaconda-project                   0.10.1
anyio                              2.2.0
appdirs                            1.4.4
argh                               0.26.2
argon2-cffi                        20.1.0
arrow                              0.13.1
asn1crypto                         1.4.0
astroid                            2.6.6
astropy                            4.3.1
astunparse                         1.6.3
async-generator                    1.10
atomicwrites                       1.4.0
attrs                              21.2.0
autopep8                           1.5.7
Babel                              2.9.1
backcall                           0.2.0
backports.functools-lru-cache      1.6.4
backports.shutil-get-terminal-size 

