In [None]:
import pandas as pd
import numpy as np
import random
import os
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

def seed_everything(seed):
    """Seed Python's `random`, NumPy's global RNG and PYTHONHASHSEED with `seed`.

    Args:
        seed: Integer seed passed to `random.seed` and `np.random.seed`; its
            string form is stored in the `PYTHONHASHSEED` environment variable.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed):
        seeder(seed)

# Seed the global RNGs before anything stochastic runs.
seed_everything(42)

# Plot configuration, applied in one call.
# NOTE(review): 'Malgun Gothic' is presumably chosen so Korean labels render;
# it must be installed on the machine running the notebook — confirm.
matplotlib.rcParams.update({
    'font.family': 'Malgun Gothic',
    'axes.unicode_minus': False,
})

In [None]:
# Folder containing the CSV files (absolute path on the author's machine).
data_dir = r'C:\Users\dlwks\OneDrive\바탕 화면\VSCode\고객대출등급분류'

# The 'ID' column is dropped from train/test; the submission template is loaded unchanged.
train = pd.read_csv(data_dir + r'\train.csv').drop(columns='ID')
test = pd.read_csv(data_dir + r'\test.csv').drop(columns='ID')
submit = pd.read_csv(data_dir + r'\sample_submission.csv')

train.head(5)

In [None]:
# Show the distinct values of the target column '대출등급'.
train['대출등급'].unique()

In [None]:
# Print the training frame's column dtypes and non-null counts.
train.info()

In [None]:
# Display summary statistics for the training frame.
train.describe()

In [None]:
# Print the test frame's column dtypes and non-null counts.
test.info()

In [None]:
# Count missing values in each column of the training frame.
train.isnull().sum()

In [None]:
# Frequency of each distinct value in the '근로기간' column.
train.value_counts('근로기간')

In [None]:
# Frequency of each class in the target column '대출등급'.
train.value_counts('대출등급')

In [None]:
# fig, axes = plt.subplots(2, 2, figsize = (25, 10))

# sns.countplot(x = train['대출기간'], ax = axes[0][0]).set_title('대출기간')
# sns.countplot(x = train['근로기간'], ax = axes[0][1]).set_title('근로기간')
# sns.countplot(x = train['주택소유상태'], ax = axes[1][0]).set_title('주택소유상태')
# sns.countplot(x = train['대출목적'], ax = axes[1][1]).set_title('대출목적')

In [None]:
from sklearn.preprocessing import LabelEncoder

# Columns that are label-encoded to integers before modelling.
categorical_features = ['대출기간', '근로기간', '주택소유상태', '대출목적']

for col in categorical_features:
    # Learn the category -> integer mapping from the training data only.
    encoder = LabelEncoder().fit(train[col])
    train[col] = encoder.transform(train[col])

    # Categories that occur only in the test data are appended to the known
    # classes one by one, so transform() does not fail on them.
    for level in np.unique(test[col]):
        if level in encoder.classes_:
            continue
        encoder.classes_ = np.append(encoder.classes_, level)
    test[col] = encoder.transform(test[col])

# Encode the target; le_y is kept so predictions can be decoded later.
le_y = LabelEncoder()
y = le_y.fit_transform(train['대출등급'])

# Feature matrix: every column except the target.
X = train.drop(columns=['대출등급'])

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation; the fixed random_state keeps the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Print the shape of each split, one per line, in the order X_train, X_val, y_train, y_val.
for split_part in (X_train, X_val, y_train, y_val):
    print(split_part.shape)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

# Base learners for the stacking ensemble.
# Each model gets an explicit seed so its result does not depend on the state
# of the global NumPy RNG (and therefore on which cells were run, or re-run,
# before this one).
MODEL_SEED = 42

rf_clf = RandomForestClassifier(random_state=MODEL_SEED)
ext_clf = ExtraTreesClassifier(random_state=MODEL_SEED)
xgb_clf = XGBClassifier(random_state=MODEL_SEED)
lgbm_clf = LGBMClassifier(random_state=MODEL_SEED)
cat_clf = CatBoostClassifier(random_state=MODEL_SEED)

In [None]:
# Fit the base learners on the training split, in a fixed order.
for base_model in (rf_clf, ext_clf, xgb_clf, lgbm_clf):
    base_model.fit(X_train, y_train)

# Kept as the cell's final expression so the fitted CatBoost model is still echoed.
cat_clf.fit(X_train, y_train)

In [None]:
from sklearn.metrics import accuracy_score

# Validation-set predictions from every base learner (same order as they were fitted).
rf_pred, ext_pred, xgb_pred, lgbm_pred, cat_pred = (
    model.predict(X_val)
    for model in (rf_clf, ext_clf, xgb_clf, lgbm_clf, cat_clf)
)

# Report validation accuracy per model.
for tag, val_pred in (
    ('rf', rf_pred),
    ('ext', ext_pred),
    ('xgb', xgb_pred),
    ('lgbm', lgbm_pred),
    ('cat', cat_pred),
):
    print(f'{tag} 정확도 :', accuracy_score(y_val, val_pred))

In [None]:
# Meta-features: the base models' validation predictions placed side by side as columns.
val_preds = (rf_pred, ext_pred, xgb_pred, lgbm_pred, cat_pred)
stacked = np.column_stack(val_preds)

stacked.shape

In [None]:
from sklearn.model_selection import StratifiedKFold

# Scoring the meta-model on the rows it was fitted on only measures training
# accuracy. The accuracy is therefore estimated from out-of-fold predictions:
# every validation row is predicted by a meta-model that never saw it.
# Stratified folds keep every class present in each training part
# (assumes each class has at least 5 rows in y_val — TODO confirm).
final_pred_train = np.empty_like(y_val)
fold_splitter = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
for fit_idx, hold_idx in fold_splitter.split(stacked, y_val):
    fold_model = XGBClassifier(objective="multi:softprob", num_class=7)
    fold_model.fit(stacked[fit_idx], y_val[fit_idx])
    final_pred_train[hold_idx] = fold_model.predict(stacked[hold_idx])

print('최종 정확도 :', accuracy_score(y_val, final_pred_train))

# The meta-model used for the test set is fitted on all validation rows.
xgb_final = XGBClassifier(objective="multi:softprob", num_class=7)
xgb_final.fit(stacked, y_val);

In [None]:
# Test-set predictions from every fitted base learner, in the same model order
# used when the validation predictions were stacked.
base_models = (rf_clf, ext_clf, xgb_clf, lgbm_clf, cat_clf)
rf_test_pred, ext_test_pred, xgb_test_pred, lgbm_test_pred, cat_test_pred = (
    model.predict(test) for model in base_models
)

In [None]:
# Build the test-set meta-features with the same column order as the
# validation stack, then let the meta-model make the final call.
test_preds = (rf_test_pred, ext_test_pred, xgb_test_pred, lgbm_test_pred, cat_test_pred)
stacked_test = np.column_stack(test_preds)

final_test_pred = xgb_final.predict(stacked_test)

final_test_pred.shape

In [None]:
# Decode the integer predictions back to the original grade labels and write
# them into the submission's '대출등급' column.
# The previous version assigned to '대출등급 ' (trailing space), which created a
# second column and left the decode step reading the untouched template values.
submit['대출등급'] = le_y.inverse_transform(final_test_pred)

# Quick visual check of the filled-in submission.
submit.head()

In [None]:
# Write the submission next to the input CSVs, without the index column.
submit.to_csv(
    r'C:\Users\dlwks\OneDrive\바탕 화면\VSCode\고객대출등급분류\1.csv',
    index=False,
)