# 라이브러리

In [1]:
# Mount Google Drive inside the Colab runtime; the CSV files read later in this
# notebook live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_curve, roc_auc_score, precision_recall_curve, auc

# combine 데이터

## 데이터 전처리

In [3]:
# Load the "combined" dataset from the mounted shared drive.
data = pd.read_csv(
    '/content/drive/Shareddrives/서울시 빅데이터(2022-2)/논문/'
    '3. 폐업 여부 분류 모델/0) 데이터/data_combined.csv'
)

In [4]:
# Encode the "Y"/"N" flag columns (폐업여부 = target, 프랜차이즈여부 = feature) as 1/0.
# A value that is already 1 stays 1, so re-running this cell on the already
# encoded frame no longer silently resets every label to 0 (the previous
# `1 if x == "Y" else 0` mapping did exactly that on a second run).
for flag_col in ['폐업여부', '프랜차이즈여부']:
    data[flag_col] = data[flag_col].isin(['Y', 1]).astype(int)

In [5]:
# Mark the code-like columns as categorical before they are one-hot encoded.
for category_col in ('표준산업분류코드', '행정동코드', '분기'):
    data[category_col] = data[category_col].astype('category')

In [6]:
# One-hot encode the three categorical columns; the 점포명 column is removed
# so it does not reach the model as a feature.
data_encoded = pd.get_dummies(
    data.drop(columns=['점포명']),
    columns=['표준산업분류코드', '행정동코드', '분기'],
)

In [7]:
# Temporal hold-out: rows with 년도 == 2021 are used for fitting,
# rows with 년도 == 2022 for evaluation.
train_data = data_encoded.loc[data_encoded['년도'].eq(2021)]
test_data = data_encoded.loc[data_encoded['년도'].eq(2022)]

In [8]:
# The year column only served to build the split; remove it from both frames.
train_data, test_data = (
    split.drop(columns=['년도']) for split in (train_data, test_data)
)

In [9]:
# Separate the target column (폐업여부) from the feature matrix of each split.
x_train, y_train = train_data.drop(columns=['폐업여부']), train_data['폐업여부']
x_test, y_test = test_data.drop(columns=['폐업여부']), test_data['폐업여부']

## 모델링

### 기본

In [10]:
# Baseline forest with 100 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=100, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5330
PR AUC: 0.041620198414685616


In [11]:
# Baseline forest with 200 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=200, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5325
PR AUC: 0.04163701010336024


In [12]:
# Baseline forest with 300 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=300, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5353
PR AUC: 0.041941488201623446


In [13]:
# Baseline forest with 400 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=400, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5398
PR AUC: 0.04210871288488152


In [14]:
# Baseline forest with 500 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=500, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5399
PR AUC: 0.04211614228115265


In [15]:
# Baseline forest with 600 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=600, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5398
PR AUC: 0.042046253804785556


In [16]:
# Baseline forest with 700 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=700, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5406
PR AUC: 0.04202497646431949


In [17]:
# Baseline forest with 800 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=800, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5412
PR AUC: 0.042008898040530776


In [18]:
# Baseline forest with 900 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=900, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5416
PR AUC: 0.04198571340358523


In [19]:
# Baseline forest with 1000 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=1000, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5408
PR AUC: 0.04186378801526619


### 앙상블

In [20]:
# Under-sampling ensemble: every round fits a forest on all closed rows
# (폐업여부 == 1) plus a fresh random draw of non-closed rows (폐업여부 == 0);
# the per-round probabilities are then averaged into one score per row.
num_iterations = 20
n_non_closed_per_round = 5000  # non-closed rows drawn in each round
# NOTE(review): 900 is the n_estimators with the highest *test* ROC AUC (0.5416)
# in the baseline sweep above. If it was picked from those scores, the test
# metrics printed below are optimistic — confirm, or tune on a validation split.
n_trees = 900

# The class filters do not depend on the round, so compute them once.
non_closed_pool = train_data[train_data['폐업여부'] == 0]
closed_data = train_data[train_data['폐업여부'] == 1]
# Cap the draw: DataFrame.sample raises when n exceeds the number of rows.
sample_size = min(n_non_closed_per_round, len(non_closed_pool))

train_pred_per_round = []
test_pred_per_round = []

for iteration in range(num_iterations):
    # A different seed per round gives a different non-closed subset each time.
    non_closed_data = non_closed_pool.sample(n=sample_size, random_state=iteration + 426)
    new_train_data = pd.concat([non_closed_data, closed_data], axis=0)

    new_x_train = new_train_data.drop(['폐업여부'], axis=1)
    new_y_train = new_train_data['폐업여부']

    model = RandomForestClassifier(random_state=123, n_estimators=n_trees)
    model.fit(new_x_train, new_y_train)

    train_pred_per_round.append(model.predict_proba(x_train)[:, 1])
    test_pred_per_round.append(model.predict_proba(x_test)[:, 1])

# Average over rounds (axis 0) -> one probability per row.
final_y_train_pred_proba = np.mean(train_pred_per_round, axis=0)
final_y_test_pred_proba = np.mean(test_pred_per_round, axis=0)

# Train metrics. Presumably x_train is train_data without the label, in which
# case these are in-sample scores (the forests saw many of these rows).
final_roc_auc_train = roc_auc_score(y_train, final_y_train_pred_proba)
print("Final Train ROC AUC:", final_roc_auc_train)
precision, recall, _ = precision_recall_curve(y_train, final_y_train_pred_proba)
final_pr_auc_train = auc(recall, precision)
print("Final Train PR AUC:", final_pr_auc_train)

# Test metrics on the held-out split.
final_roc_auc_test = roc_auc_score(y_test, final_y_test_pred_proba)
print("Final Test ROC AUC:", final_roc_auc_test)
precision, recall, _ = precision_recall_curve(y_test, final_y_test_pred_proba)
final_pr_auc_test = auc(recall, precision)
print("Final Test PR AUC:", final_pr_auc_test)

Final Train ROC AUC: 0.9773298051288296
Final Train PR AUC: 0.5370683735215769
Final Test ROC AUC: 0.5422635424574044
Final Test PR AUC: 0.04306514443356642


# total 데이터

## 데이터 전처리

In [21]:
# Load the "total" dataset from the mounted shared drive.
data = pd.read_csv(
    '/content/drive/Shareddrives/서울시 빅데이터(2022-2)/논문/'
    '3. 폐업 여부 분류 모델/0) 데이터/data_total.csv'
)

In [22]:
# Encode the "Y"/"N" flag columns (폐업여부 = target, 프랜차이즈여부 = feature) as 1/0.
# A value that is already 1 stays 1, so re-running this cell on the already
# encoded frame no longer silently resets every label to 0 (the previous
# `1 if x == "Y" else 0` mapping did exactly that on a second run).
for flag_col in ['폐업여부', '프랜차이즈여부']:
    data[flag_col] = data[flag_col].isin(['Y', 1]).astype(int)

In [23]:
# Mark the code-like columns as categorical before they are one-hot encoded.
for category_col in ('표준산업분류코드', '행정동코드', '분기'):
    data[category_col] = data[category_col].astype('category')

In [24]:
# One-hot encode the three categorical columns; the 점포명 column is removed
# so it does not reach the model as a feature.
data_encoded = pd.get_dummies(
    data.drop(columns=['점포명']),
    columns=['표준산업분류코드', '행정동코드', '분기'],
)

In [25]:
# Temporal hold-out: rows with 년도 == 2021 are used for fitting,
# rows with 년도 == 2022 for evaluation.
train_data = data_encoded.loc[data_encoded['년도'].eq(2021)]
test_data = data_encoded.loc[data_encoded['년도'].eq(2022)]

In [26]:
# The year column only served to build the split; remove it from both frames.
train_data, test_data = (
    split.drop(columns=['년도']) for split in (train_data, test_data)
)

In [27]:
# Separate the target column (폐업여부) from the feature matrix of each split.
x_train, y_train = train_data.drop(columns=['폐업여부']), train_data['폐업여부']
x_test, y_test = test_data.drop(columns=['폐업여부']), test_data['폐업여부']

## 모델링

### 기본

In [28]:
# Baseline forest with 100 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=100, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5316
PR AUC: 0.04131021564908949


In [29]:
# Baseline forest with 200 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=200, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5393
PR AUC: 0.04190449366909107


In [30]:
# Baseline forest with 300 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=300, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5400
PR AUC: 0.04155291639667089


In [31]:
# Baseline forest with 400 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=400, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5396
PR AUC: 0.041426334612883746


In [32]:
# Baseline forest with 500 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=500, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5411
PR AUC: 0.0416351304065782


In [33]:
# Baseline forest with 600 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=600, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5414
PR AUC: 0.04164138276979091


In [34]:
# Baseline forest with 700 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=700, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5404
PR AUC: 0.0416768092143254


In [35]:
# Baseline forest with 800 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=800, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5394
PR AUC: 0.04151209563099316


In [36]:
# Baseline forest with 900 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=900, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5398
PR AUC: 0.04162013185638978


In [37]:
# Baseline forest with 1000 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=1000, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5400
PR AUC: 0.04162499556019142


### 앙상블

In [38]:
# Under-sampling ensemble: every round fits a forest on all closed rows
# (폐업여부 == 1) plus a fresh random draw of non-closed rows (폐업여부 == 0);
# the per-round probabilities are then averaged into one score per row.
num_iterations = 20
n_non_closed_per_round = 5000  # non-closed rows drawn in each round
# NOTE(review): 600 is the n_estimators with the highest *test* ROC AUC (0.5414)
# in the baseline sweep above. If it was picked from those scores, the test
# metrics printed below are optimistic — confirm, or tune on a validation split.
n_trees = 600

# The class filters do not depend on the round, so compute them once.
non_closed_pool = train_data[train_data['폐업여부'] == 0]
closed_data = train_data[train_data['폐업여부'] == 1]
# Cap the draw: DataFrame.sample raises when n exceeds the number of rows.
sample_size = min(n_non_closed_per_round, len(non_closed_pool))

train_pred_per_round = []
test_pred_per_round = []

for iteration in range(num_iterations):
    # A different seed per round gives a different non-closed subset each time.
    non_closed_data = non_closed_pool.sample(n=sample_size, random_state=iteration + 426)
    new_train_data = pd.concat([non_closed_data, closed_data], axis=0)

    new_x_train = new_train_data.drop(['폐업여부'], axis=1)
    new_y_train = new_train_data['폐업여부']

    model = RandomForestClassifier(random_state=123, n_estimators=n_trees)
    model.fit(new_x_train, new_y_train)

    train_pred_per_round.append(model.predict_proba(x_train)[:, 1])
    test_pred_per_round.append(model.predict_proba(x_test)[:, 1])

# Average over rounds (axis 0) -> one probability per row.
final_y_train_pred_proba = np.mean(train_pred_per_round, axis=0)
final_y_test_pred_proba = np.mean(test_pred_per_round, axis=0)

# Train metrics. Presumably x_train is train_data without the label, in which
# case these are in-sample scores (the forests saw many of these rows).
final_roc_auc_train = roc_auc_score(y_train, final_y_train_pred_proba)
print("Final Train ROC AUC:", final_roc_auc_train)
precision, recall, _ = precision_recall_curve(y_train, final_y_train_pred_proba)
final_pr_auc_train = auc(recall, precision)
print("Final Train PR AUC:", final_pr_auc_train)

# Test metrics on the held-out split.
final_roc_auc_test = roc_auc_score(y_test, final_y_test_pred_proba)
print("Final Test ROC AUC:", final_roc_auc_test)
precision, recall, _ = precision_recall_curve(y_test, final_y_test_pred_proba)
final_pr_auc_test = auc(recall, precision)
print("Final Test PR AUC:", final_pr_auc_test)

Final Train ROC AUC: 0.9775370743979939
Final Train PR AUC: 0.538993746565493
Final Test ROC AUC: 0.5453113267780137
Final Test PR AUC: 0.043205468103261604


# PredIndex 데이터

## 데이터 전처리

In [41]:
# Load the "PredIndex" dataset from the mounted shared drive.
data = pd.read_csv(
    '/content/drive/Shareddrives/서울시 빅데이터(2022-2)/논문/'
    '3. 폐업 여부 분류 모델/0) 데이터/data_PredIndex.csv'
)

In [42]:
# Encode the "Y"/"N" flag columns (폐업여부 = target, 프랜차이즈여부 = feature) as 1/0.
# A value that is already 1 stays 1, so re-running this cell on the already
# encoded frame no longer silently resets every label to 0 (the previous
# `1 if x == "Y" else 0` mapping did exactly that on a second run).
for flag_col in ['폐업여부', '프랜차이즈여부']:
    data[flag_col] = data[flag_col].isin(['Y', 1]).astype(int)

In [43]:
# Mark the code-like columns as categorical before they are one-hot encoded.
for category_col in ('표준산업분류코드', '행정동코드', '분기'):
    data[category_col] = data[category_col].astype('category')

In [44]:
# One-hot encode the three categorical columns; the 점포명 column is removed
# so it does not reach the model as a feature.
data_encoded = pd.get_dummies(
    data.drop(columns=['점포명']),
    columns=['표준산업분류코드', '행정동코드', '분기'],
)

In [45]:
# Temporal hold-out: rows with 년도 == 2021 are used for fitting,
# rows with 년도 == 2022 for evaluation.
train_data = data_encoded.loc[data_encoded['년도'].eq(2021)]
test_data = data_encoded.loc[data_encoded['년도'].eq(2022)]

In [46]:
# The year column only served to build the split; remove it from both frames.
train_data, test_data = (
    split.drop(columns=['년도']) for split in (train_data, test_data)
)

In [47]:
# Separate the target column (폐업여부) from the feature matrix of each split.
x_train, y_train = train_data.drop(columns=['폐업여부']), train_data['폐업여부']
x_test, y_test = test_data.drop(columns=['폐업여부']), test_data['폐업여부']

## 모델링

### 기본

In [48]:
# Baseline forest with 100 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=100, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5424
PR AUC: 0.04256365113643346


In [49]:
# Baseline forest with 200 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=200, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5530
PR AUC: 0.0440247353147851


In [50]:
# Baseline forest with 300 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=300, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5585
PR AUC: 0.044599522778386316


In [51]:
# Baseline forest with 400 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=400, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5593
PR AUC: 0.04455576250016481


In [52]:
# Baseline forest with 500 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=500, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5642
PR AUC: 0.04500659391101103


In [53]:
# Baseline forest with 600 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=600, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5647
PR AUC: 0.045133514778253975


In [54]:
# Baseline forest with 700 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=700, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5647
PR AUC: 0.045084598386490884


In [55]:
# Baseline forest with 800 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=800, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5643
PR AUC: 0.04514868214070368


In [56]:
# Baseline forest with 900 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=900, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5649
PR AUC: 0.04522938837740634


In [57]:
# Baseline forest with 1000 trees, fitted on the whole training split (no resampling).
model = RandomForestClassifier(n_estimators=1000, random_state=123).fit(x_train, y_train)

# Column 1 of predict_proba is the probability of the second class in model.classes_.
ens_score = model.predict_proba(x_test)[:, 1]

roc_score = roc_auc_score(y_true=y_test, y_score=ens_score)
print(f'ROC AUC 값: {roc_score:.4f}')

# PR AUC: area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(y_test, ens_score)
pr_auc_score = auc(x=recall, y=precision)
print("PR AUC:", pr_auc_score)

ROC AUC 값: 0.5653
PR AUC: 0.04516622669462195


### 앙상블

In [58]:
# Under-sampling ensemble: every round fits a forest on all closed rows
# (폐업여부 == 1) plus a fresh random draw of non-closed rows (폐업여부 == 0);
# the per-round probabilities are then averaged into one score per row.
num_iterations = 20
n_non_closed_per_round = 5000  # non-closed rows drawn in each round
# NOTE(review): 1000 is the n_estimators with the highest *test* ROC AUC (0.5653)
# in the baseline sweep above. If it was picked from those scores, the test
# metrics printed below are optimistic — confirm, or tune on a validation split.
n_trees = 1000

# The class filters do not depend on the round, so compute them once.
non_closed_pool = train_data[train_data['폐업여부'] == 0]
closed_data = train_data[train_data['폐업여부'] == 1]
# Cap the draw: DataFrame.sample raises when n exceeds the number of rows.
sample_size = min(n_non_closed_per_round, len(non_closed_pool))

train_pred_per_round = []
test_pred_per_round = []

for iteration in range(num_iterations):
    # A different seed per round gives a different non-closed subset each time.
    non_closed_data = non_closed_pool.sample(n=sample_size, random_state=iteration + 426)
    new_train_data = pd.concat([non_closed_data, closed_data], axis=0)

    new_x_train = new_train_data.drop(['폐업여부'], axis=1)
    new_y_train = new_train_data['폐업여부']

    model = RandomForestClassifier(random_state=123, n_estimators=n_trees)
    model.fit(new_x_train, new_y_train)

    train_pred_per_round.append(model.predict_proba(x_train)[:, 1])
    test_pred_per_round.append(model.predict_proba(x_test)[:, 1])

# Average over rounds (axis 0) -> one probability per row.
final_y_train_pred_proba = np.mean(train_pred_per_round, axis=0)
final_y_test_pred_proba = np.mean(test_pred_per_round, axis=0)

# Train metrics. Presumably x_train is train_data without the label, in which
# case these are in-sample scores (the forests saw many of these rows).
final_roc_auc_train = roc_auc_score(y_train, final_y_train_pred_proba)
print("Final Train ROC AUC:", final_roc_auc_train)
precision, recall, _ = precision_recall_curve(y_train, final_y_train_pred_proba)
final_pr_auc_train = auc(recall, precision)
print("Final Train PR AUC:", final_pr_auc_train)

# Test metrics on the held-out split.
final_roc_auc_test = roc_auc_score(y_test, final_y_test_pred_proba)
print("Final Test ROC AUC:", final_roc_auc_test)
precision, recall, _ = precision_recall_curve(y_test, final_y_test_pred_proba)
final_pr_auc_test = auc(recall, precision)
print("Final Test PR AUC:", final_pr_auc_test)

Final Train ROC AUC: 0.9770574029517884
Final Train PR AUC: 0.5422298288582291
Final Test ROC AUC: 0.5763601936919747
Final Test PR AUC: 0.04850905864524303
