#### 포함할 라이브러리

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

#### 함수 정의

In [47]:
# 최적의 이웃 수 찾는 함수 정의
def OptKNeighbours(
    data,
    X_columns,
    y_column,
    test_size = None,
    random_state = None,
    maxNeighbours = 99
):
    """최적의 이웃 수 찾기
    data : 전체 데이터
    X_columns : 학습에 포함시킬 컬럼
    y_columns : 정답 컬럼
    test_size : 테스트 데이터 비율
    random_state : 난수 시드
    maxNeighbours : 최대 이웃 수
    """
    # 결과 담을 데이터 프레임
    result = pd.DataFrame()

    # 데이터 분할
    X_train, X_test, y_train, y_test = train_test_split(data[X_columns], data[y_column], test_size = test_size, random_state = random_state)

    # 데이터 분할 정보 출력
    print(f"X_train{X_train.shape}, X_test{X_test.shape}, y_train{y_train.shape}, y_test{y_test.shape}")

    # 최대 이웃 수 조정
    if maxNeighbours > len(y_test):
        maxNeighbours = len(y_test)
    if maxNeighbours > len(y_train):
        maxNeighbours = len(y_train)
    
    # 학습
    n = 3
    while n <= maxNeighbours:
        model = KNeighborsClassifier(n_neighbors = n)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        result = pd.concat(
            [
                result,
                pd.Series(
                    index = ['이웃 수', '정확도', '정밀도', '재현율'],
                    data =
                    [
                        n,
                        accuracy_score(y_test, y_pred),
                        precision_score(y_test, y_pred),
                        recall_score(y_test, y_pred)
                    ]
                )
            ],
            axis = 1
        )
        n += 2

    # 출력용 데이터 프레임 다듬기
    result = result.T
    result["이웃 수"] = result["이웃 수"].astype(int)

    # 반환
    return result

In [63]:
# 최적의 의사결정나무 파라미터 찾는 함수 정의
def OptDecisionTree(
    data,
    X_columns,
    y_column,
    test_size = None,
    random_state = None,
    max_depth = 8,
    max_features = 8,
    max_leaf_nodes = 8
):
    """최적의 의사결정나무 파라미터 찾기
    data : 전체 데이터
    X_columns : 학습에 포함시킬 컬럼
    y_columns : 정답 컬럼
    test_size : 테스트 데이터 비율
    random_state : 난수 시드
    max_depth : 최대 깊이
    max_features : 최대 특성
    max_leaf_nodes : 최대 잎 수
    """
    # 결과 담을 데이터 프레임
    result = pd.DataFrame()

    # 데이터 분할
    X_train, X_test, y_train, y_test = train_test_split(data[X_columns], data[y_column], test_size = test_size, random_state = random_state)

    # 데이터 분할 정보 출력
    print(f"X_train{X_train.shape}, X_test{X_test.shape}, y_train{y_train.shape}, y_test{y_test.shape}")
    
    # 학습
    md = 1
    while md <= max_depth:
        mf = 1
        while mf <= max_features:
            ml = 2
            while ml <= max_leaf_nodes:
                model = DecisionTreeClassifier(max_depth = md, max_features = mf, max_leaf_nodes = ml)
                model.fit(X_train, y_train)
                y_pred = model.predict(X_test)
                result = pd.concat(
                    [
                        result,
                        pd.Series(
                            index = ['max_depth', 'max_features', 'max_leaf_nodes', '정확도', '정밀도', '재현율'],
                            data =
                            [
                                md,
                                mf,
                                ml,
                                accuracy_score(y_test, y_pred),
                                precision_score(y_test, y_pred),
                                recall_score(y_test, y_pred)
                            ]
                        )
                    ],
                    axis = 1
                )
                ml += 1
            mf += 1
        md += 1

    # 출력용 데이터 프레임 다듬기
    result = result.T
    result["max_depth"] = result["max_depth"].astype(int)
    result["max_features"] = result["max_features"].astype(int)
    result["max_leaf_nodes"] = result["max_leaf_nodes"].astype(int)

    # 반환
    return result

In [64]:
# 최적의 랜덤포레스트 파라미터 찾는 함수 정의
def OptRandomForest(
    data,
    X_columns,
    y_column,
    test_size = None,
    random_state = None,
    max_depth = 8,
    max_features = 8,
    max_leaf_nodes = 8
):
    """최적의 의사결정나무 파라미터 찾기
    data : 전체 데이터
    X_columns : 학습에 포함시킬 컬럼
    y_columns : 정답 컬럼
    test_size : 테스트 데이터 비율
    random_state : 난수 시드
    max_depth : 최대 깊이
    max_features : 최대 특성
    max_leaf_nodes : 최대 잎 수
    """
    # 결과 담을 데이터 프레임
    result = pd.DataFrame()

    # 데이터 분할
    X_train, X_test, y_train, y_test = train_test_split(data[X_columns], data[y_column], test_size = test_size, random_state = random_state)

    # 데이터 분할 정보 출력
    print(f"X_train{X_train.shape}, X_test{X_test.shape}, y_train{y_train.shape}, y_test{y_test.shape}")
    
    # 학습
    md = 1
    while md <= max_depth:
        mf = 1
        while mf <= max_features:
            ml = 2
            while ml <= max_leaf_nodes:
                model = RandomForestClassifier(max_depth = md, max_features = mf, max_leaf_nodes = ml)
                model.fit(X_train, y_train)
                y_pred = model.predict(X_test)
                result = pd.concat(
                    [
                        result,
                        pd.Series(
                            index = ['max_depth', 'max_features', 'max_leaf_nodes', '정확도', '정밀도', '재현율'],
                            data =
                            [
                                md,
                                mf,
                                ml,
                                accuracy_score(y_test, y_pred),
                                precision_score(y_test, y_pred),
                                recall_score(y_test, y_pred)
                            ]
                        )
                    ],
                    axis = 1
                )
                ml += 1
            mf += 1
        md += 1

    # 출력용 데이터 프레임 다듬기
    result = result.T
    result["max_depth"] = result["max_depth"].astype(int)
    result["max_features"] = result["max_features"].astype(int)
    result["max_leaf_nodes"] = result["max_leaf_nodes"].astype(int)

    # 반환
    return result

In [66]:
data = pd.read_csv("./data/Preprocessed/DataSet1.csv", index_col = "Patient Number")
data

Unnamed: 0_level_0,Sadness,Euphoric,Exhausted,Sleep dissorder,Mood Swing,Suicidal thoughts,Anorxia,Authority Respect,Try-Explanation,Aggressive Response,Ignore & Move-On,Nervous Break-down,Admit Mistakes,Overthinking,Sexual Activity,Concentration,Optimisim,Expert Diagnose
Patient Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Patiant-01,0.666667,0.000000,0.333333,0.333333,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.3,0.3,0.4,1
Patiant-02,0.666667,0.000000,0.666667,0.333333,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,0.2,0.5,1
Patiant-03,0.333333,1.000000,0.333333,0.333333,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.6,0.5,0.7,1
Patiant-04,0.666667,0.000000,0.666667,1.000000,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.3,0.2,0.2,1
Patiant-05,0.666667,0.666667,0.333333,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.5,0.5,0.6,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Patiant-116,1.000000,0.000000,0.666667,0.333333,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.2,0.5,0.3,1
Patiant-117,0.333333,0.333333,0.333333,0.000000,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.6,0.7,0.8,1
Patiant-118,0.666667,0.333333,0.666667,0.333333,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.1,0.5,0.3,1
Patiant-119,0.666667,0.333333,0.000000,0.000000,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.7,0.7,0.7,1


In [67]:
# 정렬 순서
# 이웃 수로 오름차순으로 먼저 정렬
# 그 다음 재현율, 정확도, 정밀도 기준으로 내림차순
display(
    OptKNeighbours(data, data.columns.drop('Expert Diagnose'), 'Expert Diagnose', 0.3, 2024)
    .sort_values("이웃 수", ascending = True)
    .sort_values(["재현율", "정확도", "정밀도"], ascending = False)
    .head(20)
)

X_train(84, 17), X_test(36, 17), y_train(84,), y_test(36,)


Unnamed: 0,이웃 수,정확도,정밀도,재현율
0,13,0.916667,0.892857,1.0
0,23,0.916667,0.892857,1.0
0,25,0.916667,0.892857,1.0
0,27,0.916667,0.892857,1.0
0,29,0.888889,0.862069,1.0
0,31,0.861111,0.833333,1.0
0,33,0.777778,0.757576,1.0
0,35,0.75,0.735294,1.0
0,11,0.944444,0.96,0.96
0,15,0.944444,0.96,0.96


In [68]:
# 정렬 순서
# max_depth, max_features, max_leaf_nodes 기준으로 오름차순으로 먼저 정렬
# 그 다음 재현율, 정확도, 정밀도 기준으로 내림차순
display(
    OptDecisionTree(data, data.columns.drop('Expert Diagnose'), 'Expert Diagnose', 0.3, 2024)
    .sort_values(['max_depth', 'max_features', 'max_leaf_nodes'], ascending = True)
    .sort_values(["재현율", "정확도", "정밀도"], ascending = False)
    .head(20)
)

X_train(84, 17), X_test(36, 17), y_train(84,), y_test(36,)


Unnamed: 0,max_depth,max_features,max_leaf_nodes,정확도,정밀도,재현율
0,8,4,7,1.0,1.0,1.0
0,7,6,7,0.972222,0.961538,1.0
0,3,4,4,0.944444,0.925926,1.0
0,3,6,4,0.944444,0.925926,1.0
0,3,6,6,0.944444,0.925926,1.0
0,3,8,6,0.944444,0.925926,1.0
0,3,8,8,0.944444,0.925926,1.0
0,4,6,7,0.944444,0.925926,1.0
0,4,7,4,0.944444,0.925926,1.0
0,4,8,6,0.944444,0.925926,1.0


In [69]:
# 정렬 순서
# max_depth, max_features, max_leaf_nodes 기준으로 오름차순으로 먼저 정렬
# 그 다음 재현율, 정확도, 정밀도 기준으로 내림차순
display(
    OptRandomForest(data, data.columns.drop('Expert Diagnose'), 'Expert Diagnose', 0.3, 2024)
    .sort_values(['max_depth', 'max_features', 'max_leaf_nodes'], ascending = True)
    .sort_values(["재현율", "정확도", "정밀도"], ascending = False)
    .head(20)
)

X_train(84, 17), X_test(36, 17), y_train(84,), y_test(36,)


Unnamed: 0,max_depth,max_features,max_leaf_nodes,정확도,정밀도,재현율
0,3,7,7,0.972222,0.961538,1.0
0,2,5,7,0.944444,0.925926,1.0
0,2,6,4,0.944444,0.925926,1.0
0,2,6,5,0.944444,0.925926,1.0
0,2,6,7,0.944444,0.925926,1.0
0,2,7,3,0.944444,0.925926,1.0
0,2,7,4,0.944444,0.925926,1.0
0,2,7,5,0.944444,0.925926,1.0
0,2,7,6,0.944444,0.925926,1.0
0,2,7,7,0.944444,0.925926,1.0


In [71]:
data = pd.read_csv("./data/Preprocessed/DataSet4.csv", index_col = "id")
data

Unnamed: 0_level_0,Gender,Age,Academic Pressure,Work Pressure,CGPA,Study Satisfaction,Job Satisfaction,Sleep Duration,Dietary Habits,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Family History of Mental Illness,Depression
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2,0.0,0.365854,1.0,0.0,0.897,0.4,0.0,0.4,0.75,0.321429,0.5,0.250000,0.0,0.0,1.0
8,0.5,0.146341,0.4,0.0,0.590,1.0,0.0,0.4,0.50,0.071429,0.0,0.250000,0.2,0.5,0.0
26,0.0,0.317073,0.6,0.0,0.703,1.0,0.0,0.2,0.75,0.142857,0.0,0.750000,0.0,0.5,0.0
30,0.5,0.243902,0.6,0.0,0.559,0.4,0.0,0.6,0.50,0.285714,0.5,0.333333,0.8,0.5,1.0
32,0.5,0.170732,0.8,0.0,0.813,0.6,0.0,0.4,0.50,0.785714,0.5,0.083333,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140685,0.5,0.219512,1.0,0.0,0.575,1.0,0.0,0.4,0.25,0.035714,0.5,0.583333,0.0,0.5,0.0
140686,0.0,0.219512,0.4,0.0,0.940,0.6,0.0,0.2,0.75,0.535714,0.0,0.000000,0.4,0.5,0.0
140689,0.0,0.317073,0.6,0.0,0.661,0.8,0.0,0.4,0.25,0.928571,0.0,1.000000,0.2,0.0,0.0
140690,0.5,0.000000,1.0,0.0,0.688,0.4,0.0,0.2,0.75,0.035714,0.5,0.833333,0.8,0.0,1.0


In [72]:
# 정렬 순서
# 이웃 수로 오름차순으로 먼저 정렬
# 그 다음 재현율, 정확도, 정밀도 기준으로 내림차순
display(
    OptKNeighbours(data, data.columns.drop('Depression'), 'Depression', 0.3, 2024)
    .sort_values("이웃 수", ascending = True)
    .sort_values(["재현율", "정확도", "정밀도"], ascending = False)
    .head(20)
)

X_train(19530, 14), X_test(8371, 14), y_train(19530,), y_test(8371,)


Unnamed: 0,이웃 수,정확도,정밀도,재현율
0,99,0.84064,0.841097,0.901732
0,97,0.840521,0.84158,0.900725
0,95,0.840401,0.841551,0.900524
0,93,0.840999,0.842472,0.900322
0,87,0.84064,0.842254,0.899919
0,91,0.84064,0.842383,0.899718
0,89,0.839924,0.84156,0.899517
0,85,0.840162,0.842523,0.89851
0,83,0.840521,0.84313,0.898308
0,81,0.840401,0.842971,0.898308


In [73]:
# 정렬 순서
# max_depth, max_features, max_leaf_nodes 기준으로 오름차순으로 먼저 정렬
# 그 다음 재현율, 정확도, 정밀도 기준으로 내림차순
display(
    OptDecisionTree(data, data.columns.drop('Depression'), 'Depression', 0.3, 2024)
    .sort_values(['max_depth', 'max_features', 'max_leaf_nodes'], ascending = True)
    .sort_values(["재현율", "정확도", "정밀도"], ascending = False)
    .head(20)
)

X_train(19530, 14), X_test(8371, 14), y_train(19530,), y_test(8371,)


Unnamed: 0,max_depth,max_features,max_leaf_nodes,정확도,정밀도,재현율
0,1,1,2,0.593239,0.593239,1.0
0,1,1,4,0.593239,0.593239,1.0
0,1,1,5,0.593239,0.593239,1.0
0,1,1,7,0.593239,0.593239,1.0
0,1,1,8,0.593239,0.593239,1.0
0,1,2,2,0.593239,0.593239,1.0
0,1,2,3,0.593239,0.593239,1.0
0,1,2,8,0.593239,0.593239,1.0
0,1,3,2,0.593239,0.593239,1.0
0,1,3,3,0.593239,0.593239,1.0


In [74]:
# 정렬 순서
# max_depth, max_features, max_leaf_nodes 기준으로 오름차순으로 먼저 정렬
# 그 다음 재현율, 정확도, 정밀도 기준으로 내림차순
display(
    OptRandomForest(data, data.columns.drop('Depression'), 'Depression', 0.3, 2024)
    .sort_values(['max_depth', 'max_features', 'max_leaf_nodes'], ascending = True)
    .sort_values(["재현율", "정확도", "정밀도"], ascending = False)
    .head(20)
)

X_train(19530, 14), X_test(8371, 14), y_train(19530,), y_test(8371,)


Unnamed: 0,max_depth,max_features,max_leaf_nodes,정확도,정밀도,재현율
0,1,1,4,0.605065,0.600338,1.0
0,1,1,5,0.604826,0.600193,1.0
0,1,1,2,0.598853,0.596588,1.0
0,8,1,2,0.597778,0.595944,1.0
0,1,1,8,0.605185,0.600435,0.999799
0,1,1,7,0.602676,0.598914,0.999799
0,3,1,2,0.611994,0.604629,0.999597
0,2,1,2,0.609724,0.603232,0.999597
0,1,1,3,0.62394,0.612111,0.999396
0,1,1,6,0.623462,0.61181,0.999396


In [80]:
data = pd.read_csv("./data/Preprocessed/DataSet5.csv", index_col = "Survey_id")
data

Unnamed: 0_level_0,Unnamed: 0,sex,Age,Married,Number_children,education_level,total_members,gained_asset,durable_asset,save_asset,...,incoming_salary,incoming_own_farm,incoming_business,incoming_no_business,incoming_agricultural,farm_expenses,labor_primary,lasting_investment,no_lasting_investmen,depressed
Survey_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0.647759,0,1.0,0.148649,1.0,0.363636,0.500000,0.363636,0.289336,0.228242,0.232843,...,0.0,0.0,0.0,0.0,0.298638,0.312860,0.0,0.285164,0.283009,0.0
0.522409,1,1.0,0.081081,1.0,0.272727,0.388889,0.363636,0.289336,0.228242,0.232843,...,0.0,0.0,0.0,0.0,0.298638,0.312860,0.0,0.285164,0.283009,1.0
0.832633,2,1.0,0.067568,1.0,0.272727,0.444444,0.363636,0.289336,0.228242,0.232843,...,0.0,0.0,0.0,0.0,0.298638,0.312860,0.0,0.285164,0.283009,0.0
0.745098,3,1.0,0.135135,1.0,0.181818,0.500000,0.272727,0.529764,0.196438,0.495968,...,0.0,1.0,0.0,1.0,0.220813,0.185952,0.0,0.077555,0.694233,0.0
0.563725,4,0.0,0.567568,0.0,0.363636,0.500000,0.454545,0.832785,0.172846,0.232843,...,1.0,0.0,0.0,0.0,0.533454,0.205872,1.0,0.201528,0.434998,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0.177871,1424,1.0,0.108108,1.0,0.090909,0.333333,0.363636,0.289336,0.228242,0.232843,...,0.0,0.0,0.0,0.0,0.298638,0.312860,0.0,0.285164,0.283009,0.0
0.382353,1425,1.0,0.148649,1.0,0.363636,0.500000,0.454545,0.155725,0.239917,0.153714,...,0.0,1.0,0.0,0.0,0.228193,0.007547,0.0,0.017602,0.474837,0.0
0.624650,1426,1.0,0.662162,0.0,0.000000,0.000000,0.000000,0.426261,0.228242,0.224449,...,0.0,1.0,0.0,0.0,0.122861,0.102465,0.0,0.466631,0.103775,1.0
0.253501,1427,1.0,0.459459,1.0,0.090909,0.611111,0.363636,0.289336,0.228242,0.232843,...,0.0,0.0,0.0,0.0,0.298638,0.312860,0.0,0.285164,0.283009,0.0


In [76]:
# 정렬 순서
# 이웃 수로 오름차순으로 먼저 정렬
# 그 다음 재현율, 정확도, 정밀도 기준으로 내림차순
display(
    OptKNeighbours(data, data.columns.drop('depressed'), 'depressed', 0.3, 2024)
    .sort_values("이웃 수", ascending = True)
    .sort_values(["재현율", "정확도", "정밀도"], ascending = False)
)

X_train(986, 21), X_test(423, 21), y_train(986,), y_test(423,)


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

Unnamed: 0,이웃 수,정확도,정밀도,재현율
0,3,0.810875,0.16,0.063492
0,5,0.836879,0.25,0.047619
0,13,0.851064,0.0,0.0
0,15,0.851064,0.0,0.0
0,19,0.851064,0.0,0.0
0,21,0.851064,0.0,0.0
0,23,0.851064,0.0,0.0
0,25,0.851064,0.0,0.0
0,27,0.851064,0.0,0.0
0,29,0.851064,0.0,0.0


In [77]:
# 정렬 순서
# max_depth, max_features, max_leaf_nodes 기준으로 오름차순으로 먼저 정렬
# 그 다음 재현율, 정확도, 정밀도 기준으로 내림차순
display(
    OptDecisionTree(data, data.columns.drop('depressed'), 'depressed', 0.3, 2024)
    .sort_values(['max_depth', 'max_features', 'max_leaf_nodes'], ascending = True)
    .sort_values(["재현율", "정확도", "정밀도"], ascending = False)
)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


X_train(986, 21), X_test(423, 21), y_train(986,), y_test(423,)


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

Unnamed: 0,max_depth,max_features,max_leaf_nodes,정확도,정밀도,재현율
0,7,5,8,0.853428,0.545455,0.095238
0,5,8,5,0.820331,0.217391,0.079365
0,5,5,8,0.853428,0.571429,0.063492
0,5,7,8,0.851064,0.500000,0.063492
0,5,2,8,0.848700,0.444444,0.063492
...,...,...,...,...,...,...
0,4,1,7,0.839243,0.000000,0.000000
0,6,1,8,0.839243,0.000000,0.000000
0,6,5,4,0.839243,0.000000,0.000000
0,4,4,8,0.829787,0.000000,0.000000


In [78]:
# 정렬 순서
# max_depth, max_features, max_leaf_nodes 기준으로 오름차순으로 먼저 정렬
# 그 다음 재현율, 정확도, 정밀도 기준으로 내림차순
display(
    OptRandomForest(data, data.columns.drop('depressed'), 'depressed', 0.3, 2024)
    .sort_values(['max_depth', 'max_features', 'max_leaf_nodes'], ascending = True)
    .sort_values(["재현율", "정확도", "정밀도"], ascending = False)
)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


X_train(986, 21), X_test(423, 21), y_train(986,), y_test(423,)


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

Unnamed: 0,max_depth,max_features,max_leaf_nodes,정확도,정밀도,재현율
0,1,1,2,0.851064,0.0,0.0
0,1,1,3,0.851064,0.0,0.0
0,1,1,4,0.851064,0.0,0.0
0,1,1,5,0.851064,0.0,0.0
0,1,1,6,0.851064,0.0,0.0
...,...,...,...,...,...,...
0,8,8,4,0.851064,0.0,0.0
0,8,8,5,0.851064,0.0,0.0
0,8,8,6,0.851064,0.0,0.0
0,8,8,7,0.851064,0.0,0.0
