In [1]:
import numpy as np
import pandas as pd
import random
import os

from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler

In [2]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(42)

In [3]:
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

In [4]:
X_train = train_df.drop(['ID', 'TARGET'], axis = 1)
y_train = train_df['TARGET']
X_test = test_df.drop('ID', axis = 1)

In [5]:
X_tr, X_val, Y_tr, Y_val = train_test_split(X_train, y_train, test_size=0.3, random_state=42)

In [6]:
ordinal_features = ['요일', '범죄발생지']

for feature in ordinal_features:
    le = LabelEncoder()

    # Fit and transform on the training data
    le = le.fit(X_tr[feature])
    X_tr[feature] = le.transform(X_tr[feature])

    # Transform the test data (Note: Only transform, do not fit again to avoid data leakage)
    X_val[feature] = le.transform(X_val[feature])

    # Check if any new labels in test set and add them to the encoder's classes
    for label in np.unique(X_test[feature]):
        if label not in le.classes_:
            le.classes_ = np.append(le.classes_, label)
    X_test[feature] = le.transform(X_test[feature])

In [7]:
X_tr.head()

Unnamed: 0,월,요일,시간,소관경찰서,소관지역,사건발생거리,강수량(mm),강설량(mm),적설량(cm),풍향,안개,짙은안개,번개,진눈깨비,서리,연기/연무,눈날림,범죄발생지
57342,4,6,10,637,10.0,2.116536,0.0,0.0,0.0,215.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7
5323,3,5,4,526,21.0,2.010814,0.0,0.0,0.0,145.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,7
40195,9,2,2,526,21.0,2.061723,0.0,0.0,0.0,65.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7
70005,10,4,4,1926,44.0,0.535115,15.0,0.0,0.0,185.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,7
36629,7,6,6,1428,33.0,0.250833,0.625,0.0,0.0,210.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6


# 기본 모델

In [8]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Softmax Regression 모델 생성, fit

model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000)
model.fit(X_tr, Y_tr)

# 예측
y_pred = model.predict(X_val)

# 정확도 계산
accuracy = accuracy_score(Y_val, y_pred)

# F1-micro 계산
f1_micro = f1_score(Y_val, y_pred, average = 'micro')

# 정밀도 계산
precision = precision_score(Y_val, y_pred, average='weighted')

# 모델 훈련 시간
training_time = model.n_iter_[-1]

print(f'정확도: {accuracy:.2f}')
print(f'F1-micro: {f1_micro:.2f}')
print(f'정말도 : {precision:.4f}')
print(f'모델학습시간: {training_time} iterations')

정확도: 0.44
F1-micro: 0.44
정말도 : 0.4198
모델학습시간: 1000 iterations


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


# RFE를 통해 변수 선택 후 스케일링

In [9]:
from sklearn.feature_selection import RFE

In [10]:
# Softmax Regression 모델 정의
softmax_model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000, random_state = 42)

# RFE를 사용하여 변수 선택
for n_features in range(1, 17):
    # RFE를 사용하여 특성 선택
    rfe = RFE(estimator=softmax_model, n_features_to_select=n_features)
    X_train_rfe = rfe.fit_transform(X_tr, Y_tr)
    X_test_rfe = rfe.transform(X_val)

    # 특성 선택 후에 스케일링
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_rfe)
    X_test_scaled = scaler.transform(X_test_rfe)

    # Softmax Regression 모델 훈련 및 예측
    softmax_model.fit(X_train_scaled, Y_tr)
    y_pred_softmax = softmax_model.predict(X_test_scaled)

    # 정확도 출력
    accuracy_softmax = accuracy_score(Y_val, y_pred_softmax)
    print(f'n_features_to_select={n_features}, Accuracy: {accuracy_softmax:.4f}')

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=1, Accuracy: 0.4312


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=2, Accuracy: 0.4312


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=3, Accuracy: 0.4157


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=4, Accuracy: 0.4169


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=5, Accuracy: 0.4170


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=6, Accuracy: 0.4170


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=7, Accuracy: 0.4168


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=8, Accuracy: 0.4168


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=9, Accuracy: 0.4260


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=10, Accuracy: 0.4260


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=11, Accuracy: 0.4580


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=12, Accuracy: 0.4572


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=13, Accuracy: 0.4597


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=14, Accuracy: 0.4596


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=15, Accuracy: 0.4597


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=16, Accuracy: 0.4603


- 정확도가 전부 비슷하게 나와서 하나씩 진행해봄

In [11]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.feature_selection import RFE

rf_model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000, random_state = 42)

In [12]:
# n_features_to_select=1
n_features_to_select=1

# RFE를 사용하여 특성 선택
rfe = RFE(estimator=rf_model, n_features_to_select=n_features_to_select)
X_train_rfe = rfe.fit_transform(X_tr, Y_tr)
X_test_rfe = rfe.transform(X_val)

# 모델 훈련 및 예측
rf_model.fit(X_train_rfe, Y_tr)
y_pred_rf = rf_model.predict(X_test_rfe)

# 평가
acc = accuracy_score(Y_val, y_pred_rf)
precision = precision_score(Y_val, y_pred_rf, average='weighted')
recall = recall_score(Y_val, y_pred_rf, average='weighted')
f1 = f1_score(Y_val, y_pred_rf, average='weighted')
conf_matrix = confusion_matrix(Y_val, y_pred_rf)

# 결과 출력
print(f'n_features_to_select={n_features_to_select}')
print(f'Accuracy: {acc:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')
print('Confusion Matrix:')
print(conf_matrix)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=1
Accuracy: 0.4312
Precision: 0.1859
Recall: 0.4312
F1 Score: 0.2598
Confusion Matrix:
[[10919     0     0]
 [ 7642     0     0]
 [ 6761     0     0]]


  _warn_prf(average, modifier, msg_start, len(result))


In [13]:
# n_features_to_select=2
n_features_to_select=2

# RFE를 사용하여 특성 선택
rfe = RFE(estimator=rf_model, n_features_to_select=n_features_to_select)
X_train_rfe = rfe.fit_transform(X_tr, Y_tr)
X_test_rfe = rfe.transform(X_val)

# 모델 훈련 및 예측
rf_model.fit(X_train_rfe, Y_tr)
y_pred_rf = rf_model.predict(X_test_rfe)

# 평가
acc = accuracy_score(Y_val, y_pred_rf)
precision = precision_score(Y_val, y_pred_rf, average='weighted')
recall = recall_score(Y_val, y_pred_rf, average='weighted')
f1 = f1_score(Y_val, y_pred_rf, average='weighted')
conf_matrix = confusion_matrix(Y_val, y_pred_rf)

# 결과 출력
print(f'n_features_to_select={n_features_to_select}')
print(f'Accuracy: {acc:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')
print('Confusion Matrix:')
print(conf_matrix)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=2
Accuracy: 0.4312
Precision: 0.1859
Recall: 0.4312
F1 Score: 0.2598
Confusion Matrix:
[[10919     0     0]
 [ 7642     0     0]
 [ 6761     0     0]]


  _warn_prf(average, modifier, msg_start, len(result))


In [58]:
#n_features_to_select=3
n_features_to_select=3

# RFE를 사용하여 특성 선택
rfe = RFE(estimator=rf_model, n_features_to_select=n_features_to_select)
X_train_rfe = rfe.fit_transform(X_tr, Y_tr)
X_test_rfe = rfe.transform(X_val)

# 모델 훈련 및 예측
rf_model.fit(X_train_rfe, Y_tr)
y_pred_rf = rf_model.predict(X_test_rfe)

# 평가
acc = accuracy_score(Y_val, y_pred_rf)
precision = precision_score(Y_val, y_pred_rf, average='weighted')
recall = recall_score(Y_val, y_pred_rf, average='weighted')
f1 = f1_score(Y_val, y_pred_rf, average='weighted')
conf_matrix = confusion_matrix(Y_val, y_pred_rf)

# 결과 출력
print(f'n_features_to_select={n_features_to_select}')
print(f'Accuracy: {acc:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')
print('Confusion Matrix:')
print(conf_matrix)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=3
Accuracy: 0.4169
Precision: 0.2300
Recall: 0.4169
F1 Score: 0.2777
Confusion Matrix:
[[10245     0   674]
 [ 6522     0  1120]
 [ 6448     0   313]]


  _warn_prf(average, modifier, msg_start, len(result))


In [59]:
# n_features_to_select=4
n_features_to_select=4

# RFE를 사용하여 특성 선택
rfe = RFE(estimator=rf_model, n_features_to_select=n_features_to_select)
X_train_rfe = rfe.fit_transform(X_tr, Y_tr)
X_test_rfe = rfe.transform(X_val)

# 모델 훈련 및 예측
rf_model.fit(X_train_rfe, Y_tr)
y_pred_rf = rf_model.predict(X_test_rfe)

# 평가
acc = accuracy_score(Y_val, y_pred_rf)
precision = precision_score(Y_val, y_pred_rf, average='weighted')
recall = recall_score(Y_val, y_pred_rf, average='weighted')
f1 = f1_score(Y_val, y_pred_rf, average='weighted')
conf_matrix = confusion_matrix(Y_val, y_pred_rf)

# 결과 출력
print(f'n_features_to_select={n_features_to_select}')
print(f'Accuracy: {acc:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')
print('Confusion Matrix:')
print(conf_matrix)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=4
Accuracy: 0.4165
Precision: 0.2259
Recall: 0.4165
F1 Score: 0.2752
Confusion Matrix:
[[10279     0   640]
 [ 6557     0  1085]
 [ 6493     0   268]]


  _warn_prf(average, modifier, msg_start, len(result))


In [60]:
# n_features_to_select=5
n_features_to_select=5

# RFE를 사용하여 특성 선택
rfe = RFE(estimator=rf_model, n_features_to_select=n_features_to_select)
X_train_rfe = rfe.fit_transform(X_tr, Y_tr)
X_test_rfe = rfe.transform(X_val)

# 모델 훈련 및 예측
rf_model.fit(X_train_rfe, Y_tr)
y_pred_rf = rf_model.predict(X_test_rfe)

# 평가
acc = accuracy_score(Y_val, y_pred_rf)
precision = precision_score(Y_val, y_pred_rf, average='weighted')
recall = recall_score(Y_val, y_pred_rf, average='weighted')
f1 = f1_score(Y_val, y_pred_rf, average='weighted')
conf_matrix = confusion_matrix(Y_val, y_pred_rf)

# 결과 출력
print(f'n_features_to_select={n_features_to_select}')
print(f'Accuracy: {acc:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')
print('Confusion Matrix:')
print(conf_matrix)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=5
Accuracy: 0.4166
Precision: 0.2266
Recall: 0.4166
F1 Score: 0.2757
Confusion Matrix:
[[10273     0   646]
 [ 6547     0  1095]
 [ 6485     0   276]]


  _warn_prf(average, modifier, msg_start, len(result))


In [61]:
# n_features_to_select=6

n_features_to_select=6

# RFE를 사용하여 특성 선택
rfe = RFE(estimator=rf_model, n_features_to_select=n_features_to_select)
X_train_rfe = rfe.fit_transform(X_tr, Y_tr)
X_test_rfe = rfe.transform(X_val)

# 모델 훈련 및 예측
rf_model.fit(X_train_rfe, Y_tr)
y_pred_rf = rf_model.predict(X_test_rfe)

# 평가
acc = accuracy_score(Y_val, y_pred_rf)
precision = precision_score(Y_val, y_pred_rf, average='weighted')
recall = recall_score(Y_val, y_pred_rf, average='weighted')
f1 = f1_score(Y_val, y_pred_rf, average='weighted')
conf_matrix = confusion_matrix(Y_val, y_pred_rf)

# 결과 출력
print(f'n_features_to_select={n_features_to_select}')
print(f'Accuracy: {acc:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')
print('Confusion Matrix:')
print(conf_matrix)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=6
Accuracy: 0.4169
Precision: 0.2287
Recall: 0.4169
F1 Score: 0.2769
Confusion Matrix:
[[10258     0   661]
 [ 6536     0  1106]
 [ 6463     0   298]]


  _warn_prf(average, modifier, msg_start, len(result))


In [62]:
# n_features_to_select=7
n_features_to_select=7

# RFE를 사용하여 특성 선택
rfe = RFE(estimator=rf_model, n_features_to_select=n_features_to_select)
X_train_rfe = rfe.fit_transform(X_tr, Y_tr)
X_test_rfe = rfe.transform(X_val)

# 모델 훈련 및 예측
rf_model.fit(X_train_rfe, Y_tr)
y_pred_rf = rf_model.predict(X_test_rfe)

# 평가
acc = accuracy_score(Y_val, y_pred_rf)
precision = precision_score(Y_val, y_pred_rf, average='weighted')
recall = recall_score(Y_val, y_pred_rf, average='weighted')
f1 = f1_score(Y_val, y_pred_rf, average='weighted')
conf_matrix = confusion_matrix(Y_val, y_pred_rf)

# 결과 출력
print(f'n_features_to_select={n_features_to_select}')
print(f'Accuracy: {acc:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')
print('Confusion Matrix:')
print(conf_matrix)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=7
Accuracy: 0.4254
Precision: 0.4142
Recall: 0.4254
F1 Score: 0.3019
Confusion Matrix:
[[10094    80   745]
 [ 6379   168  1095]
 [ 6189    61   511]]


In [63]:
# n_features_to_select=8
n_features_to_select=8

# RFE를 사용하여 특성 선택
rfe = RFE(estimator=rf_model, n_features_to_select=n_features_to_select)
X_train_rfe = rfe.fit_transform(X_tr, Y_tr)
X_test_rfe = rfe.transform(X_val)

# 모델 훈련 및 예측
rf_model.fit(X_train_rfe, Y_tr)
y_pred_rf = rf_model.predict(X_test_rfe)

# 평가
acc = accuracy_score(Y_val, y_pred_rf)
precision = precision_score(Y_val, y_pred_rf, average='weighted')
recall = recall_score(Y_val, y_pred_rf, average='weighted')
f1 = f1_score(Y_val, y_pred_rf, average='weighted')
conf_matrix = confusion_matrix(Y_val, y_pred_rf)

# 결과 출력
print(f'n_features_to_select={n_features_to_select}')
print(f'Accuracy: {acc:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')
print('Confusion Matrix:')
print(conf_matrix)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=8
Accuracy: 0.4256
Precision: 0.4133
Recall: 0.4256
F1 Score: 0.3031
Confusion Matrix:
[[10071    79   769]
 [ 6359   158  1125]
 [ 6154    59   548]]


In [64]:
# n_features_to_select=9
n_features_to_select=9

# RFE를 사용하여 특성 선택
rfe = RFE(estimator=rf_model, n_features_to_select=n_features_to_select)
X_train_rfe = rfe.fit_transform(X_tr, Y_tr)
X_test_rfe = rfe.transform(X_val)

# 모델 훈련 및 예측
rf_model.fit(X_train_rfe, Y_tr)
y_pred_rf = rf_model.predict(X_test_rfe)

# 평가
acc = accuracy_score(Y_val, y_pred_rf)
precision = precision_score(Y_val, y_pred_rf, average='weighted')
recall = recall_score(Y_val, y_pred_rf, average='weighted')
f1 = f1_score(Y_val, y_pred_rf, average='weighted')
conf_matrix = confusion_matrix(Y_val, y_pred_rf)

# 결과 출력
print(f'n_features_to_select={n_features_to_select}')
print(f'Accuracy: {acc:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')
print('Confusion Matrix:')
print(conf_matrix)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=9
Accuracy: 0.4573
Precision: 0.4242
Recall: 0.4573
F1 Score: 0.3875
Confusion Matrix:
[[9370  842  707]
 [5210 1591  841]
 [5239  902  620]]


In [65]:
# n_features_to_select=10
n_features_to_select=10

# RFE를 사용하여 특성 선택
rfe = RFE(estimator=rf_model, n_features_to_select=n_features_to_select)
X_train_rfe = rfe.fit_transform(X_tr, Y_tr)
X_test_rfe = rfe.transform(X_val)

# 모델 훈련 및 예측
rf_model.fit(X_train_rfe, Y_tr)
y_pred_rf = rf_model.predict(X_test_rfe)

# 평가
acc = accuracy_score(Y_val, y_pred_rf)
precision = precision_score(Y_val, y_pred_rf, average='weighted')
recall = recall_score(Y_val, y_pred_rf, average='weighted')
f1 = f1_score(Y_val, y_pred_rf, average='weighted')
conf_matrix = confusion_matrix(Y_val, y_pred_rf)

# 결과 출력
print(f'n_features_to_select={n_features_to_select}')
print(f'Accuracy: {acc:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')
print('Confusion Matrix:')
print(conf_matrix)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=10
Accuracy: 0.4563
Precision: 0.4234
Recall: 0.4563
F1 Score: 0.3867
Confusion Matrix:
[[9347  847  725]
 [5219 1586  837]
 [5247  892  622]]


In [66]:
# n_features_to_select=11
n_features_to_select=11

# RFE를 사용하여 특성 선택
rfe = RFE(estimator=rf_model, n_features_to_select=n_features_to_select)
X_train_rfe = rfe.fit_transform(X_tr, Y_tr)
X_test_rfe = rfe.transform(X_val)

# 모델 훈련 및 예측
rf_model.fit(X_train_rfe, Y_tr)
y_pred_rf = rf_model.predict(X_test_rfe)

# 평가
acc = accuracy_score(Y_val, y_pred_rf)
precision = precision_score(Y_val, y_pred_rf, average='weighted')
recall = recall_score(Y_val, y_pred_rf, average='weighted')
f1 = f1_score(Y_val, y_pred_rf, average='weighted')
conf_matrix = confusion_matrix(Y_val, y_pred_rf)

# 결과 출력
print(f'n_features_to_select={n_features_to_select}')
print(f'Accuracy: {acc:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')
print('Confusion Matrix:')
print(conf_matrix)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=11
Accuracy: 0.4588
Precision: 0.4300
Recall: 0.4588
F1 Score: 0.3929
Confusion Matrix:
[[9298  855  766]
 [5154 1577  911]
 [5130  889  742]]


In [67]:
# n_features_to_select=12
n_features_to_select=12

# RFE를 사용하여 특성 선택
rfe = RFE(estimator=rf_model, n_features_to_select=n_features_to_select)
X_train_rfe = rfe.fit_transform(X_tr, Y_tr)
X_test_rfe = rfe.transform(X_val)

# 모델 훈련 및 예측
rf_model.fit(X_train_rfe, Y_tr)
y_pred_rf = rf_model.predict(X_test_rfe)

# 평가
acc = accuracy_score(Y_val, y_pred_rf)
precision = precision_score(Y_val, y_pred_rf, average='weighted')
recall = recall_score(Y_val, y_pred_rf, average='weighted')
f1 = f1_score(Y_val, y_pred_rf, average='weighted')
conf_matrix = confusion_matrix(Y_val, y_pred_rf)

# 결과 출력
print(f'n_features_to_select={n_features_to_select}')
print(f'Accuracy: {acc:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')
print('Confusion Matrix:')
print(conf_matrix)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=12
Accuracy: 0.4587
Precision: 0.4313
Recall: 0.4587
F1 Score: 0.3946
Confusion Matrix:
[[9249  854  816]
 [5139 1577  926]
 [5080  892  789]]


In [68]:
# n_features_to_select=13
n_features_to_select=13

# RFE를 사용하여 특성 선택
rfe = RFE(estimator=rf_model, n_features_to_select=n_features_to_select)
X_train_rfe = rfe.fit_transform(X_tr, Y_tr)
X_test_rfe = rfe.transform(X_val)

# 모델 훈련 및 예측
rf_model.fit(X_train_rfe, Y_tr)
y_pred_rf = rf_model.predict(X_test_rfe)

# 평가
acc = accuracy_score(Y_val, y_pred_rf)
precision = precision_score(Y_val, y_pred_rf, average='weighted')
recall = recall_score(Y_val, y_pred_rf, average='weighted')
f1 = f1_score(Y_val, y_pred_rf, average='weighted')
conf_matrix = confusion_matrix(Y_val, y_pred_rf)

# 결과 출력
print(f'n_features_to_select={n_features_to_select}')
print(f'Accuracy: {acc:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')
print('Confusion Matrix:')
print(conf_matrix)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

n_features_to_select=13
Accuracy: 0.4605
Precision: 0.4350
Recall: 0.4605
F1 Score: 0.3959
Confusion Matrix:
[[9285  851  783]
 [5194 1590  858]
 [5076  898  787]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [69]:
# n_features_to_select=14
n_features_to_select=14

# RFE를 사용하여 특성 선택
rfe = RFE(estimator=rf_model, n_features_to_select=n_features_to_select)
X_train_rfe = rfe.fit_transform(X_tr, Y_tr)
X_test_rfe = rfe.transform(X_val)

# 모델 훈련 및 예측
rf_model.fit(X_train_rfe, Y_tr)
y_pred_rf = rf_model.predict(X_test_rfe)

# 평가
acc = accuracy_score(Y_val, y_pred_rf)
precision = precision_score(Y_val, y_pred_rf, average='weighted')
recall = recall_score(Y_val, y_pred_rf, average='weighted')
f1 = f1_score(Y_val, y_pred_rf, average='weighted')
conf_matrix = confusion_matrix(Y_val, y_pred_rf)

# 결과 출력
print(f'n_features_to_select={n_features_to_select}')
print(f'Accuracy: {acc:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')
print('Confusion Matrix:')
print(conf_matrix)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


n_features_to_select=14
Accuracy: 0.4616
Precision: 0.4371
Recall: 0.4616
F1 Score: 0.3966
Confusion Matrix:
[[9304  845  770]
 [5204 1603  835]
 [5091  889  781]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [70]:
# n_features_to_select=15
n_features_to_select=15

# RFE를 사용하여 특성 선택
rfe = RFE(estimator=rf_model, n_features_to_select=n_features_to_select)
X_train_rfe = rfe.fit_transform(X_tr, Y_tr)
X_test_rfe = rfe.transform(X_val)

# 모델 훈련 및 예측
rf_model.fit(X_train_rfe, Y_tr)
y_pred_rf = rf_model.predict(X_test_rfe)

# 평가
acc = accuracy_score(Y_val, y_pred_rf)
precision = precision_score(Y_val, y_pred_rf, average='weighted')
recall = recall_score(Y_val, y_pred_rf, average='weighted')
f1 = f1_score(Y_val, y_pred_rf, average='weighted')
conf_matrix = confusion_matrix(Y_val, y_pred_rf)

# 결과 출력
print(f'n_features_to_select={n_features_to_select}')
print(f'Accuracy: {acc:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')
print('Confusion Matrix:')
print(conf_matrix)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


n_features_to_select=15
Accuracy: 0.4434
Precision: 0.4211
Recall: 0.4434
F1 Score: 0.3722
Confusion Matrix:
[[9229  816  874]
 [5809 1081  752]
 [5195  649  917]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [71]:
# n_features_to_select=16
n_features_to_select=16

# RFE를 사용하여 특성 선택
rfe = RFE(estimator=rf_model, n_features_to_select=n_features_to_select)
X_train_rfe = rfe.fit_transform(X_tr, Y_tr)
X_test_rfe = rfe.transform(X_val)

# 모델 훈련 및 예측
rf_model.fit(X_train_rfe, Y_tr)
y_pred_rf = rf_model.predict(X_test_rfe)

# 평가
acc = accuracy_score(Y_val, y_pred_rf)
precision = precision_score(Y_val, y_pred_rf, average='weighted')
recall = recall_score(Y_val, y_pred_rf, average='weighted')
f1 = f1_score(Y_val, y_pred_rf, average='weighted')
conf_matrix = confusion_matrix(Y_val, y_pred_rf)

# 결과 출력
print(f'n_features_to_select={n_features_to_select}')
print(f'Accuracy: {acc:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')
print('Confusion Matrix:')
print(conf_matrix)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


n_features_to_select=16
Accuracy: 0.4434
Precision: 0.4211
Recall: 0.4434
F1 Score: 0.3722
Confusion Matrix:
[[9229  816  874]
 [5809 1081  752]
 [5195  649  917]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


* 정확도 기준으로 n_features_to_select=14이 정확도가 가장 높음

In [72]:
# n_features_to_select=14
n_features_to_select=14
feature_names = ['월', '요일', '시간', '소관경찰서', '소관지역', '사건발생거리', '강수량(mm)', '강설량(mm)',
       '적설량(cm)', '풍향', '안개', '짙은안개', '번개', '진눈깨비', '서리', '연기/연무', '눈날림',
       '범죄발생지']

# RFE를 사용하여 특성 선택
rfe = RFE(estimator=rf_model, n_features_to_select=n_features_to_select)
X_train_rfe = rfe.fit_transform(X_tr, Y_tr)
X_test_rfe = rfe.transform(X_val)

# 모델 훈련 및 예측
rf_model.fit(X_train_rfe, Y_tr)
y_pred_rf = rf_model.predict(X_test_rfe)

# 평가
acc = accuracy_score(Y_val, y_pred_rf)
precision = precision_score(Y_val, y_pred_rf, average='weighted')
recall = recall_score(Y_val, y_pred_rf, average='weighted')
f1 = f1_score(Y_val, y_pred_rf, average='weighted')
conf_matrix = confusion_matrix(Y_val, y_pred_rf)

# 결과 출력
print(f'n_features_to_select={n_features_to_select}')
print(f'Accuracy: {acc:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')
print('Confusion Matrix:')
print(conf_matrix)

# 변수 값 확인
selected_indices = rfe.get_support(indices=True)

# 선택된 특성의 이름 출력
selected_feature_names = [feature_names[idx] for idx in selected_indices]
print(f"Selected Feature Names: {selected_feature_names}")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


n_features_to_select=14
Accuracy: 0.4616
Precision: 0.4371
Recall: 0.4616
F1 Score: 0.3966
Confusion Matrix:
[[9304  845  770]
 [5204 1603  835]
 [5091  889  781]]
Selected Feature Names: ['월', '요일', '시간', '소관지역', '사건발생거리', '강수량(mm)', '강설량(mm)', '적설량(cm)', '풍향', '안개', '짙은안개', '번개', '진눈깨비', '서리']


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


# n_features_to_select = 14 기준으로 스케일링 진행

## StandardScaler
- 38초
- Accuracy: 0.4607

In [73]:
n_features_to_select = 14

# RFE를 사용하여 특성 선택
rfe = RFE(estimator=rf_model, n_features_to_select=n_features_to_select)
X_train_rfe = rfe.fit_transform(X_tr, Y_tr)
X_test_rfe = rfe.transform(X_val)

# 특성 선택 후의 데이터에 스케일링 적용
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_rfe)
X_test_scaled = scaler.transform(X_test_rfe)

# Softmax Regression 모델 정의
rf_model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000, random_state = 42)

# Softmax Regression 모델 훈련 및 예측
rf_model.fit(X_train_scaled, Y_tr)
y_pred_sm = rf_model.predict(X_test_scaled)

# 정확도 출력
accuracy = accuracy_score(Y_val, y_pred_sm)
print(f'Using {n_features_to_select} features - Accuracy: {accuracy:.4f}')

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Using 14 features - Accuracy: 0.4607


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


## MinMaxScaler
- 49초
- Accuracy: 0.4609

In [75]:
n_features_to_select = 14

# RFE를 사용하여 특성 선택
rfe = RFE(estimator=rf_model, n_features_to_select=n_features_to_select)
X_train_rfe = rfe.fit_transform(X_tr, Y_tr)
X_test_rfe = rfe.transform(X_val)

# 특성 선택 후의 데이터에 스케일링 적용
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train_rfe)
X_test_scaled = scaler.transform(X_test_rfe)

# Softmax Regression 모델 정의
rf_model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000, random_state = 42)

# Softmax Regression 모델 훈련 및 예측
rf_model.fit(X_train_scaled, Y_tr)
y_pred_sm = rf_model.predict(X_test_scaled)

# 정확도 출력
accuracy = accuracy_score(Y_val, y_pred_sm)
print(f'Using {n_features_to_select} features - Accuracy: {accuracy:.4f}')

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Using 14 features - Accuracy: 0.4609


## RobustScaler
- 38초
- Accuracy: 0.4609

In [76]:
n_features_to_select = 14

# RFE를 사용하여 특성 선택
rfe = RFE(estimator=rf_model, n_features_to_select=n_features_to_select)
X_train_rfe = rfe.fit_transform(X_tr, Y_tr)
X_test_rfe = rfe.transform(X_val)

# 특성 선택 후의 데이터에 스케일링 적용
scaler = RobustScaler()
X_train_scaled = scaler.fit_transform(X_train_rfe)
X_test_scaled = scaler.transform(X_test_rfe)

# Softmax Regression 모델 정의
rf_model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000, random_state = 42)

# Softmax Regression 모델 훈련 및 예측
rf_model.fit(X_train_scaled, Y_tr)
y_pred_sm = rf_model.predict(X_test_scaled)

# 정확도 출력
accuracy = accuracy_score(Y_val, y_pred_sm)
print(f'Using {n_features_to_select} features - Accuracy: {accuracy:.4f}')

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Using 14 features - Accuracy: 0.4609


-------------