<a href="https://colab.research.google.com/github/bumsoft/ml_as/blob/main/Untitled.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, RocCurveDisplay, PrecisionRecallDisplay
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# Load scikit-learn's bundled breast-cancer dataset as feature matrix / labels.
raw_data = load_breast_cancer()
X, y = raw_data.data, raw_data.target

# Hold out 20% of the samples for evaluation; the seed keeps the split fixed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardization (currently disabled)
#scaler = StandardScaler()
#X_train = scaler.fit_transform(X_train)
#X_test = scaler.transform(X_test)


def _score_row(label, y_true, y_hat, y_score):
    """Return a one-row metrics mapping (column name -> single-item list).

    ``y_hat`` holds hard class predictions; ``y_score`` holds the predicted
    probability of the positive class and is only used for ROC-AUC.
    """
    return {
        "Model": [label],
        "accuracy": [accuracy_score(y_true, y_hat)],
        "precision": [precision_score(y_true, y_hat)],
        "recall": [recall_score(y_true, y_hat)],
        "F1 Score": [f1_score(y_true, y_hat)],
        "ROC-AUC Score": [roc_auc_score(y_true, y_score)],
    }


# Build and fit the three classifiers (``fit`` returns the estimator itself).
log_reg = LogisticRegression(
    random_state=13, solver='liblinear', C=10.0
).fit(X_train, y_train)
dec_tree = DecisionTreeClassifier(random_state=13).fit(X_train, y_train)
rand_forest = RandomForestClassifier(random_state=13).fit(X_train, y_train)

# Hard class predictions on the held-out set.
logi_pred, dec_pred, rand_pred = (
    clf.predict(X_test) for clf in (log_reg, dec_tree, rand_forest)
)

# Positive-class probabilities (column 1), needed for ROC-AUC.
logi_pred_roc, dec_pred_roc, rand_pred_roc = (
    clf.predict_proba(X_test)[:, 1] for clf in (log_reg, dec_tree, rand_forest)
)

# Per-model result rows.
logistic_metrics = _score_row(
    "Logistic Regression", y_test, logi_pred, logi_pred_roc
)
decision_metrics = _score_row(
    "Decision Tree", y_test, dec_pred, dec_pred_roc
)
randforest_metrics = _score_row(
    "Random Forest", y_test, rand_pred, rand_pred_roc
)

# Stack the rows into a single comparison table, one model per row.
metrics_df = pd.concat(
    [
        pd.DataFrame(row)
        for row in (logistic_metrics, decision_metrics, randforest_metrics)
    ],
    ignore_index=True,
)
metrics_df


In [None]:
# Re-create and fit the logistic-regression model so this cell only needs the
# train/test split to be present (``fit`` returns the estimator itself).
log_reg = LogisticRegression(
    random_state=13, solver='liblinear', C=10.0
).fit(X_train, y_train)

# Draw the ROC curve on the held-out set and title the axes it was drawn on.
_roc_display = RocCurveDisplay.from_estimator(
    estimator=log_reg, X=X_test, y=y_test
)
_roc_display.ax_.set_title('ROC Curve for Logistic Regression')
plt.show()

In [None]:
# Plot the Precision-Recall curve for the fitted logistic-regression model.
# The axes are created explicitly and handed to the display: when `ax` is
# omitted, `from_estimator` opens its own figure, which would leave a separate
# `plt.figure(figsize=...)` blank and the requested size unused.
fig, ax = plt.subplots(figsize=(8, 6))
PrecisionRecallDisplay.from_estimator(log_reg, X_test, y_test, ax=ax)
ax.set_title('Precision-Recall Curve for Logistic Regression')
plt.show()

In [None]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline

# Pipeline: standardize the features, then fit a logistic regression.
# random_state is pinned because the solvers searched below ('liblinear',
# 'saga') shuffle the data; this matches the seed used by the other models in
# this file and makes the search reproducible.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('log_reg', LogisticRegression(max_iter=10000, random_state=13))
])

# Hyper-parameter grid; the `log_reg__` prefix routes each entry to the
# pipeline step of that name.
param_grid = {
    'log_reg__C': [0.1, 1, 10, 100],
    'log_reg__solver': ['liblinear', 'saga']
}

# 5-fold cross-validated grid search, selecting by accuracy.
grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='accuracy')

# Run the search on the training split only.
grid_search.fit(X_train, y_train)


# Predict on the held-out set with the best pipeline found by the search.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
# Positive-class probability (column 1), needed for ROC-AUC.
y_pred_prob = best_model.predict_proba(X_test)[:, 1]

# Compute the evaluation metrics as a one-row table.
metrics = {
    "Model": ["Logistic Regression with GridSearch"],
    "Accuracy": [accuracy_score(y_test, y_pred)],
    "Precision": [precision_score(y_test, y_pred)],
    "Recall": [recall_score(y_test, y_pred)],
    "F1 Score": [f1_score(y_test, y_pred)],
    "ROC-AUC Score": [roc_auc_score(y_test, y_pred_prob)],
}
metrics_df = pd.DataFrame(metrics)
metrics_df
