# 머신러닝 코드 정리

In [1]:
# 라이브러리 임포트
from sklearn.datasets import load_breast_cancer
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns
import tensorflow as tf
from tensorflow import keras

In [14]:
# 데이터 가져오기
url = "https://raw.githubusercontent.com/MyungKyuYi/AI-class/main/breast_cancer.csv"
df = pd.read_csv(url, index_col=0)

x = df.drop('label', axis=1)
y = df['label']

# 스케일링
scaler = StandardScaler()
x = scaler.fit_transform(x)
x_train, x_test, y_train, y_test = train_test_split(x, y,test_size=0.25,random_state=0)

print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

(426, 30) (426,)
(143, 30) (143,)


## 1. Decision Tree`

In [5]:
from sklearn.tree import DecisionTreeClassifier

dt_model = DecisionTreeClassifier(random_state=0)
dt_model.fit(x_train, y_train)
y_pred_dt = dt_model.predict(x_test)

print("DT Accuracy:", accuracy_score(y_test, y_pred_dt))
print(confusion_matrix(y_test, y_pred_dt))

DT Accuracy: 0.8811188811188811
[[50  3]
 [14 76]]


## 2. Random Forest

In [6]:
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(random_state=0)
rf_model.fit(x_train, y_train)
y_pred_rf = rf_model.predict(x_test)

print("RF Accuracy:", accuracy_score(y_test, y_pred_rf))
print(confusion_matrix(y_test, y_pred_rf))

RF Accuracy: 0.972027972027972
[[52  1]
 [ 3 87]]


## 3. Support Vector Machine

In [8]:
from sklearn.svm import SVC

svm_model = SVC(random_state=0)
svm_model.fit(x_train, y_train)
y_pred_svm = svm_model.predict(x_test)

print("SVM Accuracy:", accuracy_score(y_test, y_pred_svm))
print(confusion_matrix(y_test, y_pred_svm))

SVM Accuracy: 0.965034965034965
[[50  3]
 [ 2 88]]


## 4. Logistic Regression

In [10]:
from sklearn.linear_model import LogisticRegression

lr_model = LogisticRegression(random_state=0)
lr_model.fit(x_train, y_train)
y_pred_lr = lr_model.predict(x_test)

print("LR Accuracy:", accuracy_score(y_test, y_pred_lr))
print(confusion_matrix(y_test, y_pred_lr))

LR Accuracy: 0.965034965034965
[[50  3]
 [ 2 88]]


## 5. K-Nearest Neighbors

In [12]:
from sklearn.neighbors import KNeighborsClassifier

k_range = range(1, 101)
scores = []

for k in k_range:
    knn_model = KNeighborsClassifier(n_neighbors=k)
    knn_model.fit(x_train, y_train)
    y_pred_knn = knn_model.predict(x_test)
    scores.append(accuracy_score(y_test, y_pred_knn))
max_index = max(scores)
optimal_k = scores.index(max_index)
print("Optimal k: ", optimal_k)

knn_model = KNeighborsClassifier(n_neighbors=optimal_k)
knn_model.fit(x_train, y_train)
y_pred_knn = knn_model.predict(x_test)

print("KNN Accuracy:", accuracy_score(y_test, y_pred_knn))
print(confusion_matrix(y_test, y_pred_knn))

Optimal k:  13
KNN Accuracy: 0.951048951048951
[[48  5]
 [ 2 88]]
