In [1]:
from sklearn.datasets import load_digits,load_breast_cancer
digits = load_digits()
cancer = load_breast_cancer()

In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
lr = LogisticRegression()
knn = KNeighborsClassifier()
svc = SVC()

In [3]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
digits_scaled = scaler.fit_transform(digits.data)
X_train, X_test, y_train, y_test = train_test_split(digits_scaled, digits.target, random_state=2021, test_size=0.2)

In [4]:
lr.fit(X_train,y_train)
pred = lr.predict(X_test)
acc = accuracy_score(y_test, pred)
print(f'정확도: {acc:.4f}')

정확도: 0.9528


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [5]:
svc.fit(X_train,y_train)
pred = svc.predict(X_test)
acc = accuracy_score(y_test, pred)
print(f'정확도: {acc:.4f}')

정확도: 0.9778


In [6]:
# 제일 높음
knn.fit(X_train,y_train)
pred = knn.predict(X_test)
acc = accuracy_score(y_test, pred)
print(f'정확도: {acc:.4f}')

정확도: 0.9889


In [7]:
knn.get_params()

{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

In [8]:
from sklearn.model_selection import GridSearchCV
params = {
    'n_neighbors': [5,6,7],
    'leaf_size': [25,28,31],
    'p': [1,2,3]
}
Knn_clf = KNeighborsClassifier(n_jobs=1)
grid_cv = GridSearchCV(Knn_clf, param_grid=params, cv=4, n_jobs=1,scoring='accuracy', refit=True)
grid_cv.fit(X_train,y_train)
print(f'최고 평균 정확도: {grid_cv.best_score_:.4f}')
print('최적 파라미터: ', grid_cv.best_params_)

최고 평균 정확도: 0.9833
최적 파라미터:  {'leaf_size': 25, 'n_neighbors': 5, 'p': 3}


In [9]:
best = grid_cv.best_estimator_
pred = best.predict(X_test)
acc = accuracy_score(y_test, pred)
print(f'최적 파라미터 정확도: {acc:.4f}')

최적 파라미터 정확도: 0.9917


### breast cancer

In [10]:
scaler = MinMaxScaler()
cancer_scaled = scaler.fit_transform(cancer.data)
X_train, X_test, y_train, y_test = train_test_split(cancer_scaled, cancer.target, random_state=2021, test_size=0.2)

In [11]:
lr.fit(X_train,y_train)
pred = lr.predict(X_test)
acc = accuracy_score(y_test, pred)
print(f'정확도: {acc:.4f}')

정확도: 0.9561


In [12]:
# 제일 높음
svc.fit(X_train,y_train)
pred = svc.predict(X_test)
acc = accuracy_score(y_test, pred)
print(f'정확도: {acc:.4f}')

정확도: 0.9912


In [13]:
knn.fit(X_train,y_train)
pred = knn.predict(X_test)
acc = accuracy_score(y_test, pred)
print(f'정확도: {acc:.4f}')

정확도: 0.9825


In [14]:
svc.get_params()

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [29]:
params = {
    'C':list(range(1,10)),
    'cache_size': list(range(80,230)),
    'degree': list(range(1,40))
}
svc_clf = SVC()
grid_cv = GridSearchCV(svc_clf, param_grid=params,n_jobs=1, cv=3,scoring='accuracy', refit=True)
grid_cv.fit(X_train,y_train)
print(f'최고 평균 정확도: {grid_cv.best_score_:.4f}')
print('최적 파라미터: ', grid_cv.best_params_)

최고 평균 정확도: 0.9714
최적 파라미터:  {'C': 2, 'cache_size': 80, 'degree': 1}


In [22]:
best = grid_cv.best_estimator_
pred = best.predict(X_test)
acc = accuracy_score(y_test, pred)
print(f'최적 파라미터  정확도: {acc:.4f}')

최적 파라미터  정확도: 0.9912
