# 개별 코드 

In [3]:
# Load the breast cancer dataset through scikit-learn's datasets module
from sklearn import datasets
raw_cancer = datasets.load_breast_cancer()

In [4]:
# Pull the feature data and the target labels out of the loaded dataset
X, y = raw_cancer.data, raw_cancer.target

In [5]:
# Split features and labels into training and test partitions.
# random_state is pinned to 0, the same seed used throughout this notebook.
from sklearn.model_selection import train_test_split

X_tn, X_te, y_tn, y_te = train_test_split(
    X,
    y,
    random_state=0,
)

In [6]:
# Standardize the features. The scaler is fitted on the training set only,
# and that same fitted scaler is then applied to both partitions.
from sklearn.preprocessing import StandardScaler

std_scale = StandardScaler().fit(X_tn)
X_tn_std = std_scale.transform(X_tn)
X_te_std = std_scale.transform(X_te)

In [7]:
# Logistic regression with an L2 penalty, fitted on the standardized training data
from sklearn.linear_model import LogisticRegression
clf_logi_l2 =  LogisticRegression(penalty='l2')
# Kept as the final bare expression so the notebook echoes the fitted estimator
clf_logi_l2.fit(X_tn_std, y_tn)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [8]:
# Show the fitted L2 logistic regression parameters:
# the coefficient array first, then the intercept on its own line
print(clf_logi_l2.coef_, clf_logi_l2.intercept_, sep='\n')

[[-0.29792942 -0.58056355 -0.3109406  -0.377129   -0.11984232  0.42855478
  -0.71131106 -0.85371164 -0.46688191  0.11762548 -1.38262136  0.0899184
  -0.94778563 -0.94686238  0.18575731  0.99305313  0.11090349 -0.3458275
   0.20290919  0.80470317 -0.91626377 -0.91726667 -0.8159834  -0.86539197
  -0.45539191  0.10347391 -0.83009341 -0.98445173 -0.5920036  -0.61086989]]
[0.02713751]


In [9]:
# Predict class labels for the standardized test set
pred_logistic = clf_logi_l2.predict(X_te_std)
print(pred_logistic)

[0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 0 0 0 0 0 1 1 0 1 1 0 1 0 1 0 1 0 1 0 1
 0 1 0 0 1 0 1 1 0 1 1 1 0 0 0 0 1 1 1 1 1 1 0 0 0 1 1 0 1 0 0 0 1 1 0 1 0
 0 1 1 1 1 1 0 0 0 1 0 1 1 1 0 0 1 0 0 0 1 1 0 1 1 1 1 1 1 1 0 1 0 1 1 1 1
 0 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 0 0 0 1 1 1 0]


In [10]:
# Predict class probabilities for the standardized test set
# (the printed array has one row per sample and one column per class)
pred_proba = clf_logi_l2.predict_proba(X_te_std)
print(pred_proba)

[[9.98638613e-01 1.36138656e-03]
 [3.95544804e-02 9.60445520e-01]
 [1.30896362e-03 9.98691036e-01]
 [1.24473354e-02 9.87552665e-01]
 [2.44132101e-04 9.99755868e-01]
 [4.50491513e-03 9.95495085e-01]
 [1.13985968e-04 9.99886014e-01]
 [1.82475894e-03 9.98175241e-01]
 [9.67965506e-05 9.99903203e-01]
 [1.75222878e-06 9.99998248e-01]
 [1.76572612e-01 8.23427388e-01]
 [8.24119135e-02 9.17588087e-01]
 [9.66067493e-06 9.99990339e-01]
 [5.39343196e-01 4.60656804e-01]
 [3.98187854e-01 6.01812146e-01]
 [9.95762760e-01 4.23724017e-03]
 [2.75612083e-03 9.97243879e-01]
 [9.99997097e-01 2.90271401e-06]
 [9.99926506e-01 7.34935682e-05]
 [9.99999997e-01 2.78313939e-09]
 [9.98738365e-01 1.26163489e-03]
 [9.81405399e-01 1.85946008e-02]
 [1.77902039e-02 9.82209796e-01]
 [9.65876713e-04 9.99034123e-01]
 [9.99464578e-01 5.35421808e-04]
 [6.73385015e-04 9.99326615e-01]
 [5.50833875e-05 9.99944917e-01]
 [9.69828919e-01 3.01710813e-02]
 [1.62119075e-03 9.98378809e-01]
 [9.99997821e-01 2.17867101e-06]
 [6.005712

In [14]:
# Precision of the test-set predictions measured against the true labels
from sklearn.metrics import precision_score
precision = precision_score(y_te, pred_logistic)
print(precision)

0.9666666666666667


In [10]:
# Confusion matrix of the true test labels versus the predicted labels
from sklearn.metrics import confusion_matrix
conf_matrix = confusion_matrix(y_te, pred_logistic)
print(conf_matrix)

[[50  3]
 [ 3 87]]


In [11]:
# Classification report: per-class precision, recall, f1-score and support
from sklearn.metrics import classification_report
class_report = classification_report(y_te, pred_logistic)
print(class_report)

              precision    recall  f1-score   support

           0       0.94      0.94      0.94        53
           1       0.97      0.97      0.97        90

    accuracy                           0.96       143
   macro avg       0.96      0.96      0.96       143
weighted avg       0.96      0.96      0.96       143



# 전체 코드

In [1]:
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression

from sklearn.metrics import precision_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report


# Load the breast cancer dataset
raw_cancer = datasets.load_breast_cancer()

# Feature data and target labels
X, y = raw_cancer.data, raw_cancer.target

# Training/test split (seed pinned to 0)
X_tn, X_te, y_tn, y_te = train_test_split(X, y, random_state=0)

# Standardization: the scaler is fitted on the training set only,
# then applied unchanged to both partitions
std_scale = StandardScaler().fit(X_tn)
X_tn_std = std_scale.transform(X_tn)
X_te_std = std_scale.transform(X_te)

# Logistic regression with an L2 penalty, fitted on the standardized training data
clf_logi_l2 = LogisticRegression(penalty='l2').fit(X_tn_std, y_tn)

# Fitted parameters: coefficient array, then the intercept on its own line
print(clf_logi_l2.coef_, clf_logi_l2.intercept_, sep='\n')

# Predicted class labels for the test set
pred_logistic = clf_logi_l2.predict(X_te_std)
print(pred_logistic)

# Predicted class probabilities for the test set
pred_proba = clf_logi_l2.predict_proba(X_te_std)
print(pred_proba)

# Precision
precision = precision_score(y_te, pred_logistic)
print(precision)

# Confusion matrix
conf_matrix = confusion_matrix(y_te, pred_logistic)
print(conf_matrix)

# Classification report
class_report = classification_report(y_te, pred_logistic)
print(class_report)

[[-0.29792942 -0.58056355 -0.3109406  -0.377129   -0.11984232  0.42855478
  -0.71131106 -0.85371164 -0.46688191  0.11762548 -1.38262136  0.0899184
  -0.94778563 -0.94686238  0.18575731  0.99305313  0.11090349 -0.3458275
   0.20290919  0.80470317 -0.91626377 -0.91726667 -0.8159834  -0.86539197
  -0.45539191  0.10347391 -0.83009341 -0.98445173 -0.5920036  -0.61086989]]
[0.02713751]
[0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 0 0 0 0 0 1 1 0 1 1 0 1 0 1 0 1 0 1 0 1
 0 1 0 0 1 0 1 1 0 1 1 1 0 0 0 0 1 1 1 1 1 1 0 0 0 1 1 0 1 0 0 0 1 1 0 1 0
 0 1 1 1 1 1 0 0 0 1 0 1 1 1 0 0 1 0 0 0 1 1 0 1 1 1 1 1 1 1 0 1 0 1 1 1 1
 0 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 0 0 0 1 1 1 0]
[[9.98638613e-01 1.36138656e-03]
 [3.95544804e-02 9.60445520e-01]
 [1.30896362e-03 9.98691036e-01]
 [1.24473354e-02 9.87552665e-01]
 [2.44132101e-04 9.99755868e-01]
 [4.50491513e-03 9.95495085e-01]
 [1.13985968e-04 9.99886014e-01]
 [1.82475894e-03 9.98175241e-01]
 [9.67965506e-05 9.99903203e-01]
 [1.75222878e-06 9.99998248e-