# Step 1. Load Iris Dataset

In [16]:
from sklearn import datasets
import pandas as pd
import numpy as np
from pandas import DataFrame
iris = datasets.load_iris()

# Step 2. Make Classification model

In [17]:
#1. petal-width + petal-length --> target : Virginica
X_petal = iris['data'][:, (2,3)] #petal-width + petal-length 만 있는 data
y_log = (iris["target"]==2).astype(np.int) # 1은 Virginica 0은 non

In [18]:
#Do preprocessing
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Imputer
num_pipeline = Pipeline([
        ('imputer', Imputer(strategy="median")),
        ('std_scaler', StandardScaler()),
    ])
X_petal = num_pipeline.fit_transform(X_petal) #정제된 X



In [19]:
from sklearn.model_selection import train_test_split
X_train_log , X_test_log , y_train_log , y_test_log = train_test_split(X_petal,y_log,test_size=0.2,random_state=42)

In [20]:
from sklearn.linear_model import LogisticRegression
log_reg = LogisticRegression(solver="liblinear", random_state=42)
log_reg.fit(X_train_log, y_train_log)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=42, solver='liblinear',
          tol=0.0001, verbose=0, warm_start=False)

In [22]:
print('label이 Virginica인가?? : {}'.format(log_reg.predict([[5,2]])))

label이 Virginica인가?? : [1]


In [24]:
#Softmax Regression
y_softmax = iris["target"] #target label
X_train_sm , X_test_sm , y_train_sm , y_test_sm = train_test_split(X_petal,y_softmax,test_size=0.2,random_state=42)

In [25]:
softmax_reg = LogisticRegression(multi_class="multinomial",solver="lbfgs", C=10, random_state=42)
softmax_reg.fit(X_train_sm, y_train_sm)

LogisticRegression(C=10, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='multinomial',
          n_jobs=None, penalty='l2', random_state=42, solver='lbfgs',
          tol=0.0001, verbose=0, warm_start=False)

In [26]:
print('label은 무엇인가?? : {}'.format(softmax_reg.predict([[5, 2]])))
print('softmax_predict_proba : {}'.format(softmax_reg.predict_proba([[5, 2]])))

label은 무엇인가?? : [2]
softmax_predict_proba : [[1.55990352e-27 6.83934873e-14 1.00000000e+00]]


In [27]:
#스코어 출력 함수
def display_scores(scores):
    print("Scores:", scores)
    print("Mean:", scores.mean())
    print("Standard deviation:", scores.std())

In [28]:
#Get Accuracy using cross validation
from sklearn.model_selection import cross_val_score
log_accuracy=cross_val_score(log_reg, X_train_log, y_train_log, cv=10, scoring="accuracy")
display_scores(log_accuracy)

Scores: [0.92307692 1.         1.         1.         0.75       0.83333333
 1.         1.         1.         0.90909091]
Mean: 0.9415501165501166
Standard deviation: 0.08384241861847685


In [29]:
softmax_accuracy=cross_val_score(softmax_reg, X_petal, y_softmax, cv=10, scoring="accuracy")
display_scores(softmax_accuracy)

Scores: [1.         0.93333333 1.         0.93333333 0.93333333 0.93333333
 0.86666667 1.         1.         1.        ]
Mean: 0.96
Standard deviation: 0.044221663871405324


In [30]:
log_mse = cross_val_score(log_reg, X_petal, y_log,
                             scoring="neg_mean_squared_error", cv=10)
log_rmse = np.sqrt(-log_mse)
display_scores(log_rmse)

Scores: [0.         0.25819889 0.         0.25819889 0.         0.
 0.36514837 0.36514837 0.25819889 0.        ]
Mean: 0.15048934125817048
Standard deviation: 0.15498265978601788


In [31]:
softmax_mse = cross_val_score(softmax_reg, X_petal, y_softmax,
                             scoring="neg_mean_squared_error", cv=10)
softmax_rmse = np.sqrt(-softmax_mse)
display_scores(softmax_rmse)

Scores: [0.         0.25819889 0.         0.25819889 0.25819889 0.25819889
 0.36514837 0.         0.         0.        ]
Mean: 0.13979439306587552
Standard deviation: 0.143029813910749


In [32]:
#Calculate Precision and Recall
from sklearn.metrics import precision_score,recall_score
def getPR(classifier,X_test,y_test):
    y_score = classifier.predict(X_test)
    precision = precision_score(y_test, y_score,average='micro')
    recall =  recall_score(y_test, y_score,average='micro')
    print('precision score: {}'.format(precision))
    print('recall score: {}'.format(recall))

In [33]:
getPR(log_reg,X_test_log,y_test_log)

precision score: 1.0
recall score: 1.0


In [34]:
getPR(softmax_reg,X_test_sm,y_test_sm)

precision score: 1.0
recall score: 1.0
