In [3]:
from sklearn import datasets
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cross_validation import train_test_split
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle



In [4]:
iris = datasets.load_iris()

In [5]:
X, y = shuffle(iris.data, iris.target, random_state=7)

In [6]:
iris.data.shape

(150, 4)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=7)

In [8]:
print(y_train.shape)
print(X_train.shape)

(120,)
(120, 4)


In [9]:
#标准化缩放即将样本特征的值转化为均值为0，方差为1的正态分布。
sc_X = StandardScaler()
X_train_std = sc_X.fit_transform(X_train)
X_test_std = sc_X.fit_transform(X_test)

In [10]:
print(X_train_std.shape)

(120, 4)


In [108]:
print(y_train)

[2 0 1 2 1 0 0 0 2 1 2 1 0 0 2 0 0 2 1 2 1 2 1 2 2 2 0 2 1 0 2 2 0 1 0 1 1
 1 0 1 0 0 0 1 0 2 0 0 0 0 2 1 1 2 2 2 0 1 0 2 0 2 2 1 1 2 0 1 0 0 1 0 0 1
 2 1 1 2 0 1 1 1 0 1 2 2 0 1 0 1 1 1 1 2 1 0 1 2 1 2 1 0 1 1 2 2 0 0 0 2 2
 2 2 2 2 1 1 0 0 2]


In [11]:
model = Perceptron(random_state=7)

In [12]:
model.fit(X_train_std, y_train)



Perceptron(alpha=0.0001, class_weight=None, eta0=1.0, fit_intercept=True,
      max_iter=None, n_iter=None, n_jobs=1, penalty=None, random_state=7,
      shuffle=True, tol=None, verbose=0, warm_start=False)

In [13]:
y_pred = model.predict(X_test_std)

In [15]:
from sklearn.metrics import fbeta_score, accuracy_score
print ("Accuracy score on test data: {:.4f}".format(accuracy_score(y_test, y_pred)))
print ("F-score on test data: {:.4f}".format(fbeta_score(y_test, y_pred, beta = 0.5,average='weighted')))

Accuracy score on test data: 0.7667
F-score on test data: 0.7327


In [16]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer

clf = Perceptron(random_state=7)
parameters = {'eta0':[0.1,1,10], 'max_iter':[30,40,50]}
scorer = make_scorer(fbeta_score, beta=0.5, average='weighted')
#在分类器上使用网格搜索，使用'scorer'作为评价函数
grid_obj = GridSearchCV(clf, parameters, scoring=scorer)
grid_obj.fit(X_train_std, y_train)

GridSearchCV(cv=None, error_score='raise',
       estimator=Perceptron(alpha=0.0001, class_weight=None, eta0=1.0, fit_intercept=True,
      max_iter=None, n_iter=None, n_jobs=1, penalty=None, random_state=7,
      shuffle=True, tol=None, verbose=0, warm_start=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'eta0': [0.1, 1, 10], 'max_iter': [30, 40, 50]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=make_scorer(fbeta_score, beta=0.5, average=weighted),
       verbose=0)

In [17]:
# 得到estimator
best_clf = grid_obj.best_estimator_
# 使用没有调优的模型做预测
predictions = (clf.fit(X_train_std, y_train)).predict(X_test_std)
best_predictions = best_clf.predict(X_test_std)



In [18]:
# 汇报调参前和调参后的分数
print ("\nUnoptimized model\n------")
print ("Accuracy score on test data: {:.4f}".format(accuracy_score(y_test, predictions)))
print ("F-score on test data: {:.4f}".format(fbeta_score(y_test, predictions, beta = 0.5,average='weighted')))
print ("\nOptimized Model\n------")
print ("Final accuracy score on the test data: {:.4f}".format(accuracy_score(y_test, best_predictions)))
print ("Final F-score on the test data: {:.4f}".format(fbeta_score(y_test, best_predictions, beta = 0.5,average='weighted')))


Unoptimized model
------
Accuracy score on test data: 0.7667
F-score on test data: 0.7327

Optimized Model
------
Final accuracy score on the test data: 0.8667
Final F-score on the test data: 0.8761
