# 1. 官方文档 (Official Documentation)

## [1.1 scikit-learn Tutorials](https://scikit-learn.org/stable/tutorial/index.html)
## [1.2 User Guide](https://scikit-learn.org/stable/user_guide.html)
## [1.3 API Reference](https://scikit-learn.org/stable/modules/classes.html)


## A Basic Example

In [42]:
from sklearn import  neighbors, datasets, preprocessing
from sklearn.model_selection import  train_test_split
from sklearn.metrics import accuracy_score

In [43]:
# Load the iris data set; use only the first two feature columns as inputs
# and the target array as labels.
iris = datasets.load_iris()
X = iris.data[:, :2]
y = iris.target

In [51]:
# Split features and labels into train and test subsets; the fixed
# random_state makes the shuffle repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=33)

In [45]:
# Fit the standardizer on the training split only, so the test split does not
# contribute to the learned statistics.
scaler = preprocessing.StandardScaler().fit(X_train)

In [46]:
# Apply the scaler fitted on the training split to both splits.
# NOTE: the results overwrite X_train / X_test, so executing this cell a
# second time would transform data that is already transformed.
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [47]:
# Train a 5-nearest-neighbours classifier on the training split, then predict
# labels for the test split.
knn = neighbors.KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
y_pred = knn.predict(X_test)

In [12]:
# Fraction of test samples whose predicted label equals the true label.
accuracy_score(y_test, y_pred)

0.631578947368421

In [52]:
# Re-fit the standardizer on X_train.
# NOTE(review): read top to bottom, X_train has already been scaled at this
# point. The statistics echoed below (mean of about 5.78 / 3.06) look like raw
# measurements, so this cell presumably ran right after the In [51] split was
# re-executed -- confirm the execution order before relying on it.
scaler = preprocessing.StandardScaler().fit(X_train)

In [53]:
# Per-feature scaling factor learned by fit (one entry per input column).
scaler.scale_

array([0.86060128, 0.47555603])

In [54]:
# Per-feature mean learned by fit (one entry per input column).
scaler.mean_

array([5.78392857, 3.05982143])

In [57]:
# Standardize the training split (overwriting X_train) and preview the first
# three rows.
# NOTE(review): the preview echoed below shows values near -8. With the
# mean_/scale_ printed above, that is what applying the transform twice to a
# raw value produces (e.g. 5.0 -> -0.91 -> -7.78). Because X_train is
# overwritten, every re-run of this cell scales the data again -- presumably
# the cell was executed more than once; restart the kernel and run it once.
X_train = scaler.transform(X_train)
X_train[:3,:]

array([[-7.77925468, -9.79395666],
       [-7.91427403, -6.2565369 ],
       [-6.02400309, -9.79395666]])

In [59]:
# Apply the same transformation to the test split (overwriting X_test) and
# preview the first three rows.
# NOTE(review): X_test is overwritten, so re-running this cell applies the
# transform again; the values echoed below (around -6 to -8) suggest that
# happened -- confirm by running the notebook once from a clean kernel.
X_test = scaler.transform(X_test)
X_test[:3,:]

array([[-6.83411921, -7.14089184],
       [-5.48392568, -6.2565369 ],
       [-8.18431274, -5.81435943]])

In [62]:
# Build a 5-nearest-neighbours classifier and train it on the training split.
# The fit call is the last expression, so the notebook echoes the estimator.
knn = neighbors.KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=5, p=2,
           weights='uniform')

In [63]:
# Predict a label for every sample in the test split.
y_pred = knn.predict(X_test)

In [64]:
# Accuracy computed by hand: the mean of the element-wise match mask is the
# fraction of correct predictions. The vectorized .mean() replaces a
# Python-level sum() over the array and yields the same value.
(y_test == y_pred).mean()

0.6052631578947368

In [65]:
# Same accuracy via the library helper; it should match the manual value.
accuracy_score(y_test, y_pred)

0.6052631578947368

In [49]:
# Accuracy computed by hand: (y_test == y_pred) is a boolean match mask and
# its mean is the fraction of correct predictions. The vectorized .mean()
# replaces a Python-level sum() over the array and yields the same value.
(y_test == y_pred).mean()

0.631578947368421

## Loading The Data

In [21]:
import numpy as np

# Toy data set: an 11 x 5 matrix of uniform random numbers in which every
# entry below 0.7 is zeroed out, plus one 'M'/'F' label per row.
X = np.random.random(size=(11, 5))
np.putmask(X, X < 0.7, 0)
y = np.asarray(['M', 'M', 'F', 'F', 'M', 'F', 'M', 'M', 'F', 'F', 'F'])

## Training And Test Data

In [22]:
from sklearn.model_selection import train_test_split
# Split the toy data into train and test subsets; random_state=0 keeps the
# shuffle repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

## Preprocessing The Data

In [28]:
## Standardization
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply them
# to both splits, keeping the originals untouched.
scaler = StandardScaler()
scaler.fit(X_train)
standardized_X = scaler.transform(X_train)
standardized_X_test = scaler.transform(X_test)

In [29]:
## Normalization
from sklearn.preprocessing import Normalizer
# NOTE: this rebinds the name `scaler`, replacing whatever it referred to
# before this cell ran.
scaler = Normalizer().fit(X_train)
# Transform both splits with the same Normalizer instance; results go to new
# names, so X_train / X_test are left unchanged.
normalizer_X = scaler.transform(X_train)
normalizer_X_test = scaler.transform(X_test)

In [30]:
## Binarization
from sklearn.preprocessing import Binarizer
# Threshold the full feature matrix X at 0.0 (note: X, not the training split).
binarization = Binarizer(threshold=0.0).fit(X)
# The binarized copy is stored separately; X itself is not modified here.
binary_X = binarization.transform(X)

In [31]:
## Encoding Categorical Features
from sklearn.preprocessing import LabelEncoder
enc = LabelEncoder()
# Replace the label array y with its encoded form.
# NOTE: only `y` is rebound; y_train / y_test produced by the earlier split
# still hold the original string labels.
y = enc.fit_transform(y)

In [32]:
## Imputing Missing Values
# sklearn.preprocessing.Imputer was deprecated in scikit-learn 0.20 and
# removed in 0.22; sklearn.impute.SimpleImputer is its replacement.
# SimpleImputer always imputes column-wise, which is what axis=0 selected in
# the old API, so the axis argument is dropped.
from sklearn.impute import SimpleImputer

# Treat zeros as missing and replace each with the mean of the remaining
# values in the same column.
imp = SimpleImputer(missing_values=0, strategy='mean')
imp.fit_transform(X_train)

array([[0.79502238, 0.99495319, 0.92301665, 0.8887351 , 0.84896299],
       [0.74344939, 0.79705015, 0.81267505, 0.85371022, 0.84896299],
       [0.84459114, 0.84163923, 0.8468062 , 0.97878318, 0.84896299],
       [0.79502238, 0.84163923, 0.80472689, 0.85371022, 0.84507136],
       [0.79502238, 0.84163923, 0.8468062 , 0.85371022, 0.85285463],
       [0.79702659, 0.83131174, 0.8468062 , 0.7178711 , 0.84896299],
       [0.79502238, 0.74324182, 0.8468062 , 0.82945152, 0.84896299],
       [0.79502238, 0.84163923, 0.8468062 , 0.85371022, 0.84896299]])

In [33]:
## Generating Polynomial Features
from sklearn.preprocessing import PolynomialFeatures

# Expand X with polynomial feature combinations up to degree 5 and echo the
# resulting matrix.
poly = PolynomialFeatures(degree=5)
poly.fit_transform(X)

array([[1.        , 0.        , 0.74324182, ..., 0.        , 0.        ,
        0.        ],
       [1.        , 0.84459114, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [1.        , 0.96077451, 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [1.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.4512062 ],
       [1.        , 0.79541831, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [1.        , 0.        , 0.99495319, ..., 0.        , 0.        ,
        0.        ]])

## Create Your Model

### Supervised Learning Estimators