In [16]:
%matplotlib

Using matplotlib backend: agg


In [17]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import validation_curve
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier as RF
from sklearn.dummy import DummyClassifier
from sklearn import datasets

In [18]:
def plot1(X, y, labels=("sepal length", "sepal width", "petal length")):
    """Draw a 3D scatter plot of the first three columns of ``X``, coloured by ``y``.

    Parameters
    ----------
    X : array supporting ``X[:, i]`` indexing, with at least three columns
        Columns 0, 1 and 2 are used as the x, y and z coordinates.
    y : sequence
        Passed to ``scatter`` as ``c`` and mapped through the ``Set1`` colormap.
    labels : sequence of three str, optional
        Axis labels for x, y and z. The defaults are the labels that used to
        be hard-coded in this function.

    Returns
    -------
    The 3D axes holding the scatter, so the caller can add a title or adjust
    the view. End the calling cell with ``;`` to hide the returned repr.
    """
    xlabel, ylabel, zlabel = labels
    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.Set1, edgecolor='k', s=40)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_zlabel(zlabel)
    return ax

In [19]:
def plot2(X, y, labels=("sepal length", "sepal width")):
    """Draw a 2D scatter plot of the first two columns of ``X``, coloured by ``y``.

    Parameters
    ----------
    X : array supporting ``X[:, i]`` indexing, with at least two columns
        Columns 0 and 1 are used as the x and y coordinates; any further
        columns are ignored.
    y : sequence
        Passed to ``scatter`` as ``c`` and mapped through the ``Set1`` colormap.
    labels : sequence of two str, optional
        Axis labels for x and y. The defaults match the first two labels used
        by ``plot1`` so both figures describe the same columns.

    Returns
    -------
    The axes holding the scatter, so the caller can add a title or further
    annotations. End the calling cell with ``;`` to hide the returned repr.
    """
    xlabel, ylabel = labels
    fig = plt.figure()
    ax = fig.add_subplot()
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Set1, edgecolor='k', s=40)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    return ax

In [28]:
# Feature table of the iris data set, loaded as a pandas DataFrame.
X = datasets.load_iris(as_frame=True)["data"]

In [29]:
# Class labels of the iris data set, loaded as a pandas Series.
y = datasets.load_iris(as_frame=True)["target"]

In [30]:
# Correlation of each feature column with the class label.
X.corrwith(y)

sepal length (cm)    0.782561
sepal width (cm)    -0.426658
petal length (cm)    0.949035
petal width (cm)     0.956547
dtype: float64

In [31]:
# Convert to a NumPy array and keep only the first three columns (sepal length,
# sepal width, petal length) - the three axes labelled in plot1. This discards
# petal width, the column with the highest correlation in the corrwith output.
X = np.array(X)[:, 0:3]

In [32]:
# Convert the labels to a plain NumPy array, matching X.
y = np.array(y)

In [11]:
# Baseline: a dummy classifier with default settings, used as a reference
# score for the real models.
model = DummyClassifier()

In [12]:
# Mean cross-validated score of the baseline on the three retained features.
np.mean(cross_val_score(model, X, y))

0.3333333333333333

In [12]:
# Logistic regression with default settings; this rebinds `model`, replacing
# the dummy baseline.
model = LogisticRegression()

In [13]:
# Mean cross-validated score of logistic regression using only the first two
# columns of X (sepal length and sepal width).
np.mean(cross_val_score(model, X[:, :2], y))

0.8133333333333332

In [14]:
# 3D scatter of the three retained features, coloured by class.
plot1(X,y)

In [15]:
# 2D scatter of the first two columns of X, coloured by class (plot2 ignores
# the third column).
plot2(X,y)

In [16]:
# Standardise the features, project them onto two principal components, then
# classify with logistic regression.
pipe = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('pca', PCA(n_components=2)),
        ('lr', LogisticRegression()),
    ]
)

In [17]:
# Mean cross-validated score of the scaler -> PCA -> logistic regression pipeline.
np.mean(cross_val_score(pipe, X, y))

0.8400000000000001

In [18]:
# Same steps as `pipe`, with a polynomial feature expansion inserted between
# the PCA projection and the classifier; its degree is tuned by the grid search.
pipe2 = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('pca', PCA(n_components=2)),
        ('pf', PolynomialFeatures()),
        ('lr', LogisticRegression()),
    ]
)

In [19]:
# Grid of candidate values: the `degree` of the pipeline step named 'pf'.
parameters = dict(pf__degree=[1, 2, 3])

In [20]:
# Grid search over the polynomial degree of pipe2, with default search settings.
plop = GridSearchCV(estimator=pipe2, param_grid=parameters)

In [21]:
# Run the search: cross-validate pipe2 for every candidate in `parameters`.
plop.fit(X,y)

GridSearchCV(estimator=Pipeline(steps=[('scaler', StandardScaler()),
                                       ('pca', PCA(n_components=2)),
                                       ('pf', PolynomialFeatures()),
                                       ('lr', LogisticRegression())]),
             param_grid={'pf__degree': [1, 2, 3]})

In [22]:
# Timings and per-split / mean test scores recorded for each candidate degree.
plop.cv_results_

{'mean_fit_time': array([0.00955739, 0.01141939, 0.01562824]),
 'std_fit_time': array([0.00064573, 0.00063878, 0.00101191]),
 'mean_score_time': array([0.00068321, 0.00057297, 0.00055413]),
 'std_score_time': array([1.94125707e-04, 2.09675010e-05, 1.41053558e-05]),
 'param_pf__degree': masked_array(data=[1, 2, 3],
              mask=[False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'pf__degree': 1}, {'pf__degree': 2}, {'pf__degree': 3}],
 'split0_test_score': array([0.73333333, 0.8       , 0.8       ]),
 'split1_test_score': array([0.86666667, 0.9       , 0.83333333]),
 'split2_test_score': array([0.8       , 0.76666667, 0.8       ]),
 'split3_test_score': array([0.9       , 0.86666667, 0.9       ]),
 'split4_test_score': array([0.9, 0.9, 0.9]),
 'mean_test_score': array([0.84      , 0.84666667, 0.84666667]),
 'std_test_score': array([0.06463573, 0.05416026, 0.04521553]),
 'rank_test_score': array([3, 1, 1], dtype=int32)}

In [23]:
# Actually call predict: the bare attribute only displayed the method object.
# Predictions come from the estimator refit with the best parameters found by
# the search; note they are made on the same data the search was fitted on.
plop.predict(X)