In [1]:
from sklearn.datasets import samples_generator



In [2]:
X,y = samples_generator.make_classification(n_features=20, n_informative=3, n_redundant=0,
                                           n_classes=4,n_clusters_per_class=2)

In [5]:
print(X.shape)
print(y.shape)

(100, 20)
(100,)


In [6]:
y

array([0, 2, 3, 1, 1, 0, 2, 0, 0, 0, 2, 2, 3, 3, 0, 2, 1, 1, 2, 0, 3, 0,
       2, 2, 0, 1, 2, 0, 1, 0, 1, 1, 2, 2, 1, 3, 2, 1, 2, 3, 0, 1, 3, 3,
       3, 0, 0, 3, 2, 1, 0, 1, 0, 2, 1, 1, 2, 2, 0, 3, 3, 1, 3, 0, 2, 1,
       0, 3, 3, 1, 3, 3, 1, 3, 0, 1, 1, 1, 3, 3, 2, 0, 3, 0, 3, 1, 2, 2,
       0, 1, 1, 0, 3, 3, 2, 3, 0, 0, 2, 2])

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X,y,random_state = 42)

In [9]:
from sklearn.feature_selection import SelectKBest, f_regression

In [10]:
anova_filter = SelectKBest(f_regression, k=3)

In [11]:
from sklearn.svm import LinearSVC

In [12]:
clf = LinearSVC()

In [13]:
from sklearn.pipeline import make_pipeline

In [14]:
anova_svm = make_pipeline(anova_filter,clf)

In [15]:
anova_svm.fit(X_train,y_train)

Pipeline(steps=[('selectkbest',
                 SelectKBest(k=3,
                             score_func=<function f_regression at 0x127bc6dc0>)),
                ('linearsvc', LinearSVC())])

In [16]:
y_pred = anova_svm.predict(X_test)
y_pred

array([0, 2, 3, 1, 3, 2, 2, 2, 2, 0, 2, 3, 2, 2, 0, 1, 1, 0, 1, 3, 1, 0,
       2, 3, 3])

In [17]:
anova_svm.score(X_test,y_test)

0.64

In [18]:
from sklearn.metrics import classification_report

In [19]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.60      0.75      0.67         4
           1       0.60      0.38      0.46         8
           2       0.78      1.00      0.88         7
           3       0.50      0.50      0.50         6

    accuracy                           0.64        25
   macro avg       0.62      0.66      0.63        25
weighted avg       0.63      0.64      0.62        25



In [20]:
from sklearn.datasets import load_breast_cancer

In [21]:
cancer = load_breast_cancer()

In [22]:
X_train, X_test,y_train, y_test = train_test_split(cancer.data, cancer.target, random_state = 0)

In [23]:
from sklearn.svm import SVC

In [24]:
from sklearn.preprocessing import MinMaxScaler

In [25]:
pp = make_pipeline(MinMaxScaler(),SVC(gamma='auto'))

In [26]:
pp.fit(X_train,y_train)

Pipeline(steps=[('minmaxscaler', MinMaxScaler()), ('svc', SVC(gamma='auto'))])

In [27]:
pp.score(X_test,y_test)

0.951048951048951

In [28]:
from sklearn.datasets import load_boston

In [29]:
boston = load_boston()

In [31]:
X_train, X_test,y_train, y_test = train_test_split(boston.data, boston.target, random_state = 0)

In [32]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge

In [33]:
pp = make_pipeline(StandardScaler(),PolynomialFeatures(),Ridge())

In [34]:
param_grid={'polynomialfeatures__degree':[1,2,3],'ridge__alpha':[0.001,0.01,0.1,1,10,100]}

In [35]:
from sklearn.model_selection import GridSearchCV

In [36]:
grid = GridSearchCV(pp, param_grid=param_grid, cv=5,n_jobs=-1)

In [37]:
grid.fit(X_train,y_train)

GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                                       ('polynomialfeatures',
                                        PolynomialFeatures()),
                                       ('ridge', Ridge())]),
             n_jobs=-1,
             param_grid={'polynomialfeatures__degree': [1, 2, 3],
                         'ridge__alpha': [0.001, 0.01, 0.1, 1, 10, 100]})