In [None]:
import torch
from sklearn.datasets import load_breast_cancer

In [None]:
# Load the breast-cancer data set and pull out the feature matrix and labels.
dataset = load_breast_cancer()
X, y = dataset.data, dataset.target

In [None]:
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Standardise every feature using statistics of the full data set.
scaler = StandardScaler().fit(X)
X_std = scaler.transform(X)

# Project the standardised features onto 10 principal components and show
# the share of variance carried by each component.
pca = PCA(n_components=10)
X_reduced = pca.fit_transform(X_std)
print(pca.explained_variance_ratio_)

# Hold-out split of the raw (unscaled) features; the fixed seed makes the
# split repeatable.
split = train_test_split(X, y, random_state=42)
X_train, X_test, y_train, y_test = split

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

# Scale -> 5-component PCA -> logistic regression, chained as one estimator
# and fitted on the training split.
pipe1 = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('pca', PCA(n_components=5)),
        ('model', LogisticRegression()),
    ]
).fit(X_train, y_train)

# Score of the fitted pipeline on the held-out split.
print(pipe1.score(X_test, y_test))


In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

# Scale -> 5-component PCA -> decision tree.
# PCA picks directions of largest variance, so the features are standardised
# first (same layout as `pipe1`); otherwise the columns with the largest
# magnitudes would dominate the components.
# The tree is seeded so the printed score is repeatable between runs.
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA(5)),
    ('model', DecisionTreeClassifier(random_state=42)),
])

pipe.fit(X_train, y_train)

# Score of the fitted pipeline on the held-out split.
print(pipe.score(X_test, y_test))


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Scale -> 5-component PCA -> random forest (50 trees).
# PCA picks directions of largest variance, so the features are standardised
# first (same layout as `pipe1`); otherwise the columns with the largest
# magnitudes would dominate the components.
# The forest is seeded so the printed score is repeatable between runs.
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA(5)),
    ('model', RandomForestClassifier(n_estimators=50, random_state=42)),
])

pipe.fit(X_train, y_train)

# Score of the fitted pipeline on the held-out split.
print(pipe.score(X_test, y_test))


In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import catboost

# Hyper-parameter grids, one per entry of `models` below (same order).
parameters = [
    # LogisticRegression
    {'C': [.0001, .001, .01, .1, 1.0, 10., 100.],
     'fit_intercept': [True, False],
     'max_iter': [1000, 5000, 10000, 100000]},
    # DecisionTreeClassifier
    {'max_depth': [3, 5, 7, 10, 13],
     'criterion': ['gini', 'entropy']},
    # RandomForestClassifier
    {'n_estimators': [20, 50, 100, 200],
     'max_depth': [3, 5, 7, 10, 13],
     'criterion': ['gini', 'entropy']},
    # CatBoostClassifier
    # NOTE(review): learning rates of 10 and 100 are far above the usual
    # (0, 1] range -- confirm they are intended.
    {'iterations': [100, 200, 500, 700, 1000],
     'learning_rate': [0.001, 0.01, 0.1, 1.0, 10., 100.],
     'depth': [2, 4, 6, 8, 10]},
]

# (display name, estimator) pairs. The tree and forest are seeded so the
# search is repeatable; CatBoost's per-iteration log is silenced because the
# search refits the model many times.
models = [
    ('LogisticRegression', LogisticRegression()),
    ('DecisionTreeClassifier', DecisionTreeClassifier(random_state=42)),
    ('RandomForestClassifier', RandomForestClassifier(random_state=42)),
    ('CatBoostClassifier', catboost.CatBoostClassifier(verbose=False)),
]

for (name, estimator), grid in zip(models, parameters):
    # Scaling lives inside the pipeline so each CV fold is standardised with
    # statistics from its own training part only, and the search sees only the
    # training split (X_train and y_train have matching lengths; the
    # full-data X_std does not match y_train).
    pipeline = Pipeline([('scaler', StandardScaler()), ('model', estimator)])

    # Pipeline parameters are addressed as '<step>__<param>'.
    prefixed_grid = {f'model__{key}': values for key, values in grid.items()}

    search = GridSearchCV(pipeline, prefixed_grid)
    search.fit(X_train, y_train)

    # Strip the step prefix again so the report shows plain estimator params.
    best_params = {key.split('__', 1)[1]: value
                   for key, value in search.best_params_.items()}
    print(f'{name} best params: \n {best_params}')
    print(f'Score: \n {search.best_score_}')