In [1]:
from sklearn import datasets, metrics
from scipy.sparse import csr_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.linear_model  import LogisticRegression, LinearRegression
from sklearn.dummy import DummyRegressor
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from sklearn.cluster import KMeans



In [2]:
# Load the wine dataset that ships with scikit-learn and keep the feature
# matrix and the class labels under separate names; the evaluation cells
# further down read `data` and `target`.
wine = datasets.load_wine()
data = wine.data
target = wine.target

# Scaler instance that the cross-validation cell puts in front of its model.
standardizer = StandardScaler()

In [3]:
# K-fold cross-validation
# The scaler and the logistic regression are chained into a single pipeline,
# and that whole pipeline is what gets cross-validated.
logReg = LogisticRegression()
pipeline = make_pipeline(standardizer, logReg)

# Ten shuffled folds; the fixed seed keeps the fold assignment repeatable.
kf = KFold(n_splits=10, random_state=1, shuffle=True)

cv_results = cross_val_score(
    pipeline,
    data,
    target,
    scoring='accuracy',
    cv=kf,
    n_jobs=-1,
)

# Report the accuracy averaged over the folds.
mean_accuracy = cv_results.mean()
print("K-krotny sprawdzian krzyżowy: ", mean_accuracy)


K-krotny sprawdzian krzyżowy:  0.9826797385620916


In [4]:
# Train/test split: "baseline" regression
# NOTE(review): the model fitted here is LinearRegression, not the
# DummyRegressor imported at the top of the notebook, and the regression
# target is the wine class label. Confirm this is the intended baseline.
data_train, data_test, target_train, target_test = train_test_split(
    data, target, random_state=0
)

lr = LinearRegression().fit(data_train, target_train)

# Score the fitted model on the held-out part of the split.
test_score = lr.score(data_test, target_test)
print("Regresja bazowa: ", test_score)

Regresja bazowa:  0.804353263176954


In [5]:
# Train/test split: "baseline" classification
# NOTE(review): the model fitted here is a RandomForestClassifier rather than
# a dummy/baseline classifier. Confirm this is the intended baseline.
data_train, data_test, target_train, target_test = train_test_split(data, target, random_state=0)

# Seed the forest as well: the split above is already seeded, but an unseeded
# forest would still make the printed accuracy vary from run to run.
rfc = RandomForestClassifier(random_state=0)
rfc.fit(data_train, target_train)

# Accuracy on the held-out part of the split.
print("Klasyfikacja bazowa: ",rfc.score(data_test, target_test))

Klasyfikacja bazowa:  0.9777777777777777


In [6]:
# "Binary classifier" evaluation
# NOTE(review): make_classification is asked for n_classes = 3, so this is a
# three-class problem despite the "binary" label. Confirm which is intended.
X, y = datasets.make_classification(
    n_samples=178,
    n_features=14,
    n_informative=13,
    n_redundant=0,
    n_classes=3,
    random_state=1,
)

# NOTE(review): this rebinds `lr`, which the regression cell bound to a
# LinearRegression; here it is a LogisticRegression.
lr = LogisticRegression()

# No cv argument is given, so cross_val_score falls back to its default split.
cv_results = cross_val_score(lr, X, y, scoring='accuracy')

print("Klasyfikator binarny: ", cv_results.mean())

Klasyfikator binarny:  0.7923809523809523


In [7]:
# Clustering
# Generate three tight synthetic blobs. They are kept under their own name so
# the wine feature matrix `data` loaded earlier is not overwritten; otherwise
# re-running the earlier evaluation cells after this one would silently pair
# blob features with wine labels.
blob_data, _ = datasets.make_blobs(n_samples = 178,
                                   n_features = 14,
                                   centers = 3,
                                   cluster_std = 0.5,
                                   shuffle = True,
                                   random_state = 1)

# Fit k-means with the same number of clusters as generated centers.
model = KMeans(n_clusters=3, random_state=1).fit(blob_data)

# Silhouette score of the cluster assignment found by k-means.
print("Klasteryzacja: ",metrics.silhouette_score(blob_data, model.labels_))

Klasteryzacja:  0.9115977723044779
