# Biblioteca Scikit-learn para Python

Prof. Tiago Dias

[LinkedIn](https://www.linkedin.com/in/diasctiago/) | [GitHub](https://github.com/diasctiago) | [Blog](https://www.dadosaocubo.com/)

### Exemplo Python

In [None]:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the iris dataset and separate the feature matrix from the class labels.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the samples for evaluation; the fixed seed keeps the split
# identical from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Build the (seeded) decision tree and train it on the training split.
# `fit` returns the estimator itself, so creation and training can be chained.
model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predict the class of every held-out sample.
y_pred = model.predict(X_test)

# Score the predictions against the true labels and report the accuracy.
accuracy = accuracy_score(y_test, y_pred)
print('Acurácia:', accuracy)

Acurácia: 1.0


In [None]:
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Load the diabetes dataset and separate the features from the target values.
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target

# Hold out 30% of the samples for evaluation; the fixed seed keeps the split
# identical from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Build the linear regression and train it on the training split.
# `fit` returns the estimator itself, so creation and training can be chained.
model = LinearRegression().fit(X_train, y_train)

# Predict the target of every held-out sample.
y_pred = model.predict(X_test)

# Measure the mean squared error of the predictions and report it.
mse = mean_squared_error(y_test, y_pred)
print('Erro quadrático médio:', mse)

Erro quadrático médio: 2821.750981001311


In [None]:
from sklearn.cluster import KMeans
import numpy as np

# Generate 100 random 2-D points, uniform in [0, 1).
# A seeded generator is used so the data (and therefore the printed labels)
# are the same on every run, matching the seeded examples above.
rng = np.random.default_rng(42)
X = rng.random((100, 2))

# Create a KMeans model with 2 clusters. `random_state` pins the centroid
# initialisation, which is otherwise random and can change the labelling.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)

# Fit the model to the data.
kmeans.fit(X)

# Retrieve the cluster label assigned to each point.
labels = kmeans.labels_

# Print the labels.
print(labels)

[1 0 1 1 1 1 1 0 0 1 1 1 1 1 1 0 1 1 0 0 0 0 1 1 0 1 1 0 0 1 1 1 0 1 0 1 1
 1 1 0 1 0 1 1 0 1 0 1 1 1 0 1 0 0 1 0 0 0 1 0 0 1 1 1 1 0 1 0 0 0 1 1 1 0
 1 1 0 0 1 0 1 0 1 0 0 1 0 1 1 0 0 0 0 0 1 1 0 1 1 0]


In [None]:
from sklearn.decomposition import PCA
import numpy as np

# Generate 100 random 4-D points, uniform in [0, 1).
# A seeded generator is used so the data (and therefore the printed
# projection) are the same on every run, matching the seeded examples above.
rng = np.random.default_rng(42)
X = rng.random((100, 4))

# Create a PCA model that keeps 2 components.
pca = PCA(n_components=2)

# Fit the model to the data.
pca.fit(X)

# Project the points onto the 2-dimensional component space.
X_pca = pca.transform(X)

# Print the projected points.
print(X_pca)

[[-0.11979853 -0.51028508]
 [-0.26049993  0.00082723]
 [-0.15223476  0.13258023]
 [ 0.05160822 -0.29865979]
 [ 0.24778084 -0.16799092]
 [ 0.44706783 -0.58351439]
 [ 0.44925577 -0.1865386 ]
 [-0.38869325  0.08900743]
 [-0.0114034   0.75084894]
 [-0.01347594  0.18693068]
 [-0.23767287 -0.00870918]
 [-0.21171663  0.07085296]
 [-0.02580768 -0.65460465]
 [ 0.18096908  0.0223594 ]
 [-0.13548712 -0.28482001]
 [ 0.04061779 -0.26796387]
 [-0.4737834  -0.08012005]
 [-0.39079985  0.50316381]
 [ 0.02880008  0.30971582]
 [-0.04068886 -0.46547792]
 [-0.12040043 -0.42210937]
 [-0.05093569  0.03701388]
 [-0.31248111  0.32615411]
 [ 0.22442355 -0.01837029]
 [ 0.6685581  -0.05365048]
 [-0.04990308  0.0180217 ]
 [ 0.49901939 -0.16705548]
 [-0.00076822 -0.21930655]
 [-0.20698797 -0.29966159]
 [ 0.61126337 -0.283131  ]
 [ 0.468507    0.36816496]
 [ 0.56613757  0.06803278]
 [-0.22400934 -0.10326564]
 [-0.22496708 -0.01863172]
 [-0.64646421  0.27227745]
 [-0.53475488 -0.21057972]
 [-0.16184897 -0.22445948]
 