# SCIKIT-LEARN
For more content, visit the official __Scikit-learn__ website at __[this link](https://scikit-learn.org/stable/)__.
<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/0/05/Scikit_learn_logo_small.svg/2880px-Scikit_learn_logo_small.svg.png"
alt="scikitlearn" title="Scikit-learn" height="400" width="400" />

# Importing Libraries

In [None]:
import sklearn
import numpy as np

# Loading Data
Your data needs to be numeric and stored as NumPy arrays or SciPy sparse matrices.
Other types that are convertible to numeric arrays, such as Pandas DataFrame, are
also acceptable.

In [None]:
# Toy dataset: one class label per sample, plus a random feature matrix.
# The labels are defined first so the feature matrix can be sized from them;
# x and y must have the same number of samples for the later cells to work.
y = np.array(['M', 'M', 'F', 'F', 'M', 'F', 'M', 'M', 'F', 'F', 'F'])
x = np.random.random((y.shape[0], 5))
# Zero out every value below 0.7 so that most entries of x are exactly 0.
x[x < 0.7] = 0

# Preprocessing Data
Common preprocessing techniques include:
- Standardization
- Normalization
- Binarization
- Encoding Categorical Features
- Imputing Missing Values
- Generating Polynomial Features

In [None]:
# The scalers below are fitted on the training rows only, so a train/test
# partition of x has to exist before they run. The same random_state as the
# notebook's later split cell is used so both cells select the same rows.
from sklearn.model_selection import train_test_split
x_train, x_test = train_test_split(x, random_state=0)

# Standardization
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(x_train)
standardized_x = scaler.transform(x_train)
standardized_x_test = scaler.transform(x_test)

# Normalization
from sklearn.preprocessing import Normalizer
scaler = Normalizer().fit(x_train)
normalized_x = scaler.transform(x_train)
normalized_x_test = scaler.transform(x_test)

# Binarization
from sklearn.preprocessing import Binarizer
binarizer = Binarizer(threshold=0.0).fit(x)
binary_x = binarizer.transform(x)

# Encoding Categorical Features
# Replaces the string labels in y with integer codes.
from sklearn.preprocessing import LabelEncoder
enc = LabelEncoder()
y = enc.fit_transform(y)

# Imputing Missing Values
# `Imputer` was removed from sklearn.preprocessing; `SimpleImputer` in
# sklearn.impute is its replacement. It always imputes column-wise, so the
# old `axis=0` argument no longer exists and is dropped here.
from sklearn.impute import SimpleImputer
imp = SimpleImputer(missing_values=0, strategy='mean')

# Generating Polynomial Features
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(5)
poly.fit_transform(x)

# Training and test data

In [None]:
from sklearn.model_selection import train_test_split

# Partition features and labels into training and test subsets.
# random_state is pinned so repeated runs produce the same split.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=0,
)

# Creating the Model
## + Supervised Learning Estimators

In [None]:
# Linear Regression
# The `normalize` argument was removed from LinearRegression, so passing it
# raises a TypeError on current scikit-learn. If scaled features are wanted,
# apply a scaler (e.g. StandardScaler) to the data before fitting instead.
from sklearn.linear_model import LinearRegression
lr = LinearRegression()

# Support Vector Machine (SVM)
from sklearn.svm import SVC
svc = SVC(kernel='linear')

# Naive Bayes
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()

# KNN
from sklearn import neighbors
knn = neighbors.KNeighborsClassifier(n_neighbors=5)

## + Unsupervised Learning Estimators

In [None]:
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

# Principal Component Analysis (PCA)
# Per the scikit-learn PCA docs, a float n_components between 0 and 1 is
# treated as the fraction of variance the kept components must explain.
pca = PCA(n_components=0.95)

# K Means: three clusters, with a fixed seed for reproducible results.
k_means = KMeans(n_clusters=3, random_state=0)

# Model Fitting

In [None]:
# Supervised Learning
# Every estimator is fitted on the training split only. Fitting lr on the
# full x, y would let it see the x_test rows it is later asked to predict.
lr.fit(x_train, y_train)
knn.fit(x_train, y_train)
svc.fit(x_train, y_train)

# Unsupervised Learning
k_means.fit(x_train)
# fit_transform returns the transformed training data (not a model object);
# the fitted PCA estimator itself remains available as `pca`.
pca_model = pca.fit_transform(x_train)

# Prediction

In [None]:
# Supervised Estimators
# Each estimator's output gets its own name. Reusing a single `y_pred` for
# all of them discards every result but the last, and would leave k-means
# cluster ids in the variable that the metric cells compare against y_test.
svc_pred = svc.predict(np.random.random((2, 5)))  # labels for two new random samples
lr_pred = lr.predict(x_test)
knn_proba = knn.predict_proba(x_test)  # per-class probability estimates
y_pred = knn.predict(x_test)  # predicted class labels for the test split

# Unsupervised Estimators
cluster_pred = k_means.predict(x_test)  # cluster index assigned to each test row

# Evaluate the model's performance
## + Classification Metrics

In [None]:
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
)

# Accuracy Score: first via the estimator's own score method, then via the
# standalone metric applied to the predictions.
knn.score(x_test, y_test)
accuracy_score(y_test, y_pred)

# Classification Report
print(classification_report(y_test, y_pred))

# Confusion Matrix
print(confusion_matrix(y_test, y_pred))

## + Regression Metrics

In [None]:
# Reference targets shared by all three regression metrics below, so the
# scores are computed against the same ground truth.
# NOTE(review): assumes y_pred holds exactly three predictions to match
# y_true - confirm against the prediction cell.
y_true = [3, -0.5, 2]

# Mean Absolute Error
from sklearn.metrics import mean_absolute_error
mean_absolute_error(y_true, y_pred)

# Mean Squared Error
from sklearn.metrics import mean_squared_error
mean_squared_error(y_true, y_pred)

# R2 Score
from sklearn.metrics import r2_score
r2_score(y_true, y_pred)

## + Clustering Metrics

In [None]:
# Adjusted Rand Index
from sklearn.metrics import adjusted_rand_score
adjusted_rand_score(y_true, y_pred)

# Homogeneity
from sklearn.metrics import homogeneity_score
homogeneity_score(y_true, y_pred)

# V-Measure
from sklearn.metrics import v_measure_score
metrics.v_measure_score(y_true, y_pred)

# Cross Validation

In [None]:
# CROSS VALIDATION
# cross_val_score is imported here because no earlier cell brings it into scope.
from sklearn.model_selection import cross_val_score

# Per-fold scores: 4 folds for knn on the training split, 2 folds for lr on
# the full dataset.
print(cross_val_score(knn, x_train, y_train, cv=4))
print(cross_val_score(lr, x, y, cv=2))