# Iniciando

In [None]:
from sklearn.linear_model import RidgeClassifier
from sklearn import preprocessing
import pandas as pd

In [None]:
# Toy dataset with two beers and two wines: two numeric measurements per
# sample plus the drink name, which is used as the classification target.
data = pd.DataFrame({
    'Teor Alcoólico': [5.0, 4.5, 13.0, 12.0],
    'Comprimento de Onda': [600, 585, 690, 710],
    'Bebida': ['Cerveja', 'Cerveja', 'Vinho', 'Vinho'],
})
data.head()

In [None]:
# Feature matrix: the two numeric measurement columns.
x_true = data.loc[:, ['Teor Alcoólico', 'Comprimento de Onda']]
# Target: the drink name of each sample.
y_true = data.loc[:, 'Bebida']

In [None]:
# Encode the string labels as integer class codes for the classifier.
# The encoder is fitted on the original 'Bebida' column of `data` rather than
# on `y_true` itself: re-running this cell would otherwise re-fit the encoder
# on already-encoded integers, and `le.inverse_transform` would then return
# numbers instead of drink names.
le = preprocessing.LabelEncoder()
y_true = le.fit_transform(data['Bebida'])

In [None]:
# Train a ridge classifier with its default settings on the toy dataset.
# The fit call is the last expression of the cell, so the fitted estimator
# is displayed as the cell output.
clf = RidgeClassifier()
clf.fit(X=x_true, y=y_true)

In [None]:
# Classify one unseen sample. The sample is wrapped in a DataFrame that reuses
# the training column names because the classifier was fitted on a DataFrame;
# recent scikit-learn versions emit a feature-name warning when such a model
# predicts from a bare list.
x_test = pd.DataFrame([[4.3, 590]], columns=x_true.columns)
# Map the predicted integer code back to the original drink name.
pred = le.inverse_transform(clf.predict(x_test))
print(pred)

# Exemplo Prático - Classificação

- Documentação:
  - __[Árvore de Decisão](https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html#sklearn.tree.DecisionTreeClassifier)__
  - __[KFold](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html#sklearn.model_selection.KFold)__
  - __[Accuracy](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html#sklearn.metrics.accuracy_score)__

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
import pandas as pd
import numpy as np

In [None]:
# Load the UCI Wine dataset. The file is read without a header row
# (header=None), so the column names are supplied explicitly.
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data',
    header=None,
    names=[
        'Type',
        'Alcohol',
        'Malic acid',
        'Ash',
        'Alcalinity of ash',
        'Magnesium',
        'Total phenols',
        'Flavanoids',
        'Nonflavanoid phenols',
        'Proanthocyanins',
        'Color intensity',
        'Hue',
        'OD280/OD315 of diluted wines',
        'Proline',
    ],
)
df.head()

In [None]:
# Features: every column after the first one.
x = df.loc[:, df.columns[1:]]
# Target: the 'Type' column.
y = df.loc[:, 'Type']

In [None]:
# 4-fold cross-validation of a decision tree classifier.
# shuffle=True: without shuffling, KFold cuts the rows into consecutive
# chunks, so every fold depends on the order of the rows in the file.
# NOTE(review): wine.data appears to list its samples grouped by class, which
# would leave unshuffled folds with missing/over-represented classes — confirm.
# random_state is fixed (splitter and tree) so a Restart & Run All reproduces
# the same scores.
kf = KFold(n_splits=4, shuffle=True, random_state=42)
score = []
for train_index, test_index in kf.split(x):
    x_train, x_test = x.values[train_index], x.values[test_index]
    y_train, y_test = y.values[train_index], y.values[test_index]
    clf = DecisionTreeClassifier(random_state=42)
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    # accuracy_score's signature is (y_true, y_pred).
    score.append(accuracy_score(y_test, y_pred))

In [None]:
# Average of the per-fold scores collected in `score`.
np.asarray(score).mean()

# Exemplo Prático - Regressão

- Documentação:
  - __[Regressão Linear](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html#sklearn.linear_model.LinearRegression)__
  - __[MinMaxScaler](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html#sklearn.preprocessing.MinMaxScaler)__

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler

def mape(y_pred, y_test):
    """Return the mean absolute percentage error (MAPE), in percent.

    Parameters
    ----------
    y_pred : array-like
        Predicted values.
    y_test : array-like
        Reference values, with the same shape as ``y_pred``. A zero entry
        yields ``inf``/``nan`` in the result, since the error is expressed
        relative to the reference value.

    Returns
    -------
    numpy.float64
        ``mean(|y_test - y_pred| / |y_test|) * 100``.
    """
    # Coerce to float arrays so plain Python lists are accepted as well.
    y_pred = np.asarray(y_pred, dtype=float)
    y_test = np.asarray(y_test, dtype=float)
    # Divide by |y_test| so negative reference values still contribute a
    # non-negative percentage error.
    return np.mean(np.abs(y_test - y_pred) / np.abs(y_test)) * 100

In [None]:
# Build the regression dataset by stacking two CSV reads into one frame.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
# NOTE(review): both reads point to winequality-white.csv, so every row ends
# up in the frame twice (and therefore in both the train and test side of the
# cross-validation below). One of them was possibly meant to be
# winequality-red.csv — confirm before changing, as it alters the results.
df = pd.concat(
    [
        pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv', sep=';'),
        pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv', sep=';'),
    ],
    ignore_index=True,
)
df.head()

In [None]:
# Feature matrix: every column except the last one, as a NumPy array.
x = df.loc[:, df.columns[:-1]].values
# Target vector: the 'quality' column, as a NumPy array.
y = df.loc[:, 'quality'].values

# Optional min-max feature scaling, left disabled; uncomment to enable.
#scaler = MinMaxScaler()
#x = scaler.fit_transform(x)

In [None]:
# 4-fold cross-validation of a linear regression; each fold is scored with
# mape() and the per-fold results are collected in `score`.
kf = KFold(n_splits=4)
score = []
for train_index, test_index in kf.split(x, y):
    # Slice this fold's training and held-out rows.
    x_train = x[train_index]
    y_train = y[train_index]
    x_test = x[test_index]
    y_test = y[test_index]
    # fit() returns the estimator itself, so it can be chained.
    reg = LinearRegression().fit(x_train, y_train)
    y_pred = reg.predict(x_test)
    score.append(mape(y_pred, y_test))

In [None]:
# Average of the per-fold scores collected in `score`.
np.asarray(score).mean()

# Desafio

 - Link para a base de dados: https://s3.amazonaws.com/assets.datacamp.com/blog_assets/Employee+Churn+in+Python/HR_comma_sep.csv
 - Coluna target: 'left'