In [None]:
import pandas as pd
import numpy as np
import seaborn as sns

# Machine Learning in Python with Scikit-Learn

__Steps__

1. Choose a model by importing its class from Scikit-Learn
2. Choose (non-data) parameters by creating an instance of the above class
3. Arrange the data into a features matrix and a labels (target) vector
4. Fit the model instance to the data
5. Examine results (e.g. predict, plot)

## Supervised --> Regression

__Data__

In [None]:
# Seeded generator so the demo data is identical on every Restart & Run All.
rng = np.random.RandomState(123)

# Features: 10 samples x 10 columns, drawn uniformly from [0, 10).
x = rng.uniform(0, 10, (10, 10))

# Target: the sum of each row, computed in one vectorized call.
y = x.sum(axis=1)

In [None]:
# Inspect the 10x10 array of uniform random draws.
x

In [None]:
# Inspect the targets: one value per row of x, equal to that row's sum.
y

In [None]:
from sklearn.linear_model import LinearRegression
# No intercept term is estimated (fit_intercept=False).
model = LinearRegression(fit_intercept=False)
model.fit(x, y)
# fit() hands back the estimator itself, so `results` is the same object as `model`.
results = model

In [None]:
# Fitted weights, one per column of x; y is the plain sum of each row of x.
results.coef_

In [None]:
# The estimator was constructed with fit_intercept=False.
results.intercept_

## Supervised --> Classification

In [None]:
# Load the 'iris' example dataset through seaborn.
# NOTE(review): presumably fetched from seaborn's online repository on first use — confirm network access.
iris = sns.load_dataset('iris')

In [None]:
# Preview the first rows of the dataset.
iris.head()

In [None]:
# Summary statistics of the dataset.
iris.describe()

In [None]:
# Number of rows per value of the 'species' column.
iris['species'].value_counts()

In [None]:
# Histograms of the dataset's columns; the trailing semicolon keeps the
# returned axes array from being echoed beneath the figure.
iris.hist();

In [None]:
# Pairwise plots coloured by species; the trailing semicolon suppresses the returned object's repr.
sns.pairplot(iris, hue='species');

In [None]:
# Same preview as earlier, repeated ahead of the modelling section.
iris.head()

## Using Scikit-Learn for Classification

In [None]:
# Separate the predictors (every column except 'species') from the label column.
label_col = 'species'
X = iris.drop(columns=label_col)
Y = iris[label_col]

In [None]:
# Dimensions of the feature frame.
X.shape

In [None]:
# Dimensions of the label column.
Y.shape

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
split = train_test_split(X, Y, test_size=0.2, random_state=123)
X_train, X_test, Y_train, Y_test = split

In [None]:
# Report the shapes of the training pair first, then of the test pair.
for part in (X_train, Y_train, X_test, Y_test):
    print(part.shape)

In [None]:
from sklearn.svm import SVC
# fit() returns the estimator itself, so construction and training can be chained.
model = SVC(gamma='auto').fit(X_train, Y_train)

# Predicted labels for the held-out rows.
predict = model.predict(X_test)

In [None]:
# Labels the fitted model predicted for X_test.
predict

In [None]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

In [None]:
# Accuracy of the predictions against the held-out test labels.
accuracy_score(Y_test, predict)

In [None]:
# Confusion matrix of the true test labels (Y_test) against the predictions.
confusion_matrix(Y_test, predict)

In [None]:
# Text report of classification metrics; print() renders its line breaks.
print(classification_report(Y_test, predict))

## Model Selection

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB

# Candidate classifiers as (display name, unfitted estimator) pairs.
models = [
    ('Dec Tree', DecisionTreeClassifier()),
    ('Lin Disc', LinearDiscriminantAnalysis()),
    ('Gauss', GaussianNB()),
    ('SVC', SVC(gamma='auto')),
]

In [None]:
# One shared splitter, defined before first use, so every candidate is scored
# on the same 10 stratified folds. random_state only takes effect together with
# shuffle=True; recent scikit-learn versions raise a ValueError otherwise.
kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=123)

# Single-model example: per-fold accuracy of the `model` currently in scope.
cv = cross_val_score(model, X_train, Y_train, cv=kf, scoring='accuracy')

# Collect (name, mean accuracy, std of accuracy) for each candidate.
results = []

for name, model in models:
    res = cross_val_score(model, X_train, Y_train, cv=kf, scoring='accuracy')
    res_mean = round(res.mean(), 4)
    res_std  = round(res.std(), 4)
    results.append((name, res_mean, res_std))

In [None]:
# List of (name, mean accuracy, std) tuples, one per candidate model.
results

In [None]:
# Print one aligned row per model: name, mean accuracy, standard deviation.
for name, mean_acc, std_acc in results:
    print(name.ljust(10), str(mean_acc).ljust(6), str(std_acc))