# Scikit-learn Basics

### Logistic Regression

In [55]:
from sklearn import datasets
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

In [56]:
# Load the iris dataset through sklearn.datasets; the cells that follow
# read its `.data` (features) and `.target` (labels) attributes.
dataset = datasets.load_iris()

In [57]:
# Build a logistic-regression classifier with default settings and fit it on
# the full iris feature matrix. `fit` hands back the estimator itself, so the
# construction and the fit can be chained into a single assignment.
model = LogisticRegression().fit(dataset.data, dataset.target)

# Show the fitted estimator's parameter summary.
print(model)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)


In [58]:
# Ground-truth labels alongside the model's predictions.
# The predictions are made on the same samples the model was fitted on, so
# the metrics computed from them describe training fit, not generalisation.
expected, predicted = dataset.target, model.predict(dataset.data)

In [30]:
# Per-class precision/recall/F1 table followed by the confusion matrix,
# each on its own line (a newline separator mirrors two separate prints).
print(
    metrics.classification_report(expected, predicted),
    metrics.confusion_matrix(expected, predicted),
    sep="\n",
)

             precision    recall  f1-score   support

          0       1.00      1.00      1.00        50
          1       0.98      0.90      0.94        50
          2       0.91      0.98      0.94        50

avg / total       0.96      0.96      0.96       150

[[50  0  0]
 [ 0 45  5]
 [ 0  1 49]]


### Naive Bayes

In [59]:
from sklearn.naive_bayes import GaussianNB

In [102]:
# Load the wine dataset through sklearn.datasets; the Naive Bayes cells below
# use `wine.data` as features and `wine.target` as labels.
wine = datasets.load_wine()

In [103]:
# Fit a default Gaussian Naive Bayes classifier on the whole wine dataset.
# Note that this rebinds `model`, replacing the estimator from earlier cells.
model = GaussianNB().fit(wine.data, wine.target)

# Show the fitted estimator's parameter summary.
print(model)

GaussianNB(priors=None)


In [104]:
# Predict on the same wine samples the model was fitted on (training-set
# evaluation), then keep the true labels next to them for the report cell.
predicted = model.predict(wine.data)
expected = wine.target

In [105]:
# Summarise the Gaussian Naive Bayes predictions on the wine dataset.
# The arguments must be the `expected` / `predicted` names assigned in the
# preceding cell; the previous `expectedA` / `predictedA` names are not
# defined anywhere in the notebook and raise NameError on a fresh kernel.
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))

             precision    recall  f1-score   support

          0       1.00      0.98      0.99        59
          1       0.99      0.99      0.99        71
          2       0.98      1.00      0.99        48

avg / total       0.99      0.99      0.99       178

[[58  1  0]
 [ 0 70  1]
 [ 0  0 48]]


### k-Nearest Neighbour

In [65]:
from sklearn.neighbors import KNeighborsClassifier

In [98]:
# Load the breast-cancer dataset through sklearn.datasets; the k-NN cells
# below use `cancer.data` as features and `cancer.target` as labels.
cancer = datasets.load_breast_cancer()

In [99]:
# Fit a default k-nearest-neighbours classifier on the whole breast-cancer
# dataset. `fit` returns the estimator, so the result is bound directly.
model = KNeighborsClassifier().fit(cancer.data, cancer.target)

# Leave the fitted estimator as the cell's last expression so its
# parameter summary is displayed as the cell output.
model

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

In [100]:
# True labels paired with the classifier's predictions. The model is queried
# with the very samples it was fitted on, so this is a training-set evaluation.
expected, predicted = cancer.target, model.predict(cancer.data)


In [101]:
# Classification report first, confusion matrix second, one per line.
print(
    metrics.classification_report(expected, predicted),
    metrics.confusion_matrix(expected, predicted),
    sep="\n",
)

             precision    recall  f1-score   support

          0       0.95      0.90      0.93       212
          1       0.94      0.97      0.96       357

avg / total       0.95      0.95      0.95       569

[[191  21]
 [  9 348]]


### Decision Trees (CART)

In [85]:
from sklearn.tree import DecisionTreeClassifier

In [88]:
# Load the digits dataset through sklearn.datasets; the decision-tree cells
# below use `digit.data` as features and `digit.target` as labels.
digit = datasets.load_digits()

In [89]:
# Fit a decision tree with default settings on the whole digits dataset.
# `fit` returns the estimator, so the result is bound directly.
model = DecisionTreeClassifier().fit(digit.data, digit.target)

# Leave the fitted estimator as the cell's last expression so its
# parameter summary is displayed as the cell output.
model

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [94]:
# The tree is scored on the very samples it was fitted to (`digit.data`), so
# the report that follows reflects training fit only, not held-out accuracy.
predicted = model.predict(digit.data)
expected = digit.target

In [95]:
# Per-class metrics table followed by the 10x10 confusion matrix for the
# digit labels, each printed on its own line.
print(
    metrics.classification_report(expected, predicted),
    metrics.confusion_matrix(expected, predicted),
    sep="\n",
)

             precision    recall  f1-score   support

          0       1.00      1.00      1.00       178
          1       1.00      1.00      1.00       182
          2       1.00      1.00      1.00       177
          3       1.00      1.00      1.00       183
          4       1.00      1.00      1.00       181
          5       1.00      1.00      1.00       182
          6       1.00      1.00      1.00       181
          7       1.00      1.00      1.00       179
          8       1.00      1.00      1.00       174
          9       1.00      1.00      1.00       180

avg / total       1.00      1.00      1.00      1797

[[178   0   0   0   0   0   0   0   0   0]
 [  0 182   0   0   0   0   0   0   0   0]
 [  0   0 177   0   0   0   0   0   0   0]
 [  0   0   0 183   0   0   0   0   0   0]
 [  0   0   0   0 181   0   0   0   0   0]
 [  0   0   0   0   0 182   0   0   0   0]
 [  0   0   0   0   0   0 181   0   0   0]
 [  0   0   0   0   0   0   0 179   0   0]
 [  0   0   0   0   0   0   0   0 174   0]
 [  0   0   0   0   0   0   0   0   0 180]]

### Support Vector Machine

In [106]:
from sklearn.svm import SVC

In [107]:
# Load the diabetes dataset through sklearn.datasets; the SVM cells below use
# `diabetes.data` as features and `diabetes.target` as the value to predict.
# NOTE(review): the report recorded further down lists target values such as
# 25.0, 31.0, 37.0 with only 1-4 samples each, so this target does not look
# like a small set of class labels - confirm it suits a classifier demo.
diabetes = datasets.load_diabetes()

In [108]:
# Fit a default support-vector classifier on the whole diabetes dataset.
# NOTE(review): the recorded report for this model lists labels such as 25.0,
# 31.0, 37.0 with support of 1-4 each and 0.00 scores, i.e. every distinct
# target value is being treated as its own class. A regressor (for example
# sklearn.svm.SVR) with regression metrics is probably what is wanted here -
# confirm before relying on these results.
model = SVC().fit(diabetes.data, diabetes.target)

# Leave the fitted estimator as the cell's last expression so its
# parameter summary is displayed as the cell output.
model

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [109]:
# Target values paired with the model's predictions. The model is queried
# with the same samples it was fitted on, so this is a training-set evaluation.
expected, predicted = diabetes.target, model.predict(diabetes.data)

In [110]:
# Classification report first, confusion matrix second, one per line.
# NOTE(review): the recorded output shows one row per distinct target value
# (25.0, 31.0, ...) with 0.00 precision/recall, which suggests classification
# metrics are not a meaningful summary for this target - confirm.
print(
    metrics.classification_report(expected, predicted),
    metrics.confusion_matrix(expected, predicted),
    sep="\n",
)

             precision    recall  f1-score   support

       25.0       0.00      0.00      0.00         1
       31.0       0.00      0.00      0.00         1
       37.0       0.00      0.00      0.00         1
       39.0       0.00      0.00      0.00         2
       40.0       0.00      0.00      0.00         1
       42.0       0.00      0.00      0.00         3
       43.0       0.00      0.00      0.00         1
       44.0       0.00      0.00      0.00         1
       45.0       0.00      0.00      0.00         1
       47.0       0.00      0.00      0.00         2
       48.0       0.00      0.00      0.00         3
       49.0       0.00      0.00      0.00         3
       50.0       0.00      0.00      0.00         1
       51.0       0.00      0.00      0.00         3
       52.0       0.00      0.00      0.00         4
       53.0       0.00      0.00      0.00         4
       54.0       0.00      0.00      0.00         1
       55.0       0.00      0.00      0.00   

  'precision', 'predicted', average, warn_for)
