In [1]:
# Print the module docstring; in this notebook session it is the text
# IPython generates automatically (see this cell's output).
print(__doc__)

import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets


def make_meshgrid(x, y, h=100):
    """Build a rectangular evaluation grid that covers the given data.

    Each axis runs from one unit below the data minimum up to (but not
    including) one unit above the data maximum, sampled every ``h`` units.

    Parameters
    ----------
    x : array-like with ``min()``/``max()``
        Values that determine the extent of the horizontal axis.
    y : array-like with ``min()``/``max()``
        Values that determine the extent of the vertical axis.
    h : number, optional
        Spacing between neighbouring grid points on both axes (default 100).

    Returns
    -------
    xx, yy : ndarray
        Coordinate matrices as produced by ``np.meshgrid``.
    """
    # Pad each axis by 1 so points on the edge are not drawn on the border.
    x_axis = np.arange(x.min() - 1, x.max() + 1, h)
    y_axis = np.arange(y.min() - 1, y.max() + 1, h)
    grid_x, grid_y = np.meshgrid(x_axis, y_axis)
    return grid_x, grid_y

Automatically created module for IPython interactive environment


In [2]:
def plot_contours(ax, clf, xx, yy, **params):
    """Draw a classifier's predicted regions as filled contours.

    Parameters
    ----------
    ax : matplotlib axes object
        Axes on which ``contourf`` is called.
    clf : a classifier
        Object whose ``predict`` is evaluated at every grid point.
    xx : ndarray
        Meshgrid of x coordinates.
    yy : ndarray
        Meshgrid of y coordinates (same shape as ``xx``).
    **params
        Extra keyword arguments forwarded unchanged to ``ax.contourf``.

    Returns
    -------
    The object returned by ``ax.contourf``.
    """
    # Flatten the grid into an (n_points, 2) array of (x, y) pairs for predict().
    grid_points = np.column_stack((xx.ravel(), yy.ravel()))
    # Fold the flat predictions back into the grid's shape for contouring.
    labels = clf.predict(grid_points).reshape(xx.shape)
    return ax.contourf(xx, yy, labels, **params)

In [3]:
# Import numpy and pandas under their standard aliases, plus scikit-learn's
# datasets module and matplotlib's pyplot, then switch to the 'ggplot' style.
# Note: numpy, datasets and pyplot were already imported in the first cell;
# only pandas is new here.
import numpy as np
import pandas as pd
from sklearn import datasets
import matplotlib.pyplot as plt
plt.style.use('ggplot')

In [4]:
# Read sheet 'Sheet1' of 'master_dataset.xlsx' into the DataFrame df using
# pd.read_excel(). The keyword is `sheet_name`; the older `sheetname`
# spelling was deprecated and later removed from pandas, so it raises a
# TypeError on current versions.
df = pd.read_excel('master_dataset.xlsx', sheet_name='Sheet1')

In [5]:
# Select the Sales features and call the new dataframe 'df_sales'.
# The selection is positional: all rows, columns at positions 14 through 94
# (the end of an iloc slice is exclusive).
# NOTE(review): presumably these positions are the sales columns of the
# spreadsheet — verify against the file's layout.
df_sales = df.iloc[:,14:95]

In [6]:
# Select the 'IsHoliday' column as the target and bind it to 'df_targetIsHol'.
# Selecting a single column by label like this yields a pandas Series rather
# than a DataFrame, despite the 'df_' prefix.
df_targetIsHol = df['IsHoliday']

In [7]:
# Assign X and y to the features and target data respectively.
# X keeps only the first two columns of df_sales (taken from its underlying
# array); y is the underlying array of the target column.
X = df_sales.values[:, :2]
y = df_targetIsHol.values

In [8]:
from sklearn import svm
# Create a support-vector classifier with gamma=2 and C=1 and bind it to svm_clf.
# NOTE(review): svm_clf is not fitted or used in the cells visible here (the
# fit call in the following cell is commented out) — confirm it is still needed.
svm_clf = svm.SVC(gamma=2, C=1)

In [9]:
# We create an instance of SVM and fit our data. We do not scale our
# data since we want to plot the support vectors.
#C = 1.0  # SVM regularization parameter
#models = svm_clf.fit(X, y)

In [10]:
# Create a figure holding a 2x2 grid of axes, then widen the horizontal and
# vertical gaps between the panels.
fig, sub = plt.subplots(nrows=2, ncols=2)
fig.subplots_adjust(wspace=0.4, hspace=0.4)


In [11]:
# Split the two feature columns of X into separate 1-D arrays for plotting.
X0 = X[:, 0]
X1 = X[:, 1]

In [12]:
# Build the evaluation grid over both features using make_meshgrid's
# default step size.
xx, yy = make_meshgrid(X0, X1)

In [13]:
# Titles for the four subplots, one per classifier.
titles = (
    'SVC with linear kernel',
    'LinearSVC (linear kernel)',
    'SVC with RBF kernel',
    'SVC with polynomial (degree 3) kernel',
)

In [14]:
# Construct the four SVM classifiers to compare; they are only created here,
# not yet fitted. The data is left unscaled because we want to plot the
# support vectors.
C = 1.0  # SVM regularization parameter, shared by all four models
models = (
    svm.SVC(kernel='linear', C=C),
    svm.LinearSVC(C=C),
    svm.SVC(kernel='rbf', gamma=0.7, C=C),
    svm.SVC(kernel='poly', degree=3, C=C),
)

In [15]:
# Fit every classifier on (X, y) and keep the fitted estimators.
# The result is materialized as a tuple instead of a lazy generator: a
# generator can be consumed only once, so a second pass over `models`
# (re-running the plotting cell, or this cell itself) would silently iterate
# over nothing and leave the subplots empty.
models = tuple(clf.fit(X, y) for clf in models)

In [None]:
#for clf, title, ax in zip(models, titles, sub.flatten()):
#    plot_contours(ax, clf, xx, yy,
 #                 cmap=plt.cm.coolwarm, alpha=0.8)

In [None]:
# Draw each classifier's decision regions on its own subplot and overlay the
# data points, coloured by their target value.
for clf, title, ax in zip(models, titles, sub.flatten()):
    plot_contours(ax, clf, xx, yy,
                  cmap=plt.cm.coolwarm, alpha=0.8)
    ax.scatter(X0, X1, c=y, cmap=plt.cm.coolwarm, s=20, edgecolors='k')
    # Clamp the view to the extent of the evaluation grid.
    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xlabel('Jewelry')
    ax.set_ylabel('Pet')
    # Hide tick marks on both axes.
    ax.set_xticks(())
    ax.set_yticks(())
    ax.set_title(title)

# Show the figure once, after all panels are drawn. Calling plt.show() inside
# the loop would display the figure after only the first panel was filled in.
# NOTE(review): the figure is created in an earlier cell; depending on the
# notebook's matplotlib backend it may need to be created in this cell (or
# re-displayed) for the finished plot to appear here — confirm.
plt.show()