# Plotting Decision Boundaries
For classification problems that involve exactly two predictor variables, a decision boundary may be plotted between the different classes. Any number of classes may be plotted in this manner.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
pd.options.display.max_columns = 500
%matplotlib inline

### Read in data

In [None]:
# Load the heart data set from the local data/ directory into a DataFrame.
heart = pd.read_csv('data/heart.csv')
# Preview the first rows (as the last expression, this is the cell's output).
heart.head()

In [None]:
# (number of rows, number of columns) of the loaded table.
heart.shape

### Scatterplot
Use Matplotlib to draw a scatterplot of the two predictor variables. Color each point by its class (the target variable).

In [None]:
# Template cell: fill in the blanks before running (it does not parse as written).
# x and y take the names of the two predictor columns, c the name of the column
# used for coloring, and data the DataFrame in which those names are looked up.
fig, ax = plt.subplots(figsize=(12, 6))
ax.scatter(x='', y='', c='', data=, alpha=.5)

# Use Logistic Regression

In [None]:
X = hb[[]].values
y = hb[].values

In [None]:
from sklearn.linear_model import LogisticRegression
# Unfitted logistic-regression classifier; tol is the stopping tolerance
# handed to the solver (see the scikit-learn LogisticRegression docs).
logr = LogisticRegression(tol=.00001)

In [None]:
# Fit on the full data set; this fitted model is the one later passed to
# plot_decision_boundary.
logr.fit(X, y)

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

In [None]:
# 5-fold cross-validation with shuffled folds. No random_state is given, so
# the split -- and therefore the scores -- can differ from one run to the next.
kf = KFold(n_splits=5, shuffle=True)
# One score per fold, computed with the splitter defined above.
scores = cross_val_score(logr, X, y, cv=kf)
scores

In [None]:
# Mean and standard deviation of the per-fold scores.
scores.mean(), scores.std()

## Plot a decision boundary
If we are modeling with just two predictor variables, then it is possible to plot a decision boundary and color the areas on the plot that correspond to a particular class.

In [None]:
def plot_decision_boundary(clf, X, y, *, padding=1, resolution=200):
    """Plot the decision regions of a fitted two-feature classifier.

    The classifier is evaluated on a regular grid spanning the data, the
    predicted class of every grid point is drawn as a filled region, and the
    observations are drawn on top as a scatterplot.

    Regions and points share a single class-to-integer encoding and a single
    color scale, so a class always has the same color in both layers.
    (Encoding the grid predictions and ``y`` independently, in order of first
    appearance, can assign different codes to the same class.)

    Args:
        clf: Fitted classifier exposing ``predict`` for two-column input.
        X: Array-like of shape (n_samples, 2) holding the two predictors.
        y: Array-like of shape (n_samples,) holding the class labels.
        padding: Margin added on every side of the data range, in data units.
        resolution: Number of grid points along each axis.

    Returns:
        The Matplotlib Axes containing the plot.

    Raises:
        ValueError: If ``X`` does not have exactly two columns.
    """
    X = np.asarray(X)
    if X.ndim != 2 or X.shape[1] != 2:
        raise ValueError(
            "X must have shape (n_samples, 2) to plot a decision boundary; "
            f"got {X.shape}"
        )
    labels = np.asarray(y).ravel()
    x1, x2 = X[:, 0], X[:, 1]

    xx, yy = np.meshgrid(
        np.linspace(x1.min() - padding, x1.max() + padding, resolution),
        np.linspace(x2.min() - padding, x2.max() + padding, resolution),
    )
    mesh_pred = np.asarray(clf.predict(np.column_stack((xx.ravel(), yy.ravel()))))

    # One sorted list of classes for both layers. The union covers the case
    # where the grid predictions and y do not contain the same set of classes.
    classes = np.unique(np.concatenate((labels, mesh_pred.ravel())))
    label_codes = np.searchsorted(classes, labels)
    mesh_codes = np.searchsorted(classes, mesh_pred.ravel()).reshape(xx.shape)

    # One filled band per class, centered on its integer code.
    levels = np.arange(len(classes) + 1) - 0.5
    # Shared color scale; max(..., 1) avoids a zero-width range for one class.
    top_code = max(len(classes) - 1, 1)

    _, ax = plt.subplots(figsize=(10, 8))
    ax.contourf(xx, yy, mesh_codes, levels=levels, vmin=0, vmax=top_code,
                alpha=0.4)
    ax.scatter(x1, x2, c=label_codes, vmin=0, vmax=top_code, s=20,
               edgecolor='black')
    return ax

In [None]:
# Draw the decision regions of the fitted model together with the data.
plot_decision_boundary(logr, X, y)

### All code at once

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

# Template: fill in the CSV path, the two predictor column names and the
# target column name before running.
hb = pd.read_csv('.csv')
X = hb[['', '']].values
y = hb[''].values

# Fit on all rows; this fitted model is the one plotted at the end.
logr = LogisticRegression(tol=.00001)
logr.fit(X, y)

# 5-fold cross-validation with shuffled folds. No random_state is given, so
# the scores can differ from one run to the next.
kf = KFold(n_splits=5, shuffle=True)
scores = cross_val_score(logr, X, y, cv=kf)
print("CV Scores:",  scores)
print(f"Mean CV accuracy is {scores.mean() :.3g} and STD is {scores.std() :.3g}")

# Relies on plot_decision_boundary having been defined in an earlier cell.
plot_decision_boundary(logr, X, y)