<a href="https://colab.research.google.com/github/benvictoria21/python-machine-learning/blob/master/section4_statistical_decision_theory.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%matplotlib inline
import random

import matplotlib.pyplot as plt
import numpy as np
import scipy
import scipy.stats

In [None]:
"""FIGURE 2.5. The optimal Bayes decision boundary for the simulation example.
Since the generating density is known for each class, this decision boundary can be
calculated exactly."""
# Number of observations drawn per class.
sample_size = 100
# Parameters for the distributions the component means are drawn from
mean_blue = [1, 0]
mean_orange = [0, 1]
mean_cov = np.eye(2)
# Number of mixture components (means) per class.
mean_size = 10

# Covariance of each mixture component around its mean
sample_cov = np.eye(2) / 5

# Generate mean components for blue and orange (mean_size means for each)
sample_blue_mean = np.random.multivariate_normal(mean_blue, mean_cov, mean_size)
sample_orange_mean = np.random.multivariate_normal(mean_orange, mean_cov, mean_size)

# Generate blue points: pick one of the blue means uniformly at random for
# every observation, then draw the observation around that mean.
sample_blue = np.array([
    np.random.multivariate_normal(sample_blue_mean[r], sample_cov)
    for r in np.random.randint(0, mean_size, sample_size)
])
y_blue = np.zeros(sample_size)

# Generate orange points the same way from the orange means.
sample_orange = np.array([
    np.random.multivariate_normal(sample_orange_mean[r], sample_cov)
    for r in np.random.randint(0, mean_size, sample_size)
])
y_orange = np.ones(sample_size)

# Stack both classes: blue rows first (label 0), then orange rows (label 1).
data_x = np.concatenate((sample_blue, sample_orange), axis=0)
data_y = np.concatenate((y_blue, y_orange))


In [None]:
def density_blue(arr: np.ndarray, means=None, cov=None) -> np.ndarray:
    """Evaluate the blue class-conditional density at the given points.

    The density is an equal-weight mixture of Gaussians, one component per
    mean, all sharing the same covariance. The covariance must be the one
    the observations were drawn with (``sample_cov``), not the covariance
    used to draw the means themselves.

    Parameters
    ----------
    arr
        Points at which to evaluate the density, one point per row.
    means
        Component means, one per row. Defaults to the module-level
        ``sample_blue_mean``.
    cov
        Covariance shared by every component. Defaults to the module-level
        ``sample_cov``.

    Returns
    -------
    The mixture density at each point of ``arr`` (the average of the
    component densities).
    """
    means = sample_blue_mean if means is None else means
    cov = sample_cov if cov is None else cov
    densities = np.array([
        scipy.stats.multivariate_normal.pdf(arr, mean=m, cov=cov)
        for m in means
    ])
    # Every component is equally likely, so the mixture is the plain mean.
    return densities.mean(axis=0)


def density_orange(arr: np.ndarray, means=None, cov=None) -> np.ndarray:
    """Evaluate the orange class-conditional density at the given points.

    The density is an equal-weight mixture of Gaussians, one component per
    mean, all sharing the same covariance. The covariance must be the one
    the observations were drawn with (``sample_cov``), not the covariance
    used to draw the means themselves.

    Parameters
    ----------
    arr
        Points at which to evaluate the density, one point per row.
    means
        Component means, one per row. Defaults to the module-level
        ``sample_orange_mean``.
    cov
        Covariance shared by every component. Defaults to the module-level
        ``sample_cov``.

    Returns
    -------
    The mixture density at each point of ``arr`` (the average of the
    component densities).
    """
    means = sample_orange_mean if means is None else means
    cov = sample_cov if cov is None else cov
    densities = np.array([
        scipy.stats.multivariate_normal.pdf(arr, mean=m, cov=cov)
        for m in means
    ])
    # Every component is equally likely, so the mixture is the plain mean.
    return densities.mean(axis=0)

In [None]:
# Bounding box of the training data along each coordinate.
min_x = data_x.min(axis=0)
max_x = data_x.max(axis=0)
print(min_x, max_x)
# Regular 100 x 100 evaluation grid covering the bounding box plus a margin
# of 0.1 on every side.
grid_x = np.linspace(min_x[0] - .1, max_x[0] + .1, 100)
grid_y = np.linspace(min_x[1] - .1, max_x[1] + .1, 100)
# One (x, y) point per row; 'ij' indexing keeps x as the slowest-varying
# coordinate, i.e. the same row order as a nested "for x: for y" loop.
arr = np.stack(
    np.meshgrid(grid_x, grid_y, indexing='ij'), axis=-1
).reshape(-1, 2)
# Class-conditional densities of both classes at every grid point.
proba_blue = density_blue(arr)
proba_orange = density_orange(arr)


In [None]:
# Inspect the evaluation grid (a bare expression, shown as the cell output
# when run in a notebook).
arr

In [None]:
# Draw the training samples and the Bayes decision regions on one axes.
fig = plt.figure(1)
ax = fig.add_subplot(1, 1, 1)

# Training observations: blue class first, then orange.
for points, colour in ((sample_blue, 'C0'), (sample_orange, 'C1')):
    ax.plot(points[:, 0], points[:, 1], 'o', color=colour)

# Bayes classifier: every grid point goes to the class whose density is
# larger there (ties fall to orange, the complement of the blue mask).
mask_blue = proba_blue > proba_orange
mask_orange = ~mask_blue
for mask, colour in ((mask_blue, 'C0'), (mask_orange, 'C1')):
    ax.plot(arr[mask, 0], arr[mask, 1], 'o',
            markersize=2, color=colour, alpha=.2)

ax.set_title('Bayes Optimal Classifier')
plt.show()
