## Python packages

### NumPy: multi-dimensional arrays, linear algebra, math
[numpy.org](https://numpy.org/)  
`import numpy as np`

### Matplotlib: data visualization
[matplotlib.org](https://matplotlib.org/)  
`import matplotlib.pyplot as plt`

### scikit-learn: open-source machine learning library maintained by its community
[scikit-learn.org](https://scikit-learn.org/stable/)  
`import sklearn.cluster`  

When in doubt, read the docs!

---

In [2]:
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Seed NumPy's global generator so the sampled points are the same on every run.
np.random.seed(1)

# Blob centres: upper-right, lower-left and upper-left of the origin.
mean1 = np.array([5.0, 5.0])
mean2 = np.array([-5.0, -5.0])
mean3 = np.array([-5.0, 5.0])

# All three blobs share the same spread: unit variances with covariance 0.5.
cov1, cov2, cov3 = (np.array([[1.0, 0.5], [0.5, 1.0]]) for _ in range(3))

# Sample 250 points per blob. The generator is consumed in order (1, 2, 3),
# so the random stream is used exactly as three consecutive calls would use it.
dist1, dist2, dist3 = (
    np.random.multivariate_normal(mean, cov, 250)
    for mean, cov in ((mean1, cov1), (mean2, cov2), (mean3, cov3))
)

# Stack the blobs row-wise into one dataset (one row per point).
dists = np.vstack((dist1, dist2, dist3))

# Show the unlabeled points; equal axis scaling keeps the blob shapes undistorted.
plt.figure(figsize=(10, 10))
plt.plot(dists[:, 0], dists[:, 1], 'o')
plt.axis('equal')
plt.xlabel("Predictor 1")
plt.ylabel("Predictor 2")
plt.show()

In [None]:
import sklearn.cluster

# K-Means with one centroid per blob.
# random_state pins the centroid initialisation so re-running this cell always
# yields the same clusters and colours, instead of depending on the current
# state of NumPy's global generator. n_init is set explicitly because its
# default differs between scikit-learn releases.
kmeans_model = sklearn.cluster.KMeans(n_clusters=3, n_init=10, random_state=1)

# Learn the centroids, then assign every point to its nearest centroid.
kmeans_model.fit(dists)
clusters = kmeans_model.predict(dists)

# Colour each point by its assigned cluster; same axes as the raw-data plot.
plt.figure(figsize=(10, 10))
plt.scatter(dists[:, 0], dists[:, 1], c=clusters, cmap=plt.get_cmap('Paired'))
plt.axis('equal')
plt.xlabel("Predictor 1")
plt.ylabel("Predictor 2")
plt.show()

In [None]:
# Fix the random seed so everyone gets the same samples
np.random.seed(1)

# Cluster #1: centred at (5, 5), unit variances, mild positive correlation
mean1, cov1 = np.array([5.0, 5.0]), np.array([[1.0, 0.5], [0.5, 1.0]])

# Cluster #2: centred at (-5, -5), four times the variance of cluster #1
mean2, cov2 = np.array([-5.0, -5.0]), np.array([[4.0, 0.5], [0.5, 4.0]])

# Cluster #3: centred at (-3, 3), strong negative correlation (-0.9)
mean3, cov3 = np.array([-3.0, 3.0]), np.array([[1.0, -0.9], [-0.9, 1.0]])

# Draw 250 points from each cluster, in order, so the random stream is
# consumed in the sequence 1, 2, 3
dist1 = np.random.multivariate_normal(mean1, cov1, 250)
dist2 = np.random.multivariate_normal(mean2, cov2, 250)
dist3 = np.random.multivariate_normal(mean3, cov3, 250)

# Join the three samples row-wise into a single dataset
dists = np.concatenate([dist1, dist2, dist3])

# Plot the raw, unlabeled points with equal axis scaling
plt.figure(figsize=(10, 10))
plt.plot(dists[:, 0], dists[:, 1], 'o')
plt.axis('equal')
plt.xlabel("Predictor 1")
plt.ylabel("Predictor 2")
plt.show()

In [None]:
# Initialize K-Means with 3 clusters.
# random_state pins the centroid initialisation so re-running this cell always
# yields the same clusters and colours, instead of depending on the current
# state of NumPy's global generator. n_init is set explicitly because its
# default differs between scikit-learn releases.
kmeans_model = sklearn.cluster.KMeans(n_clusters=3, n_init=10, random_state=1)

# Train the model using the data
kmeans_model.fit(dists)

# Run the model on our data to calculate clusters
clusters = kmeans_model.predict(dists)

# Colour each point by its assigned cluster; same axes as the raw-data plot
plt.figure(figsize=(10, 10))
plt.scatter(dists[:, 0], dists[:, 1], c=clusters, cmap=plt.get_cmap('Paired'))
plt.axis('equal')
plt.xlabel("Predictor 1")
plt.ylabel("Predictor 2")
plt.show()

In [None]:
import sklearn.mixture

# Fit a three-component Gaussian mixture to the data. fit() returns the
# estimator itself, so construction and training are chained; random_state
# keeps the fitted components reproducible.
mixture_model = sklearn.mixture.GaussianMixture(
    n_components=3,
    random_state=1,
).fit(dists)

# Assign each point to the component the fitted model predicts for it.
mixture_clusters = mixture_model.predict(dists)

# Colour the points by their assigned mixture component.
plt.figure(figsize=(10, 10))
plt.scatter(dists[:, 0], dists[:, 1], c=mixture_clusters, cmap='Paired')
plt.axis('equal')
plt.show()