In [10]:
from sklearn.datasets import load_iris
import pandas as pd
# Fetch the Iris dataset that ships with scikit-learn
iris = load_iris()

# Build a single frame: the four measurement columns plus the class label
df = pd.DataFrame(
    iris.data,
    columns=['sepal length', 'sepal width', 'petal length', 'petal width'],
).assign(label=iris.target)

In [11]:
# Show the assembled frame as the cell's output (measurements + `label` column)
df

Unnamed: 0,sepal length,sepal width,petal length,petal width,label
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [12]:
import numpy as np
from sklearn.mixture import GaussianMixture
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.datasets import load_iris
import seaborn as sns

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
SEED = 1              # one seed shared by the sampler and every GMM fit
N_SIM_SAMPLES = 1000  # number of points drawn from the reference mixture
np.random.seed(SEED)  # legacy global seed, set *before* any random draw

# Parameters of the reference 2-D, 3-component mixture
Mu = np.array([[1, 2], [-3, -5], [5, 5]])
Sigma = np.array([
    [[2, 0], [0, 0.5]],
    [[1, 0], [0, 1]],
    [[0.5, 0], [0, 4]]
])
P = np.ones(3) / 3


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def make_gmm(means, covariances, weights, random_state=None):
    """Return a GaussianMixture that carries exactly the given parameters.

    The original cell called ``fit(Mu)``, which runs EM on the three mean
    vectors themselves and therefore re-estimates (overwrites) the supplied
    covariances and weights. Here the parameters are installed directly, so
    ``sample`` / ``score_samples`` use the mixture that was actually specified.

    Parameters
    ----------
    means : array-like, shape (n_components, n_features)
    covariances : array-like, shape (n_components, n_features, n_features)
        Full covariance matrix of each component (must be positive definite).
    weights : array-like, shape (n_components,)
        Mixing proportions.
    random_state : int or None
        Forwarded to ``GaussianMixture`` so that sampling is reproducible.

    Returns
    -------
    GaussianMixture
        An estimator with ``weights_``, ``means_``, ``covariances_`` and
        ``precisions_cholesky_`` set; no data fitting is performed.
    """
    means = np.asarray(means, dtype=float)
    covariances = np.asarray(covariances, dtype=float)
    weights = np.asarray(weights, dtype=float)

    model = GaussianMixture(
        n_components=len(weights),
        covariance_type='full',
        random_state=random_state,
    )
    model.weights_ = weights
    model.means_ = means
    model.covariances_ = covariances
    # For 'full' covariances scikit-learn stores inv(L).T per component, where
    # L is the lower Cholesky factor of the covariance; score_samples needs it.
    model.precisions_cholesky_ = np.linalg.inv(
        np.linalg.cholesky(covariances)
    ).transpose(0, 2, 1)
    model.n_features_in_ = means.shape[1]
    return model


def fit_gmms(data, component_counts=(1, 2, 3), random_state=SEED):
    """Fit one GaussianMixture per requested component count.

    Prints the fitted means of every model (same message format as before)
    and returns the fitted models in the order of ``component_counts``.
    """
    models = []
    for n_comp in component_counts:
        model = GaussianMixture(
            n_components=n_comp, max_iter=1000, random_state=random_state
        )
        model.fit(data)
        models.append(model)
        print(f'\n GM Mean for {n_comp} Component(s):\n', model.means_)
    return models


def plot_gmm_contours(ax, model, data, hue=None, title=None,
                      xlabel='x1', ylabel='x2'):
    """Scatter ``data`` on ``ax`` and overlay negative log-likelihood contours.

    Level sets of the negative log-likelihood coincide with level sets of the
    mixture PDF, so the contours outline the fitted density.
    """
    sns.scatterplot(ax=ax, x=data[:, 0], y=data[:, 1], hue=hue)

    # Evaluate the model on a grid spanning the limits chosen by the scatter
    grid_x = np.linspace(*ax.get_xlim(), 100)
    grid_y = np.linspace(*ax.get_ylim(), 100)
    X, Y = np.meshgrid(grid_x, grid_y)
    grid_points = np.column_stack([X.ravel(), Y.ravel()])
    Z = -model.score_samples(grid_points).reshape(X.shape)
    ax.contour(X, Y, Z, levels=np.logspace(0, 2, 12))

    if title is None:
        # Derive the title from the model instead of hard-coding "3"
        title = ('Gaussian' if model.n_components == 1
                 else f'Gaussian Mixture Model - {model.n_components} Component(s)')
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    if hue is not None:
        # Place the legend on *this* axes; plt.legend() would only affect the
        # most recently drawn one.
        ax.legend(loc='upper right')


def plot_model_comparison(models, data, hue=None, xlabel='x1', ylabel='x2'):
    """Draw one stacked panel per model, each with data and model contours."""
    fig, axes = plt.subplots(len(models), 1, figsize=(10, 15))
    for ax, model in zip(np.atleast_1d(axes), models):
        plot_gmm_contours(ax, model, data, hue=hue, xlabel=xlabel, ylabel=ylabel)
    fig.tight_layout()
    plt.show()


# ---------------------------------------------------------------------------
# Simulated data from the reference mixture
# ---------------------------------------------------------------------------
gmm = make_gmm(Mu, Sigma, P, random_state=SEED)
sim_data = gmm.sample(N_SIM_SAMPLES)[0]

# Plot the simulated data together with the contours of the true mixture
fig, ax = plt.subplots()
plot_gmm_contours(ax, gmm, sim_data,
                  title='GMM - PDF Contours and Simulated Data')
plt.show()

# Fit GMMs with 1, 2 and 3 components and compare the 1- and 3-component fits
sim_models = fit_gmms(sim_data)
plot_model_comparison([sim_models[0], sim_models[2]], sim_data)

# ---------------------------------------------------------------------------
# Iris data projected onto its first two principal components
# ---------------------------------------------------------------------------
iris = load_iris()
pca = PCA(n_components=2)
score = pca.fit_transform(iris.data)

# Fit GMMs to the PCA scores and compare the 1- and 3-component fits
GMModels = fit_gmms(score)
plot_model_comparison(
    [GMModels[0], GMModels[2]],
    score,
    hue=iris.target_names[iris.target],
    xlabel='1st principal component',
    ylabel='2nd principal component',
)


ModuleNotFoundError: No module named 'matplotlib'