Write a program to demonstrate the working of the EM algorithm. Apply the EM algorithm to
cluster a set of data stored in a .CSV file.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture
from scipy.stats import norm
import seaborn as sns

# Synthetic 1-D data: an unbalanced mixture of two Gaussians
# (200 samples around mu1, 600 samples around mu2).
# A seeded generator keeps the samples, and therefore the figure, reproducible.
rng = np.random.default_rng(0)
mu1, sigma1 = 2, 1
mu2, sigma2 = -1, 0.8
X1 = rng.normal(mu1, sigma1, size=200)
X2 = rng.normal(mu2, sigma2, size=600)
X = np.concatenate([X1, X2])
X = X.reshape(-1, 1)  # single feature column: shape (n_samples, 1)

# Fit a two-component Gaussian mixture model to the samples.
gmm = GaussianMixture(n_components=2, random_state=0)
gmm.fit(X)

# Evaluate the fitted density on an evenly spaced grid spanning the data range.
# ndarray.min()/max() reduce the whole array directly instead of iterating rows
# through the Python builtins.
x_grid = np.linspace(X.min(), X.max(), 1000).reshape(-1, 1)
# score_samples returns log-densities, so exponentiate to get densities.
density_estimation = np.exp(gmm.score_samples(x_grid))

# Flatten the (n, 1) column so kdeplot draws one univariate curve with this label.
sns.kdeplot(X.ravel(), label="Actual Density")
plt.plot(x_grid.ravel(), density_estimation, label='Estimated density\nFrom Gaussian Mixture Model')
plt.xlabel('X')
plt.ylabel('Density')
plt.title('Density Estimation using GMM')
plt.legend()
plt.show()

Write a program to demonstrate the working of the Naïve Bayes classifier by considering
an appropriate textual data set, and calculate its accuracy.

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Load the Iris measurements together with their class labels.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training are chained.
naive_bayes_classifier = GaussianNB().fit(X_train, y_train)
y_pred = naive_bayes_classifier.predict(X_test)

# Overall accuracy on the held-out samples.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Per-class precision/recall/F1, labelled with the class names.
classification_report_result = classification_report(
    y_test,
    y_pred,
    target_names=iris.target_names,
)
print("\nClassification Report:\n", classification_report_result)

Use an appropriate dataset for clustering with the k-Means algorithm. You can use Python ML
library classes/APIs in the program.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

# Synthetic samples drawn around three centres; true_labels records the
# generating centre of each sample and is used only for the reference plot.
data, true_labels = make_blobs(n_samples=300, centers=3, random_state=42)

# n_init is set explicitly: its default differs between scikit-learn releases
# (and triggers a FutureWarning on some), so pinning it keeps results stable.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
predicted_labels = kmeans.fit_predict(data)
centroids = kmeans.cluster_centers_

plt.figure(figsize=(10, 5))

# Left panel: samples coloured by the label they were generated with.
plt.subplot(1, 2, 1)
plt.scatter(data[:, 0], data[:, 1], c=true_labels, cmap='viridis', edgecolors='k', s=50)
plt.title('True Clusters')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')

# Right panel: samples coloured by the k-means assignment, plus the centroids.
# Cluster indices are arbitrary, so colours need not match the left panel.
plt.subplot(1, 2, 2)
plt.scatter(data[:, 0], data[:, 1], c=predicted_labels, cmap='viridis', edgecolors='k', s=50)
plt.scatter(centroids[:, 0], centroids[:, 1], c='red', marker='X', s=200, label='Centroids')
plt.title('K-Means Clusters')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.legend()

# Keep the axis labels of the two panels from overlapping.
plt.tight_layout()
plt.show()

Write a program to demonstrate the working of association rule learning using the Apriori
algorithm on an appropriate data set.

In [None]:
import mlxtend
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
import pandas as pd

# Toy market-basket data: each inner list holds the items of one transaction.
transactions = [
    ['Milk', 'Bread', 'Butter'],
    ['Milk', 'Bread'],
    ['Milk', 'Diapers'],
    ['Milk', 'Beer', 'Diapers'],
    ['Bread', 'Butter'],
    ['Bread', 'Beer'],
    ['Butter', 'Diapers'],
    ['Bread', 'Beer', 'Diapers'],
]

# One-hot encode the transactions: one column per distinct item.
encoder = TransactionEncoder()
onehot = encoder.fit(transactions).transform(transactions)
df = pd.DataFrame(onehot, columns=encoder.columns_)

# Mine itemsets with support >= 0.1, then keep rules with confidence >= 0.7.
frequent_itemsets = apriori(df, min_support=0.1, use_colnames=True)
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.7,
)

print("Frequent Itemsets:", frequent_itemsets, sep="\n")
print("Association Rules:", rules, sep="\n")

Write a program to demonstrate the working of Principal Component Analysis.

In [None]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Small toy table with three features and five samples.
# Feature2 and Feature3 are exact linear functions of Feature1
# (Feature2 = 6 - Feature1, Feature3 = Feature1 + 1), so expect the first
# principal component to carry essentially all of the variance.
data = {
    'Feature1': [1, 2, 3, 4, 5],
    'Feature2': [5, 4, 3, 2, 1],
    'Feature3': [2, 3, 4, 5, 6]
}

df = pd.DataFrame(data)

# Standardize each feature to zero mean and unit variance before PCA.
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df)

# Project the standardized data onto its first two principal components.
pca = PCA(n_components=2)
principal_components = pca.fit_transform(df_scaled)

df_pca = pd.DataFrame(data=principal_components, columns=['PC1', 'PC2'])

print("Explained Variance Ratio:")
print(pca.explained_variance_ratio_)

# Round to two decimals purely for display.
formatted_principal_components = np.array2string(principal_components, precision=2, separator=', ')
print("Principal Components:", formatted_principal_components)

plt.scatter(df_pca['PC1'], df_pca['PC2'])
plt.title('PCA: Principal Components')
plt.xlabel('Principal Component 1')
# The closing quote and parenthesis were missing here, which made the cell a SyntaxError.
plt.ylabel('Principal Component 2')
plt.show()