Dimensionality reduction (using PCA) and modelling (K-means) of dataset

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.datasets import fetch_openml

# Load the MNIST dataset (handwritten digits) from OpenML as NumPy arrays
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X = mnist.data[:5000]  # Use the first 5000 samples for faster processing

# Apply PCA to reduce dimensions from 784 to 100.
# random_state is fixed because, for data of this shape, scikit-learn's
# default svd_solver='auto' policy can select the randomized SVD solver,
# whose output would otherwise vary from run to run.
pca = PCA(n_components=100, random_state=42)
X_pca = pca.fit_transform(X)

# Fraction of the total variance retained by the 100 components
print("Explained Variance Ratio:", np.sum(pca.explained_variance_ratio_))

# Reconstruct images (back in the original pixel space) from the reduced
# PCA components
X_reconstructed = pca.inverse_transform(X_pca)


# Show the first sample next to its reconstruction from the PCA components
fig, axes = plt.subplots(1, 2, figsize=(8, 4))

# (flattened pixels, panel title) for the left and right panel respectively
panels = (
    (X[0], "Original Image"),
    (X_reconstructed[0], "Reduced Image (PCA)"),
)
for ax, (pixels, title) in zip(axes, panels):
    # Each sample is a flat vector; reshape it to 28x28 for display
    ax.imshow(pixels.reshape(28, 28), cmap='gray')
    ax.set_title(title)

plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.datasets import fetch_olivetti_faces


# Load the Olivetti Faces dataset (images of faces), shuffled with a fixed seed
faces = fetch_olivetti_faces(shuffle=True, random_state=420)
X = faces.data  # Each image is a flattened 1D array of pixel values

# Apply PCA to reduce from 4096 features to 100.
# PCA is seeded as well as the shuffle: for data of this shape,
# scikit-learn's default svd_solver='auto' policy can select the randomized
# SVD solver, whose output would otherwise vary from run to run.
pca = PCA(n_components=100, random_state=420)
X_pca = pca.fit_transform(X)

# Fraction of the total variance retained by the 100 components
print("Explained Variance Ratio:", np.sum(pca.explained_variance_ratio_))


# Map the reduced representation back to pixel space (100 PCA components)
reconstructed = pca.inverse_transform(X_pca)

# Show the first face next to its reconstruction
fig, axes = plt.subplots(1, 2, figsize=(8, 4))

# (flattened pixels, panel title) for the left and right panel respectively
panels = (
    (X[0], "Original Image"),
    (reconstructed[0], "Reduced Image (PCA)"),
)
for ax, (pixels, title) in zip(axes, panels):
    # Each sample is a flat vector; reshape it to 64x64 for display
    ax.imshow(pixels.reshape(64, 64), cmap='gray')
    ax.set_title(title)

plt.show()

In [None]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Simulated traffic data: each feature is 10 random integers drawn from a
# half-open [low, high) range. The seed is fixed so the draws are repeatable.
np.random.seed(42)
feature_ranges = {
    "Vehicle Count": (100, 500),
    "Avg Speed": (30, 80),
    "Time of Day": (0, 24),
    "Road Type": (1, 5),
}
# Dict order is preserved, so features are drawn in the order listed above
traffic_data = pd.DataFrame({
    feature: np.random.randint(low, high, 10)
    for feature, (low, high) in feature_ranges.items()
})

# Standardize every feature: learn the scaling first, then apply it
scaler = StandardScaler().fit(traffic_data)
traffic_data_scaled = scaler.transform(traffic_data)

# Project the 4 standardized features onto 2 principal components
pca = PCA(n_components=2)
traffic_data_pca = pca.fit_transform(traffic_data_scaled)

# Share of the variance explained by each retained component
print("Explained Variance Ratio:", pca.explained_variance_ratio_)

In [None]:
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Sample dataset, written row-wise: one inner list per sample, with values in
# the same order as the column names
feature_names = ['Size', 'Bedrooms', 'Bathrooms', 'Location Score', 'Year Built']
rows = [
    [2000, 3, 2, 8, 2010],
    [1500, 2, 1, 7, 2015],
    [3000, 4, 3, 9, 2005],
]
data = pd.DataFrame(rows, columns=feature_names)

# Standardize features (mean = 0, variance = 1): fit the scaler, then apply it
scaler = StandardScaler().fit(data)
data_scaled = scaler.transform(data)
from sklearn.decomposition import PCA

# Apply PCA: project the 5 standardized features onto 2 principal components
pca = PCA(n_components=2)
principal_components = pca.fit_transform(data_scaled)

# Wrap the projected values in a DataFrame with one named column per
# component, then display it
data_pca = pd.DataFrame(principal_components, columns=['PC1', 'PC2'])
print(data_pca)
import matplotlib.pyplot as plt

# Scatter plot of the samples in the reduced feature space: first principal
# component on the x-axis, second on the y-axis
pc1, pc2 = data_pca['PC1'], data_pca['PC2']
plt.scatter(pc1, pc2, color='blue')
plt.title('Dimensionality Reduction using PCA')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.show()