# In [1]:
# Question 7: Combining PCA and t-SNE for Better Results
# Description: Combine PCA and t-SNE for large datasets – reduce dimensions using PCA, visualize with t-SNE.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import fetch_openml

# Load the CIFAR-10 data published on OpenML under the name 'CIFAR_10_small'
from sklearn import datasets

# Pin the dataset version so every run fetches exactly the same data, and ask
# for plain NumPy arrays (as_frame=False): a pandas DataFrame with one column
# per pixel adds overhead and nothing below needs column labels.
cifar10 = datasets.fetch_openml('CIFAR_10_small', version=1, as_frame=False)

# Preprocess the data
X = cifar10.data.astype(np.float32) / 255.0  # Normalize pixel values to [0, 1]
# Cast the class labels to integers so they can drive the scatter-plot colors.
y = cifar10.target.astype(int)

# Standardize the data: learn per-feature mean and variance from X, then
# rescale X with those statistics (zero mean, unit variance per feature).
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

# Step 1: Apply PCA for initial dimensionality reduction
n_components_pca = 50  # Reduce to 50 dimensions
# Seed PCA as well as t-SNE: for large inputs scikit-learn's default solver
# may choose randomized SVD, so without a fixed random_state the projection
# (and therefore the final embedding) can differ between runs.
pca = PCA(n_components=n_components_pca, random_state=42)
X_pca = pca.fit_transform(X_scaled)

# Step 2: Embed the PCA-reduced data into 2 dimensions with t-SNE
# (seeded so the layout is repeatable).
tsne = TSNE(n_components=2, random_state=42)
tsne.fit(X_pca)
# The fitted estimator keeps the low-dimensional coordinates in `embedding_`.
X_tsne = tsne.embedding_

# Step 3: Visualize the results using a scatter plot
plt.figure(figsize=(10, 8))
# The labels are categories, not quantities, so use a qualitative colormap
# ('tab10') instead of a sequential one: each class gets a clearly distinct
# color and no ordering between classes is implied.
scatter = plt.scatter(
    X_tsne[:, 0], X_tsne[:, 1], c=y, cmap='tab10', alpha=0.5, s=10
)
# One colorbar tick per class label that actually occurs in y.
cbar = plt.colorbar(scatter, ticks=np.unique(y))
cbar.set_label('Class label')
plt.title('t-SNE Visualization of CIFAR-10 (after PCA)')
plt.xlabel('t-SNE 1')
plt.ylabel('t-SNE 2')
plt.show()


# In [ ]: