# In [1]:
from sklearn.datasets import fetch_openml
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from minisom import MiniSom
import numpy as np
import matplotlib.pyplot as plt

# Load the MNIST dataset (70,000 flattened 28x28 digit images).
mnist = fetch_openml('mnist_784')

# Standardize the pixels, then reduce to 50 principal components.
# Both fitted transformers are kept so the reduced vectors can later be
# mapped back to pixel space for visualization.
X = mnist.data.astype('float32')
scaler = StandardScaler()
X = scaler.fit_transform(X)
pca = PCA(n_components=50, random_state=42)
X = pca.fit_transform(X)

# Initialize the SOM on the PCA-reduced feature space.
som_shape = (20, 20)
som = MiniSom(som_shape[0], som_shape[1], X.shape[1], sigma=1.0, learning_rate=0.5,
              neighborhood_function='gaussian', random_seed=42)

# Train the SOM
som.train_random(X, 10000)

# Group the samples by their best-matching neuron: {(row, col): [samples]}.
mapped = som.win_map(X)

# Plot, for each neuron, the average digit image of the samples it won.
fig, ax = plt.subplots(som_shape[0], som_shape[1], figsize=(10, 10))
for i in range(som_shape[0]):
    for j in range(som_shape[1]):
        cell = ax[i, j]
        cell.axis('off')

        # .get() avoids inserting empty entries into the mapping; neurons
        # that won no samples are simply left blank instead of crashing.
        samples = mapped.get((i, j))
        if samples is None or len(samples) == 0:
            continue

        # The samples live in the 50-dimensional PCA space, so their mean
        # cannot be reshaped to 28x28 directly. Undo the PCA projection and
        # then the standardization to get back to 784 pixel values. Both
        # inverse transforms are affine, so inverting the mean is equivalent
        # to averaging the individually inverted samples.
        mean_reduced = np.mean(samples, axis=0).reshape(1, -1)
        mean_scaled = pca.inverse_transform(mean_reduced)
        mean_pixels = scaler.inverse_transform(mean_scaled)[0]

        cell.imshow(mean_pixels.reshape(28, 28), cmap='gray')
plt.tight_layout()
plt.show()
