# In [None]:
import numpy as np
import torch
import torchvision
import numpy as np

# Step 1: Load the CIFAR10 training set (images and their labels)
# ToTensor is the only transform applied; no resizing or mean/std
# normalisation happens before the images are handed to the network.
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor()
])
cifar10_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
# shuffle=False keeps position i of `images`/`labels` identical to
# cifar10_dataset[i]. Later code looks labels up by that index, so a shuffled
# loader would pair every feature vector with some other image's label.
cifar10_loader = torch.utils.data.DataLoader(cifar10_dataset, batch_size=1, shuffle=False)

images = []
labels = []
for image, label in cifar10_loader:
    # batch_size=1: drop only the leading batch axis before converting to NumPy.
    images.append(image.squeeze(0).numpy())
    labels.append(label)
images = np.array(images)

# Step 2: Load the pre-trained ResNet-34 network in evaluation mode
# eval() returns the module it was called on, so chaining it leaves `resnet`
# bound to the same network while switching it out of training mode.
resnet = torchvision.models.resnet34(pretrained=True).eval()

# Step 3: Extract one feature vector per image by running it through the network
# NOTE(review): resnet(...) returns the output of the model's final layer, not
# the pooled activations before it — confirm this is the intended "feature".
features = []
# Inference only: no_grad() stops autograd from recording a graph for every
# forward pass, which would otherwise be built and immediately thrown away.
with torch.no_grad():
    for image in images:
        # Re-add the batch axis the network expects.
        image_tensor = torch.from_numpy(image).unsqueeze(0)
        feature_vector = resnet(image_tensor)
        features.append(feature_vector.squeeze().numpy())
features = np.array(features)

# Step 4: Normalize the feature vectors using min-max feature scaling
# Each column is shifted by its minimum and divided by its range.
min_vals = np.min(features, axis=0)
max_vals = np.max(features, axis=0)
value_range = max_vals - min_vals
# A column that is constant across all samples has zero range; dividing by it
# would produce NaN (0 / 0) and poison every later distance computation.
# Substituting 1 makes such a column scale to 0 instead.
value_range = np.where(value_range == 0, 1, value_range)
normalized_features = (features - min_vals) / value_range

# Step 5: Define SOM network parameters
output_neurons = 10
input_dim = normalized_features.shape[1]


# Step 6: Initialize SOM weights from evenly spaced training samples
def _sample_initial_weights(samples, neuron_count):
    """Return ``neuron_count`` evenly spaced rows of ``samples`` as a new array.

    Rows are taken at indices 0, stride, 2 * stride, ... with
    ``stride = len(samples) // neuron_count``, so exactly ``neuron_count`` rows
    are selected even when the sample count is not a multiple of it.

    Integer-array indexing copies the selected rows, so the result shares no
    memory with ``samples``; updating the weights in place cannot modify the
    training data or another mode's weights.

    Raises:
        ValueError: if ``samples`` has fewer rows than ``neuron_count``.
    """
    stride = len(samples) // neuron_count
    if stride == 0:
        raise ValueError(
            f"need at least {neuron_count} samples to initialise the SOM, got {len(samples)}"
        )
    row_indices = np.arange(neuron_count) * stride
    return samples[row_indices].reshape((neuron_count, -1))


# Each mode gets its own independent weight array; they differ only in the
# starting neighborhood diameter.
modes = [
    {
        "name": "First mode",
        "neighborhood_diameter": 1,
        "weight_vectors": _sample_initial_weights(normalized_features, output_neurons),
    },
    {
        "name": "Second mode",
        "neighborhood_diameter": 3,
        "weight_vectors": _sample_initial_weights(normalized_features, output_neurons),
    },
]

for mode in modes:
    print(mode["name"])
    print("-" * 50)

    # Step 7: Train SOM network
    epochs = 20
    learning_rate = 0.5

    # Train on a private copy so the in-place updates below cannot leak into
    # the mode's stored initial weights or into any array they may share
    # memory with.
    weight_vectors = np.array(mode["weight_vectors"], copy=True)

    for epoch in range(epochs):
        # Linearly decay the learning rate and neighborhood diameter
        decay = 1 - epoch / epochs
        current_learning_rate = learning_rate * decay
        current_diameter = int(mode["neighborhood_diameter"] * decay)

        # Gaussian influence, indexed by grid distance from the winning neuron.
        # Once the diameter has decayed to 0 only the winner (distance 0) is
        # updated; give it full influence instead of evaluating 0 / 0, which
        # is NaN and would turn the winner's weights into NaN.
        if current_diameter > 0:
            grid_offsets = np.arange(current_diameter + 1)
            influence_by_distance = np.exp(-(grid_offsets ** 2) / (2 * current_diameter ** 2))
        else:
            influence_by_distance = np.ones(1)

        for feature_vector in normalized_features:
            # Find the winning neuron (closest weight vector)
            distances = np.linalg.norm(feature_vector - weight_vectors, axis=1)
            winner_neuron = int(np.argmin(distances))

            # Update the winner and every neuron within the current diameter
            # on the 1-D neuron grid, clipped to the valid index range.
            first_neighbor = max(0, winner_neuron - current_diameter)
            last_neighbor = min(output_neurons - 1, winner_neuron + current_diameter)
            for neuron in range(first_neighbor, last_neighbor + 1):
                influence = influence_by_distance[abs(neuron - winner_neuron)]
                weight_vectors[neuron] += current_learning_rate * influence * (feature_vector - weight_vectors[neuron])

    # Step 8: Determine distribution of labels in each cluster
    cluster_labels = [[] for _ in range(output_neurons)]
    for i, feature_vector in enumerate(normalized_features):
        distances = np.linalg.norm(feature_vector - weight_vectors, axis=1)
        winner_neuron = np.argmin(distances)
        # `labels[i]` was gathered in the same loader pass as image i, so it
        # belongs to this feature vector whatever order the loader used.
        # It also avoids re-running the dataset transform just to read a label.
        cluster_labels[winner_neuron].append(int(labels[i]))

    # Print the distribution of labels in each cluster
    for i, members in enumerate(cluster_labels):
        print(f"Cluster {i+1}: {len(members)} images")
        # np.unique returns the distinct labels in ascending order with counts.
        distinct_labels, occurrences = np.unique(members, return_counts=True)
        for label, count in zip(distinct_labels, occurrences):
            print(f"  Label {int(label)}: {int(count)} images")
        print()

    print()
