In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from minisom import MiniSom
from sklearn.preprocessing import StandardScaler

# Load dataset
weather_dataset = pd.read_csv('weatherHistory.csv')

# Filter top weather summary classes
top_classes = ['Overcast', 'Clear', 'Foggy']
weather_dataset = weather_dataset[weather_dataset['Summary'].isin(top_classes)]

# Handle missing values
weather_dataset["Precip Type"] = weather_dataset["Precip Type"].fillna("None")

# One-hot encode 'Precip Type'
weather_dataset = pd.get_dummies(weather_dataset, columns=["Precip Type"], drop_first=True)

# Extract date parts
weather_dataset["Formatted Date"] = pd.to_datetime(weather_dataset["Formatted Date"], utc=True)
weather_dataset["Month"] = weather_dataset["Formatted Date"].dt.month
weather_dataset["Year"] = weather_dataset["Formatted Date"].dt.year
weather_dataset["Day"] = weather_dataset["Formatted Date"].dt.day

# Save label and drop unwanted columns
labels = weather_dataset["Summary"].values
weather_dataset = weather_dataset.drop(columns=[
    "Daily Summary", "Loud Cover", "Formatted Date", 
    "Apparent Temperature (C)", "Summary"
])

# Convert to float32 for MiniSom compatibility
weather_dataset = weather_dataset.astype(np.float32)

# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(weather_dataset)

# Initialize and train SOM
som = MiniSom(x=30, y=30, input_len=X_scaled.shape[1], sigma=1.0, learning_rate=0.5)
som.random_weights_init(X_scaled)
som.train_random(X_scaled, num_iteration=1000)

# Map each data point to its winning neuron
winner_coordinates = np.array([som.winner(x) for x in X_scaled])

# Create a dict to collect labels mapped to each neuron
neuron_labels = {}

for coord, label in zip(winner_coordinates, labels):
    if coord not in neuron_labels:
        neuron_labels[coord] = []
    neuron_labels[coord].append(label)

# Define color map for classes
label_colors = {'Overcast': 'red', 'Clear': 'green', 'Foggy': 'blue'}

# Create a color grid for the SOM neurons based on majority label
color_grid = np.zeros((som_size, som_size, 3))  # RGB image grid initialized to black

for i in range(som_size):
    for j in range(som_size):
        if (i, j) in neuron_labels:
            # Find majority label for neuron
            majority_label = Counter(neuron_labels[(i, j)]).most_common(1)[0][0]
            color_grid[i, j] = plt.colors.to_rgb(label_colors[majority_label])
        else:
            color_grid[i, j] = (0, 0, 0)  # black for no data mapped

# Plot the colored SOM grid
plt.figure(figsize=(10, 10))
plt.title('SOM Cluster Map by Majority Weather Class')
plt.imshow(color_grid, origin='lower')
plt.axis('off')
plt.show()

TypeError: unhashable type: 'numpy.ndarray'