In [1]:
import numpy as np  
import pandas as pd
from sklearn.preprocessing import StandardScaler
from scipy import stats
import math

In [None]:
# Column names for the wine data set. The first name, 'label', is the class
# label; the remaining thirteen are the measured features.
feature_names = ['label',
                 'Alcohol',
                 'Malic acid',
                 'Ash',
                 'Alcalinity of ash',
                 'Magnesium',
                 'Total phenols',
                 'Flavanoids',
                 'Nonflavanoid phenols',
                 'Proanthocyanins',
                 'Color intensity',
                 'Hue',
                 'OD280/OD315 of diluted wines',
                 'Proline']

# index_col=0 turns the 'label' column into the row index, so from here on the
# class label of a row is its index value, not a regular column.
data = pd.read_csv("wine.data", names=feature_names, index_col=0)

# Shuffle the rows before splitting. The seed is fixed so that the split (and
# everything derived from it) is identical after "Restart Kernel -> Run All".
data = data.sample(frac=1, random_state=42)

# Number of rows that go into the training half.
half = len(data) // 2

# Class labels of all (shuffled) rows, displayed as the cell output.
labels = data.index
labels


In [None]:
# The first `half` rows of the shuffled frame become the training split and
# every remaining row becomes the test split.
train = data.head(half)
test = data.tail(len(data) - half)

# Report the container type and shape of both splits, then show the training rows.
print(f"{type(train)} {train.shape}\n{type(test)} {test.shape}")
print(train)

In [None]:
# Extract the class labels and the feature matrices.
#
# `data` was read with index_col=0, so the class label is the row index of
# `train` / `test` and is NOT a column: `iloc[:, 0]` would return the first
# feature ('Alcohol') and `drop("label", axis=1)` would raise KeyError.
# Fail loudly if the loading step is ever changed to a different layout.
assert train.index.name == "label" and test.index.name == "label", \
    "expected the class label to be the row index (read_csv(..., index_col=0))"
assert "label" not in train.columns

labels = train.index.to_numpy()
labels_test = test.index.to_numpy()

# `train` / `test` already contain only feature columns; copy them so that the
# feature frames can be modified later without touching the splits, and so
# that re-running this cell always gives the same result.
train_features = train.copy()
test_features = test.copy()

print(labels)
print(labels.shape)
print(train_features.shape)
print(test_features.head())
print(test_features.shape)

In [None]:

class SOFM:
    """Self-organizing feature map (SOFM) on a square grid of neurons.

    Attributes (all populated by `initial`):
        W          -- weights, array of shape (grid_size, grid_size, M)
        M          -- number of input features per sample
        sofmamount -- requested number of neurons
        grid_size  -- side length of the grid, int(sqrt(sofmamount))
        labelgrid  -- grid_size x grid_size nested lists; cell [r][c] collects
                      the label of every sample whose winning neuron was (r, c)
    """

    def __init__(self):
        self.W = []
        self.M = 0
        self.sofmamount = 0
        self.grid_size = 0
        # Built in initial() so that it always matches grid_size.
        self.labelgrid = []

    def initial(self, inputamount, sofmamount):
        """Allocate the grid and draw random initial weights.

        Args:
            inputamount: number of features of each input sample.
            sofmamount: requested number of neurons; the grid side is
                int(sqrt(sofmamount)), so non-square values are rounded down.

        Raises:
            ValueError: if the resulting grid or the feature count is empty.
        """
        grid_size = int(np.sqrt(sofmamount))
        if grid_size < 1 or inputamount < 1:
            raise ValueError("inputamount and sofmamount must both be at least 1")

        self.M = inputamount
        self.sofmamount = sofmamount
        self.grid_size = grid_size
        weight_range = 0.1
        self.W = np.random.uniform(-weight_range, weight_range,
                                   size=(self.grid_size, self.grid_size, self.M))
        # One (initially empty) label list per neuron, sized to the actual grid
        # instead of a fixed 10 x 10.
        self.labelgrid = [[[] for _ in range(self.grid_size)]
                          for _ in range(self.grid_size)]

    def forward(self, data, winner_counts, sample_labels=None,
                learning_rate=0.1, sigma=1.0, train=True):
        """Present every row of `data` to the map, one sample at a time.

        For each row the winning neuron is located, the row's label is appended
        to that neuron's `labelgrid` cell, the weights are updated (unless
        `train` is False) and the winner's entry in `winner_counts` is
        incremented.

        Args:
            data: pandas DataFrame, one sample per row, `M` feature columns.
            winner_counts: 2-D array indexed by grid position; modified in place.
            sample_labels: optional sequence with one label per row of `data`
                (matched by position). When omitted, the row's index value is
                recorded as its label.
            learning_rate: step size handed to `backward`.
            sigma: neighbourhood width handed to `backward`.
            train: when False the weights are left untouched (mapping only).

        Returns:
            The same `winner_counts` array that was passed in.

        Raises:
            ValueError: if `sample_labels` does not have one entry per row.
        """
        if sample_labels is not None:
            # Convert so that lookup below is positional even for a pandas Series.
            sample_labels = np.asarray(sample_labels)
            if len(sample_labels) != len(data):
                raise ValueError("sample_labels must contain one label per row of data")

        for position, (index, row) in enumerate(data.iterrows()):
            sample = row.to_numpy(dtype=float).reshape(1, -1)
            distances, winner_pos = self.forward_once(sample)

            label = index if sample_labels is None else sample_labels[position]
            self.labelgrid[winner_pos[0]][winner_pos[1]].append(label)

            if train:
                # Update with the current sample only, not the whole data set.
                self.backward(distances, winner_pos, sample, learning_rate, sigma)

            # Count how often each neuron wins.
            winner_counts[winner_pos] += 1
        return winner_counts

    def forward_once(self, data):
        """Find the neuron whose weight vector is closest to one sample.

        Args:
            data: a single sample holding `M` values, e.g. shape (1, M) or (M,).

        Returns:
            (distances, winner_pos): `distances` is the (grid_size, grid_size)
            array of Euclidean distances between the sample and every neuron's
            weights; `winner_pos` is the (row, col) tuple of the smallest one.
        """
        sample = np.asarray(data, dtype=float).reshape(-1)
        # Broadcasting subtracts the sample from every neuron's weight vector.
        distances = np.linalg.norm(sample - self.W, axis=2)
        winner_index = np.argmin(distances)
        winner_pos = np.unravel_index(winner_index, (self.grid_size, self.grid_size))
        return distances, winner_pos

    def backward(self, distances, winner_pos, data, learning_rate, sigma=1.0):
        """Pull the winner and its grid neighbours towards one sample.

        Each neuron is moved by
        ``learning_rate * h * (sample - weight)`` where
        ``h = exp(-d**2 / (2 * sigma**2))`` and ``d`` is the neuron's distance
        to the winner measured on the grid. The winner itself has ``h == 1``;
        neurons further away on the grid move less.

        Args:
            distances: accepted for call compatibility; not used by the update.
            winner_pos: (row, col) of the winning neuron.
            data: the single sample that produced `winner_pos`, `M` values.
            learning_rate: step size of the update.
            sigma: width of the Gaussian neighbourhood, in grid cells.

        Raises:
            ValueError: if `sigma` is not positive.
        """
        if sigma <= 0:
            raise ValueError("sigma must be positive")

        sample = np.asarray(data, dtype=float).reshape(-1)
        rows, cols = np.indices((self.grid_size, self.grid_size))
        grid_dist_sq = (rows - winner_pos[0]) ** 2 + (cols - winner_pos[1]) ** 2
        influence = np.exp(-grid_dist_sq / (2.0 * sigma ** 2))
        # influence gets a trailing axis so it scales every feature of a neuron.
        self.W += learning_rate * influence[..., np.newaxis] * (sample - self.W)


In [None]:
# Build the SOFM
feature_dim = train_features.shape[1]  # one weight per feature column, taken from the data
sofm_layer = 100                       # requested neuron count; SOFM.initial uses int(sqrt(.)) as grid side

# SOFM.initial draws its weights from np.random; seed it so repeated runs
# start from the same map.
np.random.seed(0)

sofm = SOFM()
sofm.initial(feature_dim, sofm_layer)
winner_counts = np.zeros((sofm.grid_size, sofm.grid_size))
classification_results = np.zeros((sofm.grid_size, sofm.grid_size))

# Both calls receive the same `winner_counts` array, which forward() updates in
# place and returns, so `sofm_output` holds the combined train + test counts.
sofm.forward(train_features, winner_counts)
sofm_output = sofm.forward(test_features, winner_counts)

# Print the results
print("SOFM輸出：")
print(sofm_output)
print(sofm.labelgrid)
# Uncomment to print the weight matrix
# print(sofm.W)


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Draw the SOFM winner counts as a heat map and annotate every cell.
# A single full-size axes is used; a 1x2 subplot layout with only the first
# panel filled would squeeze the map into half of the figure.
fig, ax = plt.subplots(figsize=(8, 8))
ax.set_title('SOFM Output')
heatmap = ax.imshow(sofm_output, cmap='hot', interpolation='nearest')
fig.colorbar(heatmap, ax=ax)

# The 'hot' colormap is dark for low values and bright for high values, so the
# annotation colour is switched at the midpoint to stay readable on both.
text_threshold = sofm_output.max() / 2

# Write "<count>,<label>,<label>,..." into every cell (just the count when the
# cell collected no labels).
for i in range(sofm_output.shape[0]):
    for j in range(sofm_output.shape[1]):
        count_text = str(int(sofm_output[i][j]))
        cell_labels = sofm.labelgrid[i][j]
        if cell_labels:
            label = count_text + ',' + ','.join(map(str, cell_labels))
        else:
            label = count_text
        text_color = 'black' if sofm_output[i][j] > text_threshold else 'white'
        ax.text(j, i, label, ha='center', va='center', color=text_color, fontsize=7)

# Tighten the layout so the colour bar and title do not overlap the map.
fig.tight_layout()

# Show the figure
plt.show()
