In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import preprocessing 
from sklearn import metrics

In [5]:
# Load the comma-separated data file (no header row). The first four columns
# are used as the feature matrix and column 4 as the label.
data = pd.read_csv("../iris.data", sep=',', header=None)
data1 = np.array(data.iloc[:, 0:4], dtype=float)
labels = data.iloc[:, 4]

# 50/50 split, stratified on the label column. A fixed random_state makes the
# split (and therefore every score computed from it) repeatable across
# "Restart Kernel -> Run All".
iris_train, iris_test, y_train, y_test = train_test_split(
    data1, labels, test_size=0.5, stratify=labels, random_state=42
)

In [6]:
class SOM():
    """Self-organizing map whose weights start near the sample mean.

    Samples and weights are scaled to unit Euclidean length and the winning
    node for a sample is the one with the largest dot product.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training samples. A float copy is normalized; the caller's array is
        left untouched.
    out : int or tuple of int
        Output-layer layout: an int for a 1-D line of nodes, a tuple for a
        grid (nodes are numbered in row-major order).
    tm : int
        Maximum number of training epochs.

    Raises
    ------
    TypeError
        If ``out`` is neither an int nor a tuple.
    ValueError
        If ``out`` describes an output layer with no nodes.
    """

    def __init__(self, X, out, tm):  # sample-mean weight initialization
        # Work on a float copy so the caller's data is not normalized in place.
        self.X = self.normal(np.array(X, dtype=float))
        self.tm = tm  # maximum number of epochs
        self.out = out
        if isinstance(self.out, tuple):  # tuple -> grid-shaped output layer
            self.node = 1
            for size in self.out:
                self.node *= size
        elif isinstance(self.out, (int, np.integer)):  # int -> 1-D output layer
            self.node = self.out
        else:
            raise TypeError("out must be an int or a tuple of ints, got %r" % (out,))
        if self.node < 1:
            raise ValueError("out must describe at least one node, got %r" % (out,))
        # Start every node at the sample mean and add a uniform [0, 1) offset
        # per component so the nodes differ from each other.
        self.W = np.array([self.X.mean(axis=0) for _ in range(self.node)]) \
            + np.random.rand(self.node, self.X.shape[1])
        print(self.W)
        self.W = self.normal(self.W)  # normalize the initial weights
        print(self.W)

    def normal(self, data):
        """Scale every row of ``data`` to unit Euclidean length, in place.

        ``data`` must be a 2-D float array; the same array object is returned.
        Rows whose norm is zero are left unchanged instead of becoming NaN.
        """
        norms = np.sqrt(np.sum(data * data, axis=1, keepdims=True))
        norms[norms == 0] = 1.0  # avoid 0/0 for all-zero rows
        data /= norms
        return data

    def getR(self, t):
        """Return the neighbourhood radius for epoch ``t``.

        The radius starts at ``node ** (1/5)`` and decreases linearly with
        ``t / tm``.
        """
        c1 = np.power(self.node, 1 / 5)  # positive constant tied to the node count
        r = c1 * (1 - t / self.tm)
        return r

    def getN(self, winner, r):
        """Return the indices of all nodes within radius ``r`` of ``winner``.

        Nodes are laid out in row-major order on a grid of shape ``out`` (an
        int is treated as a 1-D grid). A node is a neighbour when its
        coordinate differs from the winner's by at most ``r`` along every
        axis. The result is an ascending list of ints and always contains
        ``winner`` for ``r >= 0``.
        """
        shape = self.out if isinstance(self.out, tuple) else (self.out,)
        # Row-major coordinates of every node, shape (node, len(shape)).
        coords = np.stack(np.unravel_index(np.arange(self.node), shape), axis=1)
        within = np.all(np.abs(coords - coords[winner]) <= r, axis=1)
        return np.flatnonzero(within).tolist()

    def getAlpha(self, t):
        """Return the learning rate for epoch ``t`` (linear decay from 0.9)."""
        c2 = 0.9  # initial learning rate
        alpha = c2 * (1 - t / self.tm)
        return alpha

    def updataW(self, alpha, N, x):
        """Move the weights of the nodes listed in ``N`` towards sample ``x``."""
        self.W[N] += alpha * (x - self.W[N])
        return

    def train(self):
        """Run up to ``tm`` epochs of online training over ``self.X``.

        Stops early (printing the epoch number) once the learning rate has
        dropped to 1e-5 or below, then prints the final weights.
        """
        for t in range(self.tm):
            # Learning rate and radius depend only on the epoch, so compute
            # them once per epoch rather than once per sample.
            alpha = self.getAlpha(t)
            r = self.getR(t)
            for x in self.X:
                winner = np.argmax(np.dot(x, self.W.T))
                self.updataW(alpha, self.getN(winner, r), x)
            if alpha <= 0.00001:
                print(t)
                break
        print(self.W)
        return

    def test(self, test_data):
        """Return, for each row of ``test_data``, the index of its winning node."""
        return [np.argmax(np.dot(sample, self.W.T)) for sample in test_data]

In [8]:
# Train and score the mean-initialized SOM eight times with a different
# sample order each run.
for i in range(8):
    # Shuffle a copy: shuffling iris_train in place would silently break its
    # row-by-row correspondence with y_train for any later cell.
    train_shuffled = np.random.permutation(iris_train)
    som = SOM(train_shuffled, 3, 1000)
    som.train()
    res = som.test(iris_test)
    # 1 - adjusted Rand index between the true labels and the winning nodes.
    err = 1 - metrics.adjusted_rand_score(np.array(y_test), res)
    # NOTE(review): the first value printed is err scaled by the test-set
    # size, not a literal count of misclassified samples.
    print('错分数:', round(err * len(y_test),4))
    print('错分率:', '{:.2%}'.format(err))

[[1.1823957  0.79352791 0.53650733 0.99552794]
 [1.65899625 1.31322301 1.03574881 0.1820897 ]
 [1.20647515 1.23522323 1.16813621 1.08374744]]
[[0.65023139 0.43638247 0.2950399  0.54746775]
 [0.70213577 0.55579442 0.43835921 0.07706569]
 [0.51349088 0.52572642 0.49717335 0.46125644]]
[[0.70524856 0.3158959  0.59586001 0.21320751]
 [0.75196746 0.35207705 0.5302274  0.16537358]
 [0.80041039 0.54913349 0.23220075 0.03999563]]
错分数: 5.9132
错分率: 7.88%
[[1.55955731 0.60161481 1.1927654  1.12222451]
 [1.21418355 0.55070474 0.8979626  0.17453705]
 [0.82917552 0.86434734 1.32865931 0.35552165]]
[[0.66643875 0.2570854  0.50969918 0.47955525]
 [0.75093991 0.34059609 0.55536575 0.10794648]
 [0.45463396 0.47391855 0.72849913 0.19493124]]
[[0.70707255 0.31417213 0.59481534 0.21216627]
 [0.75126044 0.35582181 0.52922038 0.16469048]
 [0.80058739 0.54890786 0.23223497 0.04018978]]
错分数: 5.9132
错分率: 7.88%
[[0.89499903 0.46807699 1.0940786  0.52865594]
 [1.12642398 1.15647721 1.30278984 0.24629723]
 [1.2068

In [11]:
class SOM2(SOM):
    """SOM variant whose initial weights are drawn from a normal distribution.

    Each weight component is sampled from a normal distribution centred on
    the corresponding component of the sample mean, with a standard deviation
    equal to the largest distance between any normalized sample and that
    mean. Everything except the initialization is inherited from ``SOM``.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training samples. A float copy is normalized; the caller's array is
        left untouched.
    out : int or tuple of int
        Output-layer layout (int for a 1-D layer, tuple for a grid).
    tm : int
        Maximum number of training epochs.

    Raises
    ------
    TypeError
        If ``out`` is neither an int nor a tuple.
    """

    def __init__(self, X, out, tm):  # covering weight initialization
        # Normalize a float copy so the caller's data is not modified.
        self.X = self.normal(np.array(X, dtype=float))
        self.tm = tm
        self.out = out
        if isinstance(self.out, tuple):  # tuple -> grid-shaped output layer
            self.node = 1
            for size in self.out:
                self.node *= size
        elif isinstance(self.out, (int, np.integer)):  # int -> 1-D output layer
            self.node = self.out
        else:
            raise TypeError("out must be an int or a tuple of ints, got %r" % (out,))

        center = self.X.mean(axis=0)  # mean of the normalized samples
        # Largest distance from any normalized sample to the mean. This uses
        # self.X explicitly rather than relying on X having been normalized
        # in place as a side effect.
        dmax = np.linalg.norm(self.X - center, axis=1).max()

        # One column per feature, drawn in feature order:
        # N(center[j], dmax) for every node.
        w = np.column_stack(
            [np.random.normal(mu, dmax, self.node) for mu in center]
        )
        self.W = self.normal(w)  # normalize the initial weights
        print(self.W)

In [12]:
# Train and score the normal-distribution-initialized SOM2 eight times.
for i in range(8):
    som2 = SOM2(iris_train, 3, 750)
    som2.train()
    res2 = som2.test(iris_test)
    # 1 - adjusted Rand index between the true labels and the winning nodes.
    ari2 = metrics.adjusted_rand_score(y_test, res2)
    err2 = 1 - ari2
    scaled_err2 = round(err2 * len(y_test), 4)
    print('错分数:', scaled_err2)
    print('错分率:', format(err2, '.2%'))

[[ 0.88864909  0.15486     0.36649293  0.22804409]
 [ 0.80398292 -0.375437    0.40638238  0.21797224]
 [ 0.84709793  0.08755011  0.52222224  0.04521075]]
[[0.79997174 0.54982669 0.23208722 0.03998665]
 [0.77618106 0.50784706 0.29689367 0.06838535]
 [0.72596223 0.33214331 0.56682008 0.19155509]]
错分数: 32.6471
错分率: 43.53%
