In [71]:
import numpy as np

def _similarity(Xi,Xj,sigma=0.01):
    return np.exp(-np.sum((Xi-Xj)**2)/(2*sigma**2))

def init_graph(X,y,k=3):
    n_X = len(X)
    distance = np.zeros((n_X,n_X))+np.inf
    graph = np.zeros((n_X,n_X))
    for i in range(n_X):
        for j in range(n_X):
            distance[i,j] = np.sqrt(np.sum((X[i]-X[j])**2))
    for i in range(n_X):
        neighbors_index = np.argsort(distance[:,i])[:k+1]
        graph[neighbors_index,i] = 1
    for i in range(n_X):
        if y[i,:].sum()!=0:
            graph[:,i]=0
            graph[i,i]=1
    return graph

def init_W(graph,X,sigma=1):
    n_X = len(X)
    W = np.zeros((n_X,n_X))
    for i in range(n_X):
        for j in range(n_X):
            if i==j:
                W[i,j]=1
            elif graph[i,j]==1:
                W[i,j]=_similarity(X[i,:],X[j,:],sigma)
            else:
                W[i,j]=0
                
    print("W:")
    print(W)
    print()
    return W

def init_R(W,n_X,t):
    D = np.zeros((n_X,n_X))
    for i in range(n_X):
        D[i,i] = np.sum(W[i,:])
    P = np.linalg.pinv(D).dot(W)
    R = P**t
    print("D：")
    print(D)
    print()
    print("未归一化前：")
    print(R)
    print()
    R /= np.sum(R,axis=0)
    return R

def init_Y(y):
    
    
    return y

def predict(R,y,n_samples,n_X):
    """y矩阵包含train和test,y[:l]-->train,y[l:]-->test"""
    l = n_samples
    n_labels = y.shape[1]
    y = y.T
    predict = np.copy(y).astype('float')
#     print("y:")
    for n in range(n_labels):
        for j in range(l,n_X):
#                 print("4:")
#                 print(n)
#                 print(j)
#                 print(y[n,:l])
#                 print(R[:l,j])
#                 print(y[n,:l].dot(R[:l,j]))
#                 print()
            predict[n,j] = y[n,:l].dot(R[:l,j])
#             print(predict[n,j])
#             print()
#     print(predict)
    return predict


def fit_predict(X_train,X_test,y_train,y_test,_k=3,_t=2,_sigma=10):
    """
        X[:n_samples]=X_train, X[n_samples:]=X_test
        y[:n_samples]=y_train, y[n_samples:]=y_test
    """
    
    n_samples = len(X_train)
    n_test = len(X_test)
    n_X = n_samples+n_test
    X = np.vstack((X_train,X_test))
    y = np.vstack((y_train,np.zeros_like(y_test)))
    
    graph = init_graph(X,y,k=_k)
    W = init_W(graph,X,sigma=_sigma)
    R = init_R(W,n_X,t=_t)
    res = predict(R,y,n_samples,n_X)
    return res

In [161]:
X = np.array([[1,1],[1,2],[3,2],[3,1]])
y = np.array([[1,0],[0,1],[0,0],[0,0]])

# graph = np.array([[1,0,0,1,0],
#                   [0,1,1,0,0],
#                   [0,1,1,1,1],
#                   [1,0,1,1,1],
#                   [0,0,1,1,1]])

n_samples = len(y)
graph = init_graph(X,y,n_samples,3)
print("graph:")
print(graph)
print()
W = init_W(graph,X,sigma=10)
R = init_R(W,len(X),t=1)
print("R:")
print(R)
print()
res = predict(R,y,2,4)
print(res)
print()

graph:
[[1. 0. 1. 1.]
 [0. 1. 1. 1.]
 [0. 0. 1. 1.]
 [0. 0. 1. 1.]]

W:
[[1.         0.         0.97530991 0.98019867]
 [0.         1.         0.98019867 0.97530991]
 [0.         0.         1.         0.99501248]
 [0.         0.         0.99501248 1.        ]]

D：
[[2.95550859 0.         0.         0.        ]
 [0.         2.95550859 0.         0.        ]
 [0.         0.         1.99501248 0.        ]
 [0.         0.         0.         1.99501248]]

未归一化前：
[[0.33835124 0.         0.32999732 0.33165144]
 [0.         0.33835124 0.33165144 0.32999732]
 [0.         0.         0.50125    0.49875   ]
 [0.         0.         0.49875    0.50125   ]]

R:
[[1.         0.         0.19859631 0.19959178]
 [0.         1.         0.19959178 0.19859631]
 [0.         0.         0.30165821 0.30015369]
 [0.         0.         0.30015369 0.30165821]]

[[1.         0.         0.19859631 0.19959178]
 [0.         1.         0.19959178 0.19859631]]



## 数据整理

In [1]:
from skmultilearn.dataset import load_dataset
X_train, y_train, feature_names, label_names = load_dataset('emotions', 'train')
X_test, y_test, _, _ = load_dataset('emotions', 'test')

emotions:train - exists, not redownloading
emotions:test - exists, not redownloading


In [10]:
# print('X_train       is {}'.format(X_train))
print('Y_train       is {}'.(feature_names))
print('label_names   is {}format(y_train))
print('feature_names is {}'.format'.format(label_names ))

Y_train       is   (0, 1)	1
  (0, 2)	1
  (1, 0)	1
  (1, 5)	1
  (2, 1)	1
  (2, 5)	1
  (3, 2)	1
  (4, 3)	1
  (5, 1)	1
  (5, 2)	1
  (6, 0)	1
  (6, 1)	1
  (7, 5)	1
  (8, 0)	1
  (8, 1)	1
  (9, 2)	1
  (9, 3)	1
  (9, 4)	1
  (10, 1)	1
  (10, 2)	1
  (11, 2)	1
  (12, 0)	1
  (13, 2)	1
  (13, 4)	1
  (14, 4)	1
  (15, 0)	1
  (15, 5)	1
  (16, 2)	1
  (16, 3)	1
  (16, 4)	1
  (17, 1)	1
  (17, 2)	1
  (18, 4)	1
  (18, 5)	1
  (19, 2)	1
  (19, 3)	1
  (19, 4)	1
  (20, 0)	1
  (20, 5)	1
  (21, 1)	1
  (22, 1)	1
  (23, 5)	1
  (24, 5)	1
  (25, 2)	1
  (25, 5)	1
  (26, 2)	1
  (27, 0)	1
  (27, 1)	1
  (27, 5)	1
  (28, 1)	1
  (29, 2)	1
  (30, 5)	1
  (31, 2)	1
  (31, 3)	1
  (32, 0)	1
  (32, 5)	1
  (33, 5)	1
  (34, 0)	1
  (34, 1)	1
  (35, 1)	1
  (35, 2)	1
  (36, 2)	1
  (36, 3)	1
  (36, 4)	1
  (37, 1)	1
  (37, 2)	1
  (38, 0)	1
  (38, 1)	1
  (39, 0)	1
  (39, 5)	1
  (40, 1)	1
  (41, 0)	1
  (41, 4)	1
  (42, 2)	1
  (42, 3)	1
  (42, 4)	1
  (43, 1)	1
  (43, 2)	1
  (44, 0)	1
  (44, 1)	1
  (45, 1)	1
  (45, 2)	1
  (46, 2)	1
  (46

## 模型预测

#### SVM

In [174]:
from skmultilearn.problem_transform import ClassifierChain
from sklearn.svm import SVC
import sklearn.metrics as M

clf = ClassifierChain(
    classifier=SVC(gamma=0.05),
    require_dense=[False, True]
)
clf.fit(X_train, y_train)
prediction = clf.predict(X_test)
hamming_loss_emotions = M.hamming_loss(y_test, prediction)
ACC_emotions = M.accuracy_score(y_test, prediction)
print('hamming_loss_emotions is {}'.format(hamming_loss_emotions))
print('ACC_emotions is {}'.format(ACC_emotions))

hamming_loss_emotions is 0.2747524752475248
ACC_emotions is 0.11386138613861387


#### MLTSVM

In [175]:
from skmultilearn.adapt import MLTSVM

clf_mltsvm = MLTSVM(sor_omega=0.5)
clf_mltsvm.fit(X_train,y_train)
prediction2 = clf_mltsvm.predict(X_test)
hamming_loss_emotions2 = M.hamming_loss(y_test, prediction2)
ACC_emotions2 = M.accuracy_score(y_test, prediction2)
print('hamming_loss_emotions is {}'.format(hamming_loss_emotions2))
print('ACC_emotions is {}'.format(ACC_emotions2))



hamming_loss_emotions is 0.3292079207920792
ACC_emotions is 0.0


  self.treshold = 1.0 / np.max(self.wk_norms)
  all_distances = (-X_with_bias.dot(self.wk_bk.T)) / wk_norms_multiplicated


#### MLKNN

In [162]:
from skmultilearn.adapt import MLkNN
from sklearn.metrics import accuracy_score

clf_knn = MLkNN(k=20)
clf_knn.fit(X_train, y_train)
prediction3 = clf_knn.predict(X_test)
hamming_loss_emotions3 = M.hamming_loss(y_test, prediction3)
ACC_emotions3 = M.accuracy_score(y_test, prediction3)
print('hamming_loss_emotions is {}'.format(hamming_loss_emotions3))
print('ACC_emotions is {}'.format(ACC_emotions3))

hamming_loss_emotions is 0.2995049504950495
ACC_emotions is 0.13861386138613863


#### TML

In [163]:
# X_train = X_train.toarray()
# X_test = X_test.toarray()
# y_train = y_train.toarray()
# y_test = y_test.toarray()

pre = fit_predict(X_train,X_test
            ,y_train,y_test,_k=7,_t=4,_sigma=20)
res = pre>0.5     #阈值取0.5
res = res.astype('int')
hamming_loss_emotions4 = M.hamming_loss(y_test, res.T[len(X_train):])
ACC_emotions4 = M.accuracy_score(y_test, res.T[len(X_train):])
print('hamming_loss_emotions is {}'.format(hamming_loss_emotions4))
print('ACC_emotions is {}'.format(ACC_emotions4))

W:
[[1. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 0. 1.]]

D：
[[1.         0.         0.         ... 0.         0.         0.        ]
 [0.         3.947489   0.         ... 0.         0.         0.        ]
 [0.         0.         1.96559363 ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 2.86728565 0.         0.        ]
 [0.         0.         0.         ... 0.         1.98583881 0.        ]
 [0.         0.         0.         ... 0.         0.         4.90125251]]

未归一化前：
[[1.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.00411828 0.         ... 0.         0.         0.        ]
 [0.         0.         0.06699233 ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.01479504 0.         0.        ]
 [0.         0.         0.         ... 0.         0.06430193 0.        ]
 [0.         0.

In [8]:
# 
from skmultilearn.adapt import MLkNN
from sklearn.metrics import accuracy_score
import sklearn.metrics as M

for i in range(1,20):
    clf_knn = MLkNN()
    clf_knn.fit(X_train, y_train)
    prediction7 = clf_knn.predict(X_test)
    hamming_loss_emotions7 = M.hamming_loss(prediction7, y_test)
    ACC_emotions7 = M.accuracy_score(y_test, prediction7)
    print('hamming_loss_emotions is {}'.format(hamming_loss_emotions7))
    print('ACC_emotions is {}'.format(ACC_emotions7))
    print()

hamming_loss_emotions is 0.30363036303630364
ACC_emotions is 0.13366336633663367

hamming_loss_emotions is 0.30363036303630364
ACC_emotions is 0.13366336633663367

hamming_loss_emotions is 0.30363036303630364
ACC_emotions is 0.13366336633663367

hamming_loss_emotions is 0.30363036303630364
ACC_emotions is 0.13366336633663367

hamming_loss_emotions is 0.30363036303630364
ACC_emotions is 0.13366336633663367

hamming_loss_emotions is 0.30363036303630364
ACC_emotions is 0.13366336633663367

hamming_loss_emotions is 0.30363036303630364
ACC_emotions is 0.13366336633663367

hamming_loss_emotions is 0.30363036303630364
ACC_emotions is 0.13366336633663367

hamming_loss_emotions is 0.30363036303630364
ACC_emotions is 0.13366336633663367

hamming_loss_emotions is 0.30363036303630364
ACC_emotions is 0.13366336633663367

hamming_loss_emotions is 0.30363036303630364
ACC_emotions is 0.13366336633663367

hamming_loss_emotions is 0.30363036303630364
ACC_emotions is 0.13366336633663367

hamming_loss_emo

KeyboardInterrupt: 

In [74]:
from sklearn import datasets
import numpy as np

iris=datasets.load_iris()
data = iris.data[:,:2]
X_iris = []
y_iris = []
for i in range(len(data)):
    for j in range(i+1,len(data)):
        if i!=j and all(data[i]==data[j]):
            print(all(data[i] in X_iris))
            X_iris.append(data[i])
for point in X_iris:
    y_iris.append(iris.target[iris.data==point])
# print(np.array(X_iris))
# print(np.array(y_iris))

TypeError: 'bool' object is not iterable

In [63]:
X_iris2 = []
X_iris2.append(np.array([1,2]))
X_iris2.append(np.array([1,2]))
X_iris2.append(np.array([3,2]))
X_iris2 = np.array(X_iris2.view(X_iris2.dtype.descr * X_iris2.shape[1]))
np.unique(X_iris2)

AttributeError: 'list' object has no attribute 'view'