# 无标签样本迁移

# 一、JDA 联合分布适配

JDA是一个概率分布适配的方法，适配源域和目标域的联合概率。在一个有原则的降维过程中同时适配源域和目标域的边缘分布和条件分布。JDA模型有两点假设：

- 源域和目标域边缘分布不同
- 源域和目标域条件分布不同

使用JDA模型对样本域进行映射，从而提高模型在目标域的表现。

In [2]:
import numpy as np
from scipy.linalg.misc import norm
from scipy.sparse.linalg import eigs

In [3]:
def JDA(Xs,Xt,Ys,Yt,k=100,lamba=0.1,ker='primal',gamma=1.0,data='default'):
    """Joint Distribution Adaptation (JDA) projection of source and target data.

    Builds an MMD matrix that measures the marginal discrepancy between the
    two domains (plus, when target pseudo-labels are supplied, one
    class-conditional term per source class) and solves the generalized
    eigenproblem

        (K M K^T + lamba * I) A = (K H K^T) A Phi

    for the eigenvectors with the smallest eigenvalues.

    Args:
        Xs: Source data, shape (m, ns); every column is one sample.
        Xt: Target data, shape (m, nt); every column is one sample.
        Ys: Source labels, ns entries.
        Yt: Target pseudo-labels with nt entries. Pass ``None`` (or a scalar
            such as ``0`` / ``False``) to adapt the marginal distribution only;
            a label vector whose length differs from nt is ignored as well.
        k: Number of projection directions. Clipped to the size of the
            eigenproblem (m for ``'primal'``, ns + nt for kernels).
        lamba: Regularization weight (spelling kept for compatibility). It
            must be positive so that the regularized matrix is positive
            definite; otherwise the eigen-solver may raise ``LinAlgError``.
        ker: ``'primal'`` (no kernel), ``'linear'`` or ``'rbf'``.
        gamma: Bandwidth of the ``'rbf'`` kernel, ``exp(-gamma * d^2)``.
        data: Unused; kept so existing callers keep working.

    Returns:
        Tuple ``(A, Z)``: ``A`` is the projection matrix (one direction per
        column) and ``Z = A.T @ K`` is the embedded data with source samples
        first and target samples after them.

    Raises:
        ValueError: If a domain is empty, ``Ys`` does not have ns entries, or
            ``ker`` is not supported.
    """
    # Imported here because the file only imports scipy.sparse.linalg.eigs and
    # a legacy norm helper; the dense symmetric solver lives in scipy.linalg.
    import scipy.linalg

    Xs = np.asarray(Xs, dtype=float)
    Xt = np.asarray(Xt, dtype=float)
    ns = Xs.shape[1]
    nt = Xt.shape[1]
    if ns == 0 or nt == 0:
        raise ValueError("Xs and Xt must each contain at least one sample")

    # Scale every sample (column) to unit L2 norm; all-zero columns stay zero.
    X = np.hstack((Xs, Xt))
    col_norms = np.sqrt(np.sum(X ** 2, axis=0))
    col_norms[col_norms == 0] = 1.0
    X = X / col_norms
    n = X.shape[1]

    Ys = np.asarray(Ys).ravel()
    if Ys.size != ns:
        raise ValueError("Ys must contain one label per column of Xs")
    classes = np.unique(Ys)
    C = len(classes)

    # Marginal MMD term: +1/ns on source samples, -1/nt on target samples.
    e = np.concatenate((np.full(ns, 1.0 / ns), np.full(nt, -1.0 / nt)))
    M = C * np.outer(e, e)

    # Conditional MMD terms, one per class, when pseudo-labels are usable.
    if Yt is not None and np.ndim(Yt) > 0 and np.size(Yt) == nt:
        Yt_flat = np.asarray(Yt).ravel()
        for c in classes:
            src_mask = Ys == c
            tgt_mask = Yt_flat == c
            e = np.zeros(n)
            e[:ns][src_mask] = 1.0 / src_mask.sum()
            n_tgt = tgt_mask.sum()
            if n_tgt > 0:
                # A class missing from the pseudo-labels contributes nothing
                # on the target side (instead of dividing by zero).
                e[ns:][tgt_mask] = -1.0 / n_tgt
            M = M + np.outer(e, e)

    # Normalize by the Frobenius norm.
    M = M / np.linalg.norm(M, 'fro')

    # Centering matrix.
    H = np.eye(n) - np.ones((n, n)) / n

    if ker == 'primal':
        K = X
    elif ker == 'linear':
        K = X.T @ X
    elif ker == 'rbf':
        sq_norms = np.sum(X ** 2, axis=0)
        sq_dist = sq_norms[:, None] + sq_norms[None, :] - 2.0 * (X.T @ X)
        K = np.exp(-gamma * sq_dist)
    else:
        raise ValueError("unsupported ker %r; use 'primal', 'linear' or 'rbf'" % (ker,))

    dim = K.shape[0]
    lhs = K @ M @ K.T + lamba * np.eye(dim)
    rhs = K @ H @ K.T
    # Remove round-off asymmetry before calling the symmetric solver.
    lhs = (lhs + lhs.T) / 2.0
    rhs = (rhs + rhs.T) / 2.0

    # lhs v = w rhs v  <=>  rhs v = (1 / w) lhs v.  lhs is positive definite,
    # so the reciprocal problem is solved with eigh (real, ascending output)
    # and the largest reciprocal eigenvalues give the smallest w.
    _, vectors = scipy.linalg.eigh(rhs, lhs)
    k_eff = max(0, min(int(k), dim))
    A = vectors[:, ::-1][:, :k_eff]
    Z = A.T @ K
    return A, Z

# 二、DTELM

DTELM是一种在极限学习机（ELM）基础上改进得到的迁移学习方法，使用MMD（最大均值差异）距离衡量源域与目标域分布的差异——即对源域不断进行变换，直到其分布与目标域相匹配——并在模型迭代过程中逐步缩小该差异。DTELM分为两部分：

- 对目标域数据的自编码映射
- 针对源域数据的域对齐映射

In [4]:
import numpy as np
from scipy import sparse as sp

In [5]:
# np.zeros() 返回一个给定形状和类型、用0填充的数组
# np.ones()返回一个全1的n维数组
# np.hstack() 将参数元组的元素数组按水平方向进行叠加
# np.sort() 返回排序后的副本；默认 axis=-1（对二维数组即 axis=1，在每一行内排序），axis=0 在每一列内排序
# np.eye() 生成单位矩阵（主对角线为1，其余为0），可用于构造独热编码
# np.linalg.inv 求逆矩阵

In [8]:
def _dtelm_hidden(weights, bias, inputs, activation):
    """Return the hidden-layer output for inputs of shape (features, samples)."""
    pre_activation = weights @ inputs + bias  # bias (hidden, 1) broadcasts over samples
    if activation == "sigmoid":
        return 1.0 / (1.0 + np.exp(-pre_activation))
    return np.sin(pre_activation)


def _dtelm_encode(targets, labels):
    """Encode labels as a (classes, samples) matrix: +1 for the class, -1 elsewhere."""
    return (labels[:, None] == targets[None, :]).astype(float) * 2.0 - 1.0


def DTELM(Train_s, Train_t, Test_t, NL, Type="CALSSFIER", Num_hid=100, Active_Function="sig"):
    """Train a domain-transfer extreme learning machine and score the test set.

    Every data matrix stores one sample per row, with the target (class label
    or regression value) in column 0 and the features in the remaining
    columns. A random hidden layer is drawn with ``np.random.rand`` (seed the
    global NumPy generator for reproducible results), then the output weights
    are fitted jointly on the source and target training data with
    regularization constants Cs = Ct = 0.01.

    Args:
        Train_s: Source-domain training set.
        Train_t: Target-domain training set.
        Test_t: Target-domain test set.
        NL: Solver selection. ``0`` uses the dual form (linear systems sized by
            the sample counts); any other value uses the primal form (a system
            sized by ``Num_hid``). Both minimize the same objective.
        Type: ``"REGRESSION"`` for regression; any other value, including the
            historical misspelled default, selects classification.
        Num_hid: Number of hidden neurons.
        Active_Function: ``"sig"`` / ``"sigmoid"`` or ``"sin"`` / ``"sine"``
            (case and surrounding whitespace are ignored).

    Returns:
        Raw network outputs for the test set, shape (outputs, test samples).
        For classification there is one row per distinct label found in the
        three label columns, in ascending label order; the predicted class of
        a sample is the row with the largest score. For regression there is a
        single row.

    Raises:
        ValueError: If ``Active_Function`` is not recognized.
    """
    Cs = 0.01
    Ct = 0.01

    activation = str(Active_Function).strip().lower()
    if activation in ("sig", "sigmoid"):
        activation = "sigmoid"
    elif activation in ("sin", "sine"):
        activation = "sin"
    else:
        raise ValueError("unsupported Active_Function %r" % (Active_Function,))

    Train_s = np.asarray(Train_s, dtype=float)
    Train_t = np.asarray(Train_t, dtype=float)
    Test_t = np.asarray(Test_t, dtype=float)

    # Column 0 is the target; the remaining columns are features, transposed
    # to (features, samples).
    T = Train_s[:, 0]
    P = Train_s[:, 1:].T
    Tt = Train_t[:, 0]
    Pt = Train_t[:, 1:].T
    TVT = Test_t[:, 0]
    TVP = Test_t[:, 1:].T

    Num_train = P.shape[1]
    Num_train_target = Pt.shape[1]
    Num_input = P.shape[0]

    if str(Type).upper() != "REGRESSION":
        # The class set covers every label seen in any of the three sets so
        # that no training label is silently mapped to the wrong class.
        labels = np.unique(np.concatenate((T, Tt, TVT)))
        T = _dtelm_encode(T, labels)
        Tt = _dtelm_encode(Tt, labels)
    else:
        T = T.reshape(1, -1)
        Tt = Tt.reshape(1, -1)

    # Random input weights in [-1, 1) and hidden biases in [0, 1).
    Inputweight = np.random.rand(Num_hid, Num_input) * 2 - 1
    Bis_hid = np.random.rand(Num_hid, 1)

    # Hidden-layer outputs arranged as (samples, hidden units).
    H = _dtelm_hidden(Inputweight, Bis_hid, P, activation).T
    Ht = _dtelm_hidden(Inputweight, Bis_hid, Pt, activation).T
    T = T.T
    Tt = Tt.T

    if NL == 0:
        A = Ht @ H.T
        B = Ht @ Ht.T + np.eye(Num_train_target) / Ct
        C = H @ Ht.T
        D = H @ H.T + np.eye(Num_train) / Cs
        # Solve linear systems instead of forming explicit inverses.
        B_inv_A = np.linalg.solve(B, A)
        B_inv_Tt = np.linalg.solve(B, Tt)
        ApS = np.linalg.solve(C @ B_inv_A - D, C @ B_inv_Tt - T)
        ApT = B_inv_Tt - B_inv_A @ ApS
        Outputweight = H.T @ ApS + Ht.T @ ApT
    else:
        Outputweight = np.linalg.solve(
            np.eye(Num_hid) + Cs * (H.T @ H) + Ct * (Ht.T @ Ht),
            Cs * (H.T @ T) + Ct * (Ht.T @ Tt),
        )

    # Score the test set with the same random hidden layer.
    H_test = _dtelm_hidden(Inputweight, Bis_hid, TVP, activation)
    TY = (H_test.T @ Outputweight).T
    return TY

# 三、总结

迁移学习为模型带来了样本上的扩充，但同时也造成精度的下降以及模型的不稳定性。

可以通过多模型组合来平衡迁移学习的不稳定，多模型加权输出最终结果，可以为模型引入更稳健的决策因子。