In [3]:
import sys
import os
import joblib
import json

import math
import numpy as np
import numpy.matlib
import scipy as sp
import scipy.io as sio 
import scipy.sparse as scp
from scipy.sparse.linalg import svds as SVD

import sklearn 
from sklearn import svm
from sklearn import linear_model
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score

import numpy as np
import matlab
import matlab.engine
import scipy.io as sio

import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings("ignore", category=RuntimeWarning) 
warnings.filterwarnings("ignore", category=ConvergenceWarning)


In [5]:
#from google.colab import drive

#drive.mount('/content/gdrive')
basepath = ""

## Spectral Feature Alignment (SFA)

In [3]:
class SFA:
    '''
    spectral feature alignment
    '''
    def __init__(self,l=500,K=100, base_classifer=svm.SVC()):
        self.l = l
        self.K = K
        self.m = 0
        self.ut = None
        self.phi = 1
        self.base_classifer = base_classifer
        self.ix = None
        self._ix = None
        return

    def fit(self, Xs,Xt):

        # Sort indices by highest sum of columns (highest word frequency)
        ix_s = np.argsort(np.sum(Xs, axis=0))
        ix_t = np.argsort(np.sum(Xt, axis=0))

        ix_s = ix_s[::-1][:self.l]
        ix_t = ix_t[::-1][:self.l]
        # Instersect words with highest word frequency in both source and target
        ix = np.intersect1d(ix_s, ix_t)
        # Complement of previous index
        _ix = np.setdiff1d(range(Xs.shape[1]), ix)
        self.ix = ix
        self._ix = _ix
        self.m = len(_ix)
        self.l = len(ix)

        
        X = np.concatenate((Xs, Xt), axis=0)
        # High frequency word matrix. 1 in every spot where there was a high 
        # frequency word count
        DI = (X[:, ix]>0).astype('float')
        # Low frequency word matrix. 1 in every spot where there's a low 
        # frequency word count
        DS = (X[:, _ix]>0).astype('float')

        # construct co-occurrence matrix DSxDI
        M = np.zeros((self.m,self.l))
        for i in range(X.shape[0]):
            tem1 = np.reshape(DS[i], (1, self.m))
            tem2 = np.reshape(DI[i], (1, self.l))
            M += np.matmul(tem1.T, tem2)
        M = M/np.linalg.norm(M, 'fro')
        M = scp.lil_matrix(M)
        D1 = scp.lil_matrix((self.m, self.m))
        D2 = scp.lil_matrix((self.l, self.l))
        for i in range(self.m):
            D1[i,i] = 1.0/np.sqrt(np.sum(M[i,:]))
        for i in range(self.l):
            D2[i,i] = 1.0/np.sqrt(np.sum(M[:,i]))
        B = (D1.tocsr().dot(M.tocsr())).dot(D2.tocsr())
        ut, s, vt = SVD(B.tocsc(), k=self.K)
        self.ut = ut
        return ut

    def transform(self, X):
        return np.concatenate((X, X[:, self._ix].dot(self.ut)), axis=1)

    def fit_predict(self,Xs, Xt, X_test, Ys, Y_test):
        ut = self.fit(Xs, Xt)
        Xs = self.transform(Xs)
        self.base_classifer.fit(Xs, Ys)
        X_test = self.transform(X_test)
        y_pred = self.base_classifer.predict(X_test)
        acc = accuracy_score(Y_test, y_pred)
        return acc

In [4]:
#datasets = ['K', 'D', 'B', 'E']
datasets = ['K', 'D']

results = {}
for dataset1 in datasets:
    for dataset2 in datasets:
        if dataset1 == dataset2:
            continue
        [Xs, Ys, X_test, Y_test, Xt]=joblib.load(
                    os.path.join(basepath, dataset1+'-'+dataset2+'.pkl'))
        Xs = Xs.astype('float')
        X_test = X_test.astype('float')
        Xt = Xt.astype('float')
        model = SFA()
        acc = model.fit_predict(Xs, Xt, X_test, Ys, Y_test)
        print(dataset1, dataset2, acc)
        results[dataset1+dataset2] = acc

with open("SFA_record.json",'w') as json_file:	
		json.dump(results, json_file)



K D 0.647




D K 0.7155


## Structural Correspondence Learning (SCL)

In [5]:
class SCL(object):
    '''
    class of structural correspondence learning 
    '''
    def __init__(self, l2=1.0, num_pivots=10, base_classifer=LinearSVC()):
        self.l2 = l2
        self.num_pivots = num_pivots
        self.W = 0
        self.base_classifer = base_classifer
        # self.train_data_dim = None

    def fit(self, Xs, Xt):
        '''
        find pivot features and transfer the Xs and Xt
        Param Xs: source data
        Param Xt: target data
        output Xs_new: new source data features
        output Xt_new: new target data features
        output W: transform matrix
        '''
        _, ds = Xs.shape
        _, dt = Xt.shape
        assert ds == dt
        X = np.concatenate((Xs, Xt), axis=0)
        ix = np.argsort(np.sum(X, axis=0))
        ix = ix[::-1][:self.num_pivots]
        pivots = (X[:, ix]>0).astype('float')
        p = np.zeros((ds, self.num_pivots))
        # train for the classifers 
        for i in range(self.num_pivots):
            clf = linear_model.SGDClassifier(loss="modified_huber", alpha=self.l2)
            clf.fit(X, pivots[:, i])
            p[:, i] = clf.coef_
        _, W = np.linalg.eig(np.cov(p))
        W = W[:, :self.num_pivots].astype('float')
        self.W = W
        Xs_new = np.concatenate((np.dot(Xs, W), Xs), axis=1)
        Xt_new = np.concatenate((np.dot(Xt, W), Xt), axis=1)

        return Xs_new, Xt_new, W

    def transform(self, X):
        '''
        transform the origianl data by add new features
        Param X: original data
        output x_new: X with new features
        '''
        X_new = np.concatenate((np.dot(X, self.W),X), axis=1)
        return X_new
    
    def fit_predict(self, Xs, Xt, X_test, Ys, Y_test):
        self.fit(Xs, Xt)
        Xs = self.transform(Xs)
        self.base_classifer.fit(Xs, Ys)
        X_test = self.transform(X_test)
        y_pred = self.base_classifer.predict(X_test)
        acc = accuracy_score(Y_test, y_pred)
        return acc

In [6]:
#datasets = ['K', 'D', 'B', 'E']
datasets = ['K', 'D']

results = {}
for dataset1 in datasets:
    for dataset2 in datasets:
        if dataset1 == dataset2:
            continue
        [Xs, Ys, X_test, Y_test, Xt]=joblib.load(
                    os.path.join(basepath, dataset1+'-'+dataset2+'.pkl'))
        Xs = Xs.astype('float')
        X_test = X_test.astype('float')
        Xt = Xt.astype('float')
        model = SCL()
        acc = model.fit_predict(Xs, Xt, X_test, Ys, Y_test)
        print(dataset1, dataset2, acc)
        results[dataset1+dataset2] = acc

with open("SCL_record.json",'w') as json_file:	
		json.dump(results, json_file)



K D 0.7215




D K 0.747


 ## Marginalized Denoising Autoencoders for Domain Adaptation (mSDA)

In [7]:
class mSDA(object):
    '''
    Implement mSDA.
    To read more about the SDA, check the following paper:
        Chen M , Xu Z , Weinberger K , et al.
        Marginalized Denoising Autoencoders for Domain Adaptation[J].
        Computer Science, 2012.
    This implementation of mSDA is based on both the sample code the authors provided
    as well as the equations in the paper.
    The code is modified according to https://github.com/douxu896/mSDA
    '''
    def __init__(self, p=None, l=5, act=np.tanh, Ws=None, bias=True):
        '''
        :param p: corruption probability
        :param l: number of layers
        :param act: what nonlinearity to use? if None, not to use nonlinearity.
        :param Ws: model parameters. Can optionally pass in precomputed Ws to use to transform X.
                (e.g. if transforming test X with Ws learned from training X)
        :param bias: Whether to use bias?
        '''
        self.p = p
        self.l = l
        self.act = act
        self.Ws = Ws
        self.bias = bias

    def mDA(self, X, W=None):
        '''
        One layer Marginalized Denoising Autoencoder.
        Learn a representation h of X by reconstructing "corrupted" input but marginalizing out corruption
        :param X: input features, shape:(num_samples,num_features)
        :param W: model parameters. Can optionally pass in precomputed W to use to transform X.
                (e.g. if transforming test X with W learned from training X)
        :return: model parameters, reconstructed representation.
        '''
        if self.bias:
            X=np.hstack((X, np.ones((X.shape[0], 1))))
        if W is None:
            W = self._compute_reconstruction_W(X)
        h = np.dot(X, W)  # no nonlinearity
        if self.act is not None:
            h = self.act(h)  # inject nonlinearity
        return W, h

    def _compute_reconstruction_W(self, X):
        '''
        Learn reconstruction parameters.
        :param X: input features, shape:(num_samples,num_features)
        :return: model parameters.
        '''
        # typecast to correct Xtype
        X.dtype = "float64"
        d = X.shape[1]
        # Represents the probability that a given feature will be corrupted
        if self.bias:
            q = np.ones(
                (d-1, 1)) * (1 - self.p)
            # add bias probability
            q=np.vstack((q,1))
        else:
            q = np.ones(
                (d, 1)) * (1 - self.p)

        S = np.dot(X.transpose(), X)
        Q = S * (np.dot(q, q.transpose()))
        Q[np.diag_indices_from(Q)] = q[:,0] * np.diag(S)
        P = S * numpy.matlib.repmat(q, 1, d)

        # solve equation of the form W = BA^-1
        A = Q + 10**-5 * np.eye(d)
        B = P[:-1,:]
        W = np.linalg.solve(A.transpose(), B.transpose())
        return W

    def fit(self, X):
        '''
        Stack mDA layers on top of each other, using previous layer as input for the next
        :param X: input features, shape:(num_samples,num_features)
        :return: None
        '''
        Ws = list()
        hs = list()
        hs.append(X)
        for layer in range(0, self.l):
            W, h = self.mDA(hs[-1])
            Ws.append(W)
            hs.append(h)
        self.Ws = Ws

    def transform(self, X):
        '''
        Should be called after fit!
        Stack mDA layers on top of each other, using previous layer as input for the next
        :param X: input features, shape:(num_samples,num_features)
        :return: reconstructed representation of the last layer.
        '''
        if self.Ws is None:
            raise ValueError('Please fit on some data first.')
        hs = list()
        hs.append(X)
        for layer in range(0, self.l):
            _, h = self.mDA(hs[-1], self.Ws[layer])
            hs.append(h)
        return hs[-1]

    def fit_transform(self, X):
        '''
        Stack mDA layers on top of each other, using previous layer as input for the next
        :param X: input features, shape:(num_samples,num_features)
        :return: reconstructed representation of the last layer.
        '''
        Ws = list()
        hs = list()
        hs.append(X)
        for layer in range(0, self.l):
            W, h = self.mDA(hs[-1])
            Ws.append(W)
            hs.append(h)
        self.Ws = Ws
        return hs[-1]


In [8]:
# test implementation

# load dataset1
[Xs_train, Ys_train, Xs_test, Ys_test, Xs_unlabeled]=joblib.load(
            os.path.join(basepath, 'K-D.pkl'))
# load dataset2
[Xt_train, Xt_train_label, Xt_test, Xt_test_label, Xt_unlabeled]=joblib.load(
            os.path.join(basepath, 'D-K.pkl'))

from sklearn import svm

clf = svm.SVC().fit(Xs_train, Ys_train)
preds_Xs = clf.predict(Xs_test)
acc = np.mean(preds_Xs == Ys_test)
print("Xs acc on regular X: ", acc)
preds_Xt = clf.predict(Xt_test)
acc = np.mean(preds_Xt == Xt_test_label)
print("Xt acc on regular X: ", acc)

# set corruption probability, number of layers and bias.
pp = 0.3
ll = 5
bias = True
train_X=np.concatenate((Xs_train,Xs_unlabeled,Xt_train,Xt_unlabeled),axis=0)
msda=mSDA(p=pp, l=ll,act=np.tanh, Ws=None, bias=True)
msda.fit(train_X)
Xs_reps = msda.transform(Xs_train)
print("Shape of mSDA Xs_reps h: ", Xs_reps.shape)
Xs_test_reps = msda.transform(Xs_test)
print("Shape of mSDA Xs_test_reps h: ", Xs_test_reps.shape)
Xt_reps = msda.transform(Xt_test)
print("Shape of mSDA Xt_test_reps h: ", Xt_reps.shape)

clf = svm.SVC().fit(Xs_reps, Ys_train)
preds_Xs=clf.predict(Xs_test_reps)
acc=np.mean(preds_Xs == Ys_test)
print("Xs acc with linear SVM on mSDA features: ", acc)
preds_Xt = clf.predict(Xt_reps)
acc = np.mean(preds_Xt == Xt_test_label)
print("Xt acc with linear SVM on mSDA features: ", acc)




Xs acc on regular X:  0.6275
Xt acc on regular X:  0.7495
Shape of mSDA Xs_reps h:  (2000, 5000)
Shape of mSDA Xs_test_reps h:  (2000, 5000)
Shape of mSDA Xt_test_reps h:  (2000, 5000)




Xs acc with linear SVM on mSDA features:  0.769
Xt acc with linear SVM on mSDA features:  0.868


## Stacked Denoising Autoencoders (SDA)

In [9]:
import tensorflow as tf
#import tensorflow.compat.v1.keras.backend as K
from keras import backend as K
os.environ['KERAS_BACKEND'] = "tensorflow"
#gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
#sess = tf.compat.v1.InteractiveSession(
#    config=tf.compat.v1.ConfigProto(
#        gpu_options=gpu_options))
#K.set_session(sess)
from keras.models import Model
from keras.layers import Input
from keras.layers.core import Dense, Dropout
from keras.callbacks import EarlyStopping
from keras.utils.np_utils import to_categorical

class SDA(object):
    '''
    Implements Stacked Denoising Autoencoders in Keras.
    To read more about the SDA, check the following paper:
        Vincent P , Larochelle H , Bengio Y , et al.
        Extracting and Composing Robust Features with Denoising Autoencoders[C]//
        International Conference on Machine Learning. ACM, 2008.
    The code is modified according to https://github.com/MadhumitaSushil/SDAE
    '''

    def __init__(
            self,
            nb_layers=2,
            nb_hid=[100],
            dropout=[0.1],
            enc_act=['tanh'],
            dec_act=['linear'],
            bias=True,
            loss_fn='mse',
            batch_size=32,
            nb_epoch=300,
            optimizer='adam',
            verbose=1,
            base_classifer=svm.LinearSVC()):
        '''
        Initializes parameters for stacked denoising autoencoders
        :param nb_layers: number of layers, i.e., number of autoencoders to stack on top of each other.
        :param nb_hid: list with the number of hidden nodes per layer. If only one value specified, same value is used for all the layers
        :param dropout: list with the proportion of X_train nodes to mask at each layer. If only one value is provided, all the layers share the value.
        :param enc_act: list with activation function for encoders at each layer. Typically sigmoid.
               See also keras.activations for available activation functions.
        :param dec_act: list with activation function for decoders at each layer.
               Typically the same as encoder for binary X_train, linear for real X_train.
               See also keras.activations for available activation functions.
        :param bias: True to use bias value.
        :param loss_fn: The loss function. Typically 'mse' is used for real values. Options can be found here: https://keras.io/objectives/
        :param batch_size: mini batch size for gradient update
        :param nb_epoch: number of epochs to train each layer
        :param optimizer: The optimizer to use. See also keras.optimizers.
        :param verbose: Verbosity mode, 0, 1, or 2.
        '''
        self.nb_layers = nb_layers
        # if only one value specified for nb_hid, dropout, enc_act or dec_act,
        # use the same parameters for all layers.
        self.nb_hid, self.dropout, self.enc_act, self.dec_act = \
            self._assert_input(nb_layers, nb_hid, dropout, enc_act, dec_act)
        self.bias = bias
        self.loss_fn = loss_fn
        self.batch_size = batch_size
        self.nb_epoch = nb_epoch
        self.optimizer = optimizer
        self.verbose = verbose

        self.encoder_model = None
        self.fine_tuned_model = None

    def fit(self, X_train=None, X_val=None, patience=1, dropout_all=False, model_layers=None):
        '''
        Should be called before self.transform and self.fine_tune
        Pretrains layers of a stacked denoising autoencoder to generate low-dimensional representation of data.
        Returns a list of pretrained sda layers for continue training pre-trained model_layers, if required.
        The self.encoder_model can be used in supervised task by adding a classification/regression layer on top,
        see also self.fine_tune.
        :param X_train: input data (scipy sparse matrix supported). shape:(num_samples,num_features)
        :param X_val: validation data (scipy sparse matrix supported). shape:(num_samples,num_features)
        :param patience: number of epochs with no improvement after which training will be stopped. Useful when X_val is not None.
        :param dropout_all: True to include dropout layer between all layers in the learned encoder model.
               By default, dropout is only present for input in the learned encoder model.
        :param model_layers: [DA1,DA2,...],Pretrained cur_model layers, to continue training pre-trained model_layers, if required
        :return : model_layers for continue training pre-trained model_layers, if required
        '''
        self._print_sda_config()
        if model_layers is not None:
            self.nb_layers = len(model_layers)
        else:
            model_layers = [None] * self.nb_layers

        encoders = []
        for cur_layer in range(self.nb_layers):
            if model_layers[cur_layer] is None:
                # same dim of output units as input units (to reconstruct the
                # signal)
                nb_dim = X_train.shape[1]
                input_layer = Input(shape=(nb_dim,))
                # masking input data to learn to generalize, and prevent
                # identity learning
                dropout_layer = Dropout(self.dropout[cur_layer])
                in_dropout = dropout_layer(input_layer)
                encoder_layer = Dense(
                    units=self.nb_hid[cur_layer],
                    kernel_initializer='glorot_uniform',
                    activation=self.enc_act[cur_layer],
                    name='encoder' + str(cur_layer),
                    use_bias=self.bias)
                encoder = encoder_layer(in_dropout)
                decoder_layer = Dense(
                    units=nb_dim,
                    use_bias=self.bias,
                    kernel_initializer='glorot_uniform',
                    activation=self.dec_act[cur_layer],
                    name='decoder' + str(cur_layer))
                decoder = decoder_layer(encoder)
                cur_model = Model(input_layer, decoder)
                cur_model.compile(loss=self.loss_fn, optimizer=self.optimizer)
            else:
                cur_model = model_layers[cur_layer]
            print("Training layer " + str(cur_layer))
            if X_val is not None:
                early_stopping = EarlyStopping(
                    monitor='val_loss', patience=patience, verbose=1)
                cur_model.fit_generator(
                    generator=data_generator.batch_generator(
                        X_train,
                        X_train,
                        batch_size=self.batch_size,
                        shuffle=True),
                    callbacks=[early_stopping],
                    epochs=self.nb_epoch,
                    steps_per_epoch=int(np.ceil(X_train.shape[0] / self.batch_size)),
                    verbose=self.verbose,
                    validation_data=data_generator.batch_generator(
                        X_val,
                        X_val,
                        batch_size=self.batch_size,
                        shuffle=False),
                    validation_steps=int(np.ceil(X_val.shape[0] / self.batch_size)))
            else:
                cur_model.fit_generator(
                    generator=data_generator.batch_generator(
                        X_train,
                        X_train,
                        batch_size=self.batch_size,
                        shuffle=True),
                    epochs=self.nb_epoch,
                    steps_per_epoch=int(np.ceil(X_train.shape[0] / self.batch_size)),
                    verbose=self.verbose,
                )

            print("Layer " + str(cur_layer) + " has been trained.")

            model_layers[cur_layer] = cur_model
            encoder_layer = cur_model.layers[-2]
            encoders.append(encoder_layer)


            # train = 0 because we do not want to use dropout to get hidden node value,since is a train-only behavior,
            # used only to learn weights. output of second layer: hidden
            # layer(encoder layer)
            X_train = self._get_intermediate_output(
                cur_model,
                X_train,
                n_layer=2,
                train=0,
                n_out=self.nb_hid[cur_layer],
                batch_size=self.batch_size)
            assert X_train.shape[1] == self.nb_hid[cur_layer], "Output of hidden layer not retrieved"
            if X_val is not None:
                X_val = self._get_intermediate_output(
                    cur_model,
                    X_val,
                    n_layer=2,
                    train=0,
                    n_out=self.nb_hid[cur_layer],
                    batch_size=self.batch_size)
        self.encoder_model = self._build_model_from_encoders(
            encoders, dropout_all=dropout_all)
        return model_layers

    def _build_model_from_encoders(self, encoding_layers, dropout_all=False):
        '''
        Builds a deep NN model that generates low-dimensional representation of input, based on pretrained layers.
        :param encoding_layers: pretrained encoder layers
        :param dropout_all: True to include dropout layer between all layers. By default, dropout is only present for input.
        :return model with each encoding layer as a layer of a NN
        '''
        input_layer = Input(shape=(encoding_layers[0].input_shape[1],))
        dropouted = Dropout(self.dropout[0])(input_layer)

        for i in range(len(encoding_layers)):
            if i and dropout_all:
                dropouted = Dropout(self.dropout[i])(dropouted)

            encoding_layers[i].inbound_nodes = []
            dropouted = encoding_layers[i](dropouted)
        model = Model(input_layer, dropouted)
        return model

    def fine_tune(
            self,
            X_train,
            y_train,
            X_val=None,
            y_val=None,
            nb_classes=2,
            patience=1,
            final_act_fn='softmax',
            loss='categorical_crossentropy',
            optimizer='adam',
            batch_size=32,
            nb_epoch=300,
            verbose=1):
        '''
        Should be called after self.fit!
        The self.encoder_model can be used in supervised task by adding a classification/regression layer on top.
        Classification by fine-tuning a pre-trained encoder model for a given task.
        :param X_train: input data (scipy sparse matrix supported). shape:(num_samples,num_features)
        :param y_train: input data labels. class vector to be converted into a matrix(integers from 0 to num_classes).
        :param X_val: validation data (scipy sparse matrix supported). shape:(num_samples,num_features)
        :param y_val: validation data labels. class vector to be converted into a matrix(integers from 0 to num_classes).
        :param nb_classes: number of classes.
        :param patience: number of epochs with no improvement after which training will be stopped. Useful when X_val is not None.
        :param final_act_fn: The activation function for classification. Typically 'softmax'.
               See also keras.activations for available activation functions.
        :param loss: The loss function for classification. Typically 'categorical_crossentropy'.
               See also keras.losses for available loss functions.
        :param optimizer: The optimizer to use. See also keras.optimizers.
        :param batch_size: mini batch size for gradient update
        :param nb_epoch: number of epochs to train.
        :param verbose: Verbosity mode, 0, 1, or 2.
        '''
        if self.encoder_model is None:
            raise ValueError('Please fit on some data first.')
        output=Dense(nb_classes, activation=final_act_fn)(self.encoder_model.output)
        model=Model(self.encoder_model.input,output)
        model.compile(loss=loss, optimizer=optimizer)
        if X_val is not None:
            early_stopping = EarlyStopping(monitor='val_loss', patience=patience, verbose=0)
            model.fit_generator(
                generator=data_generator.batch_generator(
                    X_train,
                    y_train,
                    batch_size=batch_size,
                    shuffle=True,
                    nb_classes=nb_classes,
                    one_hot=True),
                steps_per_epoch=int(np.ceil(X_train.shape[0]/batch_size)),
                callbacks=[early_stopping],
                epochs=nb_epoch,
                verbose=verbose,
                validation_data=data_generator.batch_generator(
                    X_val,
                    y_val,
                    batch_size=batch_size,
                    shuffle=False,
                    nb_classes=nb_classes,
                    one_hot=True),
                validation_steps=int(np.ceil(X_val.shape[0]/batch_size)))
        else:
            model.fit_generator(
                generator=data_generator.batch_generator(
                    X_train,
                    y_train,
                    batch_size=batch_size,
                    shuffle=True,
                    nb_classes=nb_classes,
                    one_hot=True),
                steps_per_epoch=int(np.ceil(X_train.shape[0]/batch_size)),
                epochs=nb_epoch,
                verbose=verbose)

        self.fine_tuned_model=model

    def predict(self, X, batch_size=32):
        '''
        Should be called after self.fit and self.fine_tune!
        Generates class probability predictions for the input samples.
        :param X: input data (scipy sparse matrix supported). shape:(num_samples,num_features)
        :param batch_size: mini batch size for gradient update
        :return: probability predictions for X
        '''
        if self.fine_tuned_model is None:
            raise ValueError('Please fine_tune on some data first.')
        preds=self.fine_tuned_model.predict_generator(generator=data_generator.batch_generator(
                        X,
                        None,
                        batch_size=batch_size,
                        shuffle=False), steps=int(np.ceil(X.shape[0] / batch_size)))
        if preds.min() < 0. or preds.max() > 1.:
            warnings.warn('Network returning invalid probability values. '
                          'The last layer might not normalize predictions '
                          'into probabilities '
                          '(like softmax or sigmoid would).')
        return preds

    def transform(self, X, batch_size=32):
        """
        Should be called after self.fit!
        Transform the X into the dense representation of the last layer of the learned encoder model.
        The dense representation of X can be used in some traditional models, such as LR, SVM, KNN or clustering.
        :param X: input data (scipy sparse matrix supported). shape:(num_samples,num_features)
        :param batch_size: mini batch size for gradient update
        :return : The dense representation of the last layer of the learned encoder model of X.
        """
        if self.encoder_model is None:
            raise ValueError('Please fit on some data first.')
        transformed_rep = self.encoder_model.predict_generator(
            generator=data_generator.batch_generator(
                X, None, batch_size=batch_size, shuffle=False), steps=int(np.ceil(X.shape[0]/batch_size)))

        return transformed_rep

    def fit_predict(self, Xs, Xt, X_test, Ys, Y_test):
        ut = self.fit(Xs, Xt)
        Xs = self.transform(Xs)
        self.base_classifer.fit(Xs, Ys)
        X_test = self.transform(X_test)
        y_pred = self.base_classifer.predict(X_test)
        acc = accuracy_score(Y_test, y_pred)
        return acc

    def _print_sda_config(self):
        """
        Print the configuration of the SDA
        """
        print("Number of layers: " + str(self.nb_layers))

        print("Hidden nodes: ")
        s = ''
        for i in range(self.nb_layers):
            s += str(self.nb_hid[i]) + ' '
        print(s)

        print("Dropout: ")
        s = ''
        for i in range(self.nb_layers):
            s += str(self.dropout[i]) + ' '
        print(s)

        s = ''
        print("Encoder activation: ")
        for i in range(self.nb_layers):
            s += str(self.enc_act[i]) + ' '
        print(s)

        print("Decoder activation: ")
        s = ''
        for i in range(self.nb_layers):
            s += str(self.dec_act[i]) + ' '
        print(s)

        print("Epochs: " + str(self.nb_epoch))
        print("Bias: " + str(self.bias))
        print("Loss: " + str(self.loss_fn))
        print("Batch size: " + str(self.batch_size))
        print("Optimizer: " + str(self.optimizer))

    def _assert_input(self, nb_layers, nb_hid, dropout, enc_act, dec_act):
        '''
        If the hidden nodes, dropout proportion, encoder activation function or decoder activation function is given, it uses the same parameter for all the layers.
        Errors out if there is a size mismatch between number of layers and parameters for each layer.
        '''

        if len(nb_hid) == 1:
            nb_hid = nb_hid * nb_layers

        if len(dropout) == 1:
            dropout = dropout * nb_layers

        if len(enc_act) == 1:
            enc_act = enc_act * nb_layers

        if len(dec_act) == 1:
            dec_act = dec_act * nb_layers

        assert (nb_layers == len(nb_hid) == len(dropout) == len(enc_act) == len(dec_act)), \
            "Please specify as many hidden nodes, dropout proportion on input, " \
            "and encoder and decoder activation function, as many layers are there, using list data structure."

        return nb_hid, dropout, enc_act, dec_act

    def _get_intermediate_output(
            self,
            model,
            X_train,
            n_layer,
            train,
            n_out,
            batch_size,
            dtype=np.float32):
        '''
        Returns output of a given intermediate layer in a model
        :param model: model to get output from
        :param X_train: sparse representation of input data
        :param n_layer: the layer number for which output is required
        :param train: (0/1) 1 to use training config, like dropout noise.
        :param n_out: number of output nodes in the given layer (pre-specify so as to use generator function with sparse matrix to get layer output)
        :param batch_size: the num of instances to convert to dense at a time
        :return value of intermediate layer
        '''
        data_out = np.zeros(shape=(X_train.shape[0], n_out))

        x_batch_gen = data_generator.x_generator(
            X_train, batch_size=batch_size, shuffle=False)
        stop_iter = int(np.ceil(X_train.shape[0] / batch_size))

        for i in range(stop_iter):
            cur_batch, cur_batch_idx = next(x_batch_gen)
            data_out[cur_batch_idx, :] = self._get_nth_layer_output(
                model, n_layer, X=cur_batch, train=train)

        return data_out.astype(dtype, copy=False)

    def _get_nth_layer_output(self, model, n_layer, X, train=1):
        '''
        Returns output of nth layer in a given model.
        :param model: keras model to get an intermediate value out of
        :param n_layer: the layer number to get the value of
        :param X: input data for which layer value should be computed and returned.
        :param train: (1/0): 1 to use the same setting as training (for example, with Dropout, etc.), 0 to use the same setting as testing phase for the model.
        :return the value of n_layer in the given model, input, and setting
        '''
        get_nth_layer_output = K.function([model.layers[0].input, K.learning_phase()],
                                          [model.layers[n_layer].output])
        return get_nth_layer_output([X, train])[0]

class data_generator(object):
    @classmethod
    def batch_generator(
            cls,
            X,
            Y=None,
            batch_size=32,
            shuffle=True,
            nb_classes=2,
            one_hot=False,
            seed=1337):
        '''
        Creates batches of data from given dataset, given a batch size. Returns dense representation of sparse input.
        :param X: input features, sparse or dense
        :param Y: input labels, sparse or dense. If Y is None, return generated X only.
        :param batch_size: number of instances in each batch
        :param shuffle: If True, shuffle input instances.
        :param nb_classes: number of classes for one-hot labels.
        :param one_hot: Weather to transform Y to one_hot labels.
        :param seed: fixed seed for shuffling data, for replication
        :return batch of input features and <labels>
        '''
        number_of_batches = int(
            np.ceil(
                X.shape[0] /
                batch_size))  # ceil function allows for creating last batch off remaining samples
        counter = 0
        sample_index = np.arange(X.shape[0])
        if shuffle:
            np.random.seed(seed)
            np.random.shuffle(sample_index)
        if Y is not None and one_hot:
            Y = to_categorical(Y, nb_classes)
        sparse = False
        if scp.issparse(X):
            sparse = True

        while True:
            batch_index = sample_index[batch_size *
                                       counter:batch_size * (counter + 1)]
            if sparse:
                # converts to dense array
                x_batch = X[batch_index, :].toarray()
                if Y is not None:
                    # converts to dense array
                    y_batch = Y[batch_index, :].toarray()
            else:
                x_batch = X[batch_index, :]
                if Y is not None:
                    y_batch = Y[batch_index, :]
            counter += 1
            if Y is not None:
                yield x_batch, y_batch
            else:
                yield x_batch
            if counter == number_of_batches:
                if shuffle:
                    np.random.shuffle(sample_index)
                counter = 0

    @classmethod
    def x_generator(cls, X, batch_size, shuffle, seed=1337):
        '''
        Creates batches of data from given input, given a batch size. Returns dense representation of sparse input one batch a time.
        :param X: input features, can be sparse or dense
        :param batch_size: number of instances in each batch
        :param shuffle: If True, shuffle input instances.
        :param seed: fixed seed for shuffling data, for replication
        :return batch of input data
        '''
        number_of_batches = int(
            np.ceil(
                X.shape[0] /
                batch_size))  # ceil function allows for creating last batch off remaining samples
        counter = 0
        sample_index = np.arange(X.shape[0])

        if shuffle:
            np.random.seed(seed)
            np.random.shuffle(sample_index)

        sparse = False
        if scp.issparse(X):
            sparse = True

        while counter < number_of_batches:
            batch_index = sample_index[batch_size *
                                       counter:batch_size * (counter + 1)]
            if sparse:
                # converts to dense array
                x_batch = X[batch_index, :].toarray()
            else:
                x_batch = X[batch_index, :]
            yield x_batch, batch_index
            counter += 1

Using TensorFlow backend.


In [10]:
#datasets = ['K', 'D', 'B', 'E']
datasets = ['K', 'D']

results = {}
for dataset1 in datasets:
    for dataset2 in datasets:
        if dataset1 == dataset2:
            continue
        [Xs, Ys, X_test, Y_test, Xt]=joblib.load(
                    os.path.join(basepath, dataset1+'-'+dataset2+'.pkl'))
        Xs = Xs.astype('float')
        X_test = X_test.astype('float')
        Xt = Xt.astype('float')
        model = SDA()
        acc = model.fit_predict(Xs, Xt, X_test, Ys, Y_test)
        print(dataset1, dataset2, acc)
        results[dataset1+dataset2] = acc

with open("SDA_record.json",'w') as json_file:	
		json.dump(results, json_file)

Number of layers: 2
Hidden nodes: 
100 100 
Dropout: 
0.1 0.1 
Encoder activation: 
tanh tanh 
Decoder activation: 
linear linear 
Epochs: 300
Bias: True
Loss: mse
Batch size: 32
Optimizer: adam


AttributeError: module 'tensorflow' has no attribute 'get_default_graph'

## HIDC

In [13]:


# ?! HIDC matlab中存在steplen的设置，at matrixproduce
class HIDC:
    def __init__(self, numIdentical=20, numAlike=20, numDistinct=10, numIter=10):
        self.numIdentical = numIdentical
        self.numAlike = numAlike
        self.numDistinct = numDistinct
        self.numIter = numIter
        return
    
    def fit_predict(self, Xs, Xt, Ys, Yt):
        inputPath = 'data.mat'
        
        eng = matlab.engine.start_matlab()
        sio.savemat('data.mat',{'TrainData': Xs.T, 'TrainLabel': Ys, 'TestData': Xt.T, 'TestLabel':Yt})
        result = eng.HIDC_enterFunc(self.numIdentical, self.numAlike, self.numDistinct, self.numIter, inputPath)
        
        eng.exit()
        # Y_pred = np.asarray(Y_pred)
        # Y_pred = np.reshape(Y_pred, (Y_pred.shape[1],)) 
        return result

In [32]:
datasets = ['K', 'D']

for dataset1 in datasets:
    for dataset2 in datasets:
        if dataset1 == dataset2:
            continue
        [Xs, Ys, X_test, Y_test, Xt]=joblib.load(
                    os.path.join(basepath, dataset1+'-'+dataset2+'.pkl'))

print(Xs.shape)
print(Ys.shape)
print(X_test.shape)
print(Y_test.shape)
print(Xt.shape)

(2000, 5000)
(2000,)
(2000, 5000)
(2000,)
(5945, 5000)


In [24]:
#datasets = ['K', 'D', 'B', 'E']
datasets = ['K', 'D']

results = {}
for dataset1 in datasets:
    for dataset2 in datasets:
        if dataset1 == dataset2:
            continue
        [Xs, Ys, X_test, Y_test, Xt]=joblib.load(
                    os.path.join(basepath, dataset1+'-'+dataset2+'.pkl'))
        Xs = Xs.astype('float')
        X_test = X_test.astype('float')
        Xt = Xt.astype('float')
        model = HIDC()
        acc = model.fit_predict(Xs[:10], Xt[:10], Ys[:10], Y_test[:10])
        print(dataset1, dataset2, acc)
        results[dataset1+dataset2] = acc

with open("HIDC_record.json",'w') as json_file:	
		json.dump(results, json_file)

MatlabExecutionError: 
  File C:\Users\stuar\OneDrive\Documents\CSCI115\presentation1\GenerativeTriTL.m, line 165, in GenerativeTriTL

  File C:\Users\stuar\OneDrive\Documents\CSCI115\presentation1\HIDC_enterFunc.m, line 17, in HIDC_enterFunc
Unable to perform assignment because the size of the left side is 10-by-1 and the size of the right side is 10-by-2.


## TriTL

In [25]:
import numpy as np
import matlab
import matlab.engine
import scipy.io as sio

class TriTL:
    def __init__(self, numIdentical=20, numAlike=20, numDistinct=10, numIter=100):
        self.numIdentical = numIdentical
        self.numAlike = numAlike
        self.numDistinct = numDistinct
        self.numIter = numIter
        return
    
    def fit_predict(self, Xs, Xt, Ys, Yt):
        inputPath = 'data.mat'
        
        eng = matlab.engine.start_matlab()
        sio.savemat('data.mat',{'TrainData': Xs.T, 'TrainLabel': Ys, 'TestData': Xt.T, 'TestLabel':Yt})
        result = eng.TriTL_enterFunc(self.numIdentical, self.numAlike, self.numDistinct, self.numIter, inputPath)
        eng.exit()
        # Y_pred = np.asarray(Y_pred)
        # Y_pred = np.reshape(Y_pred, (Y_pred.shape[1],)) 
        return result

In [31]:
#datasets = ['K', 'D', 'B', 'E']
datasets = ['K', 'D']

results = {}
for dataset1 in datasets:
    for dataset2 in datasets:
        if dataset1 == dataset2:
            continue
        [Xs, Ys, X_test, Y_test, Xt]=joblib.load(
                    os.path.join(basepath, dataset1+'-'+dataset2+'.pkl'))
        Xs = Xs.astype('float')
        X_test = X_test.astype('float')
        Xt = Xt.astype('float')
        model = TriTL()
        acc = model.fit_predict(Xs[:10], Xt[:10], Ys[:10], Y_test[:10])
        print(dataset1, dataset2, acc)
        results[dataset1+dataset2] = acc

with open("TriTL_record.json",'w') as json_file:	
		json.dump(results, json_file)

MatlabExecutionError: 
  File C:\Users\stuar\OneDrive\Documents\CSCI115\presentation1\TriTL.m, line 245, in TriTL

  File C:\Users\stuar\OneDrive\Documents\CSCI115\presentation1\TriTL_enterFunc.m, line 16, in TriTL_enterFunc
Incorrect dimensions for matrix multiplication. Check that the number of columns in the first matrix matches the number of rows in the second matrix. To perform elementwise multiplication, use '.*'.


## MTrick

In [52]:

class Mtrick:
    def __init__(self, alpha=2.4, beta=2.4, numCluster=15, maxIter=200):
        self.alpha = alpha
        self.beta = beta
        self.numCluster = numCluster
        self.maxIter = maxIter
        return

    def fit_predict(self, Xs, Xt, Ys):
        inputPath = 'data.mat'
        eng = matlab.engine.start_matlab()
        sio.savemat('data.mat',{'TrainData': Xs.T, 'TrainLabel': Ys, 'TestData': Xt.T})
        Y_pred = eng.MTrick_enterFunc(self.alpha, self.beta, self.numCluster, self.maxIter, inputPath)
        eng.exit()
        Y_pred = np.asarray(Y_pred)
        print(Y_pred.shape)
        Y_pred = np.reshape(Y_pred, (Y_pred.shape[1],)) 
        return Y_pred


In [58]:
datasets = ['K', 'D', 'B', 'E']
#datasets = ['K', 'D']

results = {}
for dataset1 in datasets:
    for dataset2 in datasets:
        if dataset1 == dataset2:
            continue
        [Xs, Ys, X_test, Y_test, Xt]=joblib.load(
                    os.path.join(basepath, dataset1+'-'+dataset2+'.pkl'))
        Xs = Xs.astype('float')
        X_test = X_test.astype('float')
        Xt = Xt.astype('float')
        model = Mtrick()
        Y_pred = model.fit_predict(Xs, X_test, Ys)
        print(len(Y_pred))
        print(len(Y_test))
        acc = accuracy_score(Y_pred,Y_test)
        print(dataset1, dataset2, acc)
        results[dataset1+dataset2] = acc

with open("Mtrick_record.json",'w') as json_file:	
		json.dump(results, json_file)

(1, 2000)
2000
2000
K D 0.737
(1, 2000)
2000
2000
K B 0.689
(1, 2000)
2000
2000
K E 0.819
(1, 2000)
2000
2000
D K 0.753
(1, 2000)
2000
2000
D B 0.715
(1, 2000)
2000
2000
D E 0.7565
(1, 2000)
2000
2000
B K 0.802
(1, 2000)
2000
2000
B D 0.784
(1, 2000)
2000
2000
B E 0.786
(1, 2000)
2000
2000
E K 0.808
(1, 2000)
2000
2000
E D 0.7265
(1, 2000)
2000
2000
E B 0.7055
