In [None]:
import sys
import os
import joblib
import json

import math
import numpy as np
import numpy.matlib
import scipy as sp
import scipy.io as sio 
import scipy.sparse as scp
from scipy.sparse.linalg import svds as SVD

import sklearn 
from sklearn import svm
from sklearn import linear_model
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score

import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings("ignore", category=RuntimeWarning) 
warnings.filterwarnings("ignore", category=ConvergenceWarning)


In [None]:
from google.colab import drive

# Mount Google Drive inside the Colab VM at /content/gdrive.
drive.mount('/content/gdrive')
# Drive folder that the later cells join with '<source>-<target>.pkl' file names
# when loading the datasets via joblib.
basepath = "/content/gdrive/My Drive/presentation1"

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


### Sample positive Amazon review for books

'woman_the:1 contains_the:1 fan_i:1 alex_ross(superman:1 justice:1 read:1 comics_fan:1 again:1 league_etc:1 fans:1 recieved:1 hanna-barbera!)_a:1 book_fans:1 wonder:1 gift:1 **gorgeous_artwork:1** gift_and:1 contains:1 i_recieved:1 artwork:2 christmas:1 read_it:1 wonder_woman:1 justice_league:1 a_comics:1 again_and:1 even:1 i_read:1 the_most:2 gorgeous:1 of_alex:1 **i:2** **extraordinary:1** most_gorgeous:1 most:2 it_again:1 comic_books:1 and_i:1 ross(superman_batman:1 etc_even:1 etc:1 the_justice:1 **fan:1** **beautiful:1** again.a:1 even_hanna-barbera!):1 comics:1 batman_wonder:1 for_comic:1 in_comic:1 artwork_in:1 books_contains:1 woman:1 a_christmas:1 extraordinary_artwork:1 books:1 christmas_gift:1 ross(superman:1 league:1 artwork_of:1 most_extraordinary:1 comic_book:1 book:1 recieved_this:1 batman:1 must-have_for:1 hanna-barbera!):1 **must-have:1** again.a_must-have:1 alex:1 and_again.a:1 comic:2 #label#:positive'

### Top 20 words by frequency

'i',
 'you',
 'not',
 'was',
 '<num>',
 'my',
 'one',
 'book',
 'so',
 'they',
 'all',
 'if',
 'very',
 'about',
 'just',
 'like',
 'great',
 'his',
 'out',
 'good',
 

## Spectral Feature Alignment (SFA)

In [None]:
class SFA:
    '''
    Spectral Feature Alignment (SFA).

    Splits the vocabulary into domain-independent features (frequent in both
    the source and the target data) and domain-specific features (everything
    else), builds their co-occurrence graph, and uses the top-K left singular
    vectors of the normalised co-occurrence matrix to map domain-specific
    features into K extra "aligned" features that are appended to the input.

    :param l: how many of the most frequent features are taken from each
        domain before intersecting them.
    :param K: number of singular vectors (new aligned features) to keep.
    :param base_classifer: estimator exposing ``fit``/``predict``. When None
        (default) a fresh ``svm.SVC()`` is created for this instance, so
        separate SFA objects never share one classifier.
    '''
    def __init__(self, l=500, K=100, base_classifer=None):
        self.l = l # number of domain-independent features
        self.K = K # number of clusters
        self.m = 0 # number of domain-specific features
        self.ut = None # eigen-vectors from spectral decomposition
        self.gamma = 1 # tradeoff parameter
        # A default written as ``svm.SVC()`` in the signature would be evaluated
        # once and shared by every instance; build it per instance instead.
        self.base_classifer = svm.SVC() if base_classifer is None else base_classifer
        self.ix = None # index of domain-independent features
        self._ix = None # index of domain-specific features

    @staticmethod
    def _inv_sqrt_degree(degree):
        '''Return 1/sqrt(degree) element-wise, using 0 where degree is 0.'''
        inv = np.zeros_like(degree, dtype='float')
        positive = degree > 0
        inv[positive] = 1.0 / np.sqrt(degree[positive])
        return inv

    def fit(self, Xs, Xt):
        '''
        Learn the spectral mapping from source and target data.
        :param Xs: source features, dense array of shape (n_source, n_features)
        :param Xt: target features, dense array of shape (n_target, n_features)
        :return: ut, array of shape (m, K) holding the left singular vectors
                 (one row per domain-specific feature).

        NOTE: ``self.l`` is overwritten with the number of shared features
        actually found, so calling ``fit`` again on the same instance starts
        from that smaller value. Use a fresh instance per fit.
        '''
        # 1. Select domain-independent and domain-specific features
        # _______________________________________________________

        # Indices of the l columns with the highest sums (word frequency)
        top_s = np.argsort(np.sum(Xs, axis=0))[::-1][:self.l]
        top_t = np.argsort(np.sum(Xt, axis=0))[::-1][:self.l]

        # Features frequent in BOTH domains are treated as domain-independent
        ix = np.intersect1d(top_s, top_t)

        # Everything else is domain-specific
        _ix = np.setdiff1d(np.arange(Xs.shape[1]), ix)
        self.ix = ix # index of domain-independent features
        self._ix = _ix # index of domain-specific features
        self.l = len(ix) # number of domain-independent features
        self.m = len(_ix) # number of domain-specific features

        X = np.concatenate((Xs, Xt), axis=0)

        # 2. Construct co-occurrence matrix between domain specific / independent
        #________________________________________________________

        DI = (X[:, ix] > 0).astype('float') # Domain independent
        DS = (X[:, _ix] > 0).astype('float') # Domain specific

        # M[a, b] = number of documents containing both specific feature a and
        # independent feature b. One matrix product replaces the per-document
        # sum of outer products and yields the same counts.
        M = np.matmul(DS.T, DI)
        M = M / np.linalg.norm(M, 'fro')

        # 3. Normalise by node degree and obtain top K singular vectors
        #________________________________________________________
        # Features that never co-occur have degree 0; give them weight 0
        # instead of 1/sqrt(0) = inf. Their rows/columns of M are all zero, so
        # the normalised matrix is unchanged for them.
        d1 = self._inv_sqrt_degree(M.sum(axis=1))
        d2 = self._inv_sqrt_degree(M.sum(axis=0))
        B = scp.csc_matrix(d1[:, np.newaxis] * M * d2[np.newaxis, :])
        ut, s, vt = SVD(B, k=self.K)
        self.ut = ut
        return ut

    def transform(self, X):
        '''
        Feature alignment mapping function.
        :param X: array of shape (n_samples, n_features)
        :return: X with K extra columns, gamma * X[:, domain-specific] . ut,
                 appended on the right.
        '''
        return np.concatenate((X, self.gamma*X[:, self._ix].dot(self.ut)), axis=1)

    def fit_predict(self, Xs, Xt, X_test, Ys, Y_test):
        '''
        Fit the alignment on (Xs, Xt), train the base classifier on the
        augmented source data and return its accuracy on the augmented X_test.
        '''
        # Learn the spectral alignment, then augment the source features
        self.fit(Xs, Xt)
        Xs = self.transform(Xs)
        # Build classifier with concatenated Xs features and new features
        self.base_classifer.fit(Xs, Ys)
        X_test = self.transform(X_test)
        y_pred = self.base_classifer.predict(X_test)
        acc = accuracy_score(Y_test, y_pred)
        return acc

In [None]:
# Evaluate SFA on every ordered (source, target) pair of domains and store
# the accuracies as JSON.
datasets = ['K', 'D', 'B', 'E']

results = {}
for dataset1 in datasets:
    # Each remaining domain acts as the target for this source domain.
    for dataset2 in (name for name in datasets if name != dataset1):
        # NOTE: joblib.load unpickles the file; only load trusted data.
        pkl_path = os.path.join(basepath, dataset1+'-'+dataset2+'.pkl')
        Xs, Ys, X_test, Y_test, Xt = joblib.load(pkl_path)
        Xs, X_test, Xt = (arr.astype('float') for arr in (Xs, X_test, Xt))
        model = SFA()
        acc = model.fit_predict(Xs, Xt, X_test, Ys, Y_test)
        print(dataset1, dataset2, acc)
        results[dataset1+dataset2] = acc

with open("SFA_record.json",'w') as json_file:
    json.dump(results, json_file)

K D 0.723
K B 0.7105
K E 0.819
D K 0.7815
D B 0.7525
D E 0.7585
B K 0.787
B D 0.7705
B E 0.74
E K 0.8255
E D 0.716
E B 0.7005


## Structural Correspondence Learning (SCL)

In [None]:
class SCL(object):
    '''
    Structural Correspondence Learning (SCL).

    Picks the most frequent features as pivots, trains one linear predictor
    per pivot on the pooled source+target data, and projects the data onto the
    leading eigenvectors of the covariance of the predictor weights. The
    projection is prepended to the original features.

    :param l2: regularisation strength (``alpha``) of the pivot predictors.
    :param num_pivots: number of pivot features and of projected dimensions.
    :param base_classifer: estimator exposing ``fit``/``predict``. When None
        (default) a fresh ``LinearSVC()`` is created for this instance, so
        separate SCL objects never share one classifier.
    :param random_state: forwarded to the SGD pivot predictors; None keeps
        their default (non-deterministic) behaviour.
    '''
    def __init__(self, l2=1.0, num_pivots=10, base_classifer=None, random_state=None):
        self.l2 = l2
        self.num_pivots = num_pivots
        self.W = 0
        # A default written as ``LinearSVC()`` in the signature would be
        # evaluated once and shared by every instance.
        self.base_classifer = LinearSVC() if base_classifer is None else base_classifer
        self.random_state = random_state

    @staticmethod
    def _top_eigenvectors(p, k):
        '''
        Eigenvectors of cov(p) (rows of p are the variables) belonging to the
        k largest eigenvalues, as columns, largest first.
        '''
        # The covariance matrix is symmetric, so eigh applies and returns real
        # values. The order is made explicit because np.linalg.eig gives no
        # ordering guarantee, which made a plain [:, :k] slice unreliable.
        eigvals, eigvecs = np.linalg.eigh(np.cov(p))
        order = np.argsort(eigvals)[::-1][:k]
        return eigvecs[:, order].astype('float')

    def fit(self, Xs, Xt):
        '''
        find pivot features and transfer the Xs and Xt
        Param Xs: source data
        Param Xt: target data
        output Xs_new: new source data features
        output Xt_new: new target data features
        output W: transform matrix
        '''
        _, ds = Xs.shape
        _, dt = Xt.shape
        assert ds == dt

        # 1. Select pivots (domain-agnostic words in both spaces)
        # _______________________________________________________

        # num_pivots columns with the highest total frequency over Xs and Xt
        X = np.concatenate((Xs, Xt), axis=0)
        ix = np.argsort(np.sum(X, axis=0))[::-1][:self.num_pivots]
        pivots = (X[:, ix] > 0).astype('float')
        p = np.zeros((ds, self.num_pivots))

        # 2. Train one classifier per pivot to "predict" its presence; the
        # weight vectors capture how each feature correlates with the pivot.
        # _______________________________________________________
        # NOTE(review): X still contains the pivot columns themselves while
        # they are being predicted - confirm whether they should be masked.
        for i in range(self.num_pivots):
            clf = linear_model.SGDClassifier(loss="modified_huber", alpha=self.l2,
                                             random_state=self.random_state)
            clf.fit(X, pivots[:, i])
            p[:, i] = clf.coef_.ravel()

        # 3. Keep the top num_pivots eigenvectors of cov(p) as the projection
        # _______________________________________________________
        W = self._top_eigenvectors(p, self.num_pivots)
        self.W = W

        return self.transform(Xs), self.transform(Xt), W

    def transform(self, X):
        '''
        transform the original data by adding new features
        Param X: original data
        output x_new: X with new features
        '''
        X_new = np.concatenate((np.dot(X, self.W), X), axis=1)
        return X_new

    def fit_predict(self, Xs, Xt, X_test, Ys, Y_test):
        '''
        Fit SCL on (Xs, Xt), train the base classifier on the augmented source
        data and return its accuracy on the augmented X_test.
        '''
        # Learn the projection, then augment the source features
        self.fit(Xs, Xt)
        Xs = self.transform(Xs)
        # Build classifier with concatenated Xs features and new features
        self.base_classifer.fit(Xs, Ys)
        X_test = self.transform(X_test)
        y_pred = self.base_classifer.predict(X_test)
        acc = accuracy_score(Y_test, y_pred)
        return acc

In [None]:
# Evaluate SCL on every ordered (source, target) pair of domains and store
# the accuracies as JSON.
datasets = ['K', 'D', 'B', 'E']

results = {}
for dataset1 in datasets:
    # Each remaining domain acts as the target for this source domain.
    for dataset2 in (name for name in datasets if name != dataset1):
        # NOTE: joblib.load unpickles the file; only load trusted data.
        pkl_path = os.path.join(basepath, dataset1+'-'+dataset2+'.pkl')
        Xs, Ys, X_test, Y_test, Xt = joblib.load(pkl_path)
        Xs, X_test, Xt = (arr.astype('float') for arr in (Xs, X_test, Xt))
        model = SCL()
        acc = model.fit_predict(Xs, Xt, X_test, Ys, Y_test)
        print(dataset1, dataset2, acc)
        results[dataset1+dataset2] = acc

with open("SCL_record.json",'w') as json_file:
    json.dump(results, json_file)

K D 0.7225
K B 0.6625
K E 0.8115
D K 0.7455
D B 0.712
D E 0.726
B K 0.7515
B D 0.763
B E 0.737
E K 0.8345
E D 0.7005
E B 0.6975


 ## Marginalized Denoising Autoencoders for Domain Adaptation (mSDA)

In [None]:
class mSDA(object):
    '''
    Implement mSDA.
    To read more about the SDA, check the following paper:
        Chen M , Xu Z , Weinberger K , et al.
        Marginalized Denoising Autoencoders for Domain Adaptation[J].
        Computer Science, 2012.
    This implementation of mSDA is based on both the sample code the authors provided
    as well as the equations in the paper.
    The code is modified according to https://github.com/douxu896/mSDA
    '''
    def __init__(self, p=None, l=5, act=np.tanh, Ws=None, bias=True):
        '''
        :param p: corruption probability (must be set to a number before fitting)
        :param l: number of layers
        :param act: what nonlinearity to use? if None, not to use nonlinearity.
        :param Ws: model parameters. Can optionally pass in precomputed Ws to use to transform X.
                (e.g. if transforming test X with Ws learned from training X)
        :param bias: Whether to use bias?
        '''
        self.p = p
        self.l = l
        self.act = act
        self.Ws = Ws
        self.bias = bias

    def mDA(self, X, W=None):
        '''
        One layer Marginalized Denoising Autoencoder.
        Learn a representation h of X by reconstructing "corrupted" input but marginalizing out corruption
        :param X: input features, shape:(num_samples,num_features)
        :param W: model parameters. Can optionally pass in precomputed W to use to transform X.
                (e.g. if transforming test X with W learned from training X)
        :return: model parameters, reconstructed representation.
        '''
        if self.bias:
            # constant 1 column appended as the bias feature
            X = np.hstack((X, np.ones((X.shape[0], 1))))
        if W is None:
            W = self._compute_reconstruction_W(X)
        h = np.dot(X, W)  # no nonlinearity
        if self.act is not None:
            h = self.act(h)  # inject nonlinearity
        return W, h

    def _compute_reconstruction_W(self, X):
        '''
        Learn reconstruction parameters.
        :param X: input features, shape:(num_samples,num_features); when
                  self.bias is set, the last column is the bias column added by mDA.
        :return: model parameters W such that np.dot(X, W) is the reconstruction;
                 shape (d, d-1) with bias (the bias itself is not reconstructed)
                 and (d, d) without.
        '''
        # Convert to float64 without touching the caller's array. Assigning to
        # X.dtype would reinterpret the raw buffer in place, which corrupts
        # integer input and changes the caller's array.
        X = np.asarray(X, dtype="float64")
        d = X.shape[1]
        # q[j] is the probability that feature j survives the corruption
        if self.bias:
            q = np.ones(
                (d-1, 1)) * (1 - self.p)
            # the bias feature is never corrupted
            q = np.vstack((q, 1))
        else:
            q = np.ones(
                (d, 1)) * (1 - self.p)

        S = np.dot(X.transpose(), X)
        # Q = E[x~ x~^T]: off-diagonal S_ij*q_i*q_j, diagonal S_ii*q_i
        Q = S * (np.dot(q, q.transpose()))
        Q[np.diag_indices_from(Q)] = q[:,0] * np.diag(S)
        # P = E[x x~^T] = S_ij * q_j: the scaling follows the corrupted
        # (column) feature, so the never-corrupted bias column keeps weight 1.
        P = S * q.transpose()

        # solve equation of the form W = BA^-1
        A = Q + 10**-5 * np.eye(d)
        # Only the bias row is dropped from the reconstruction targets; with
        # no bias column every feature has to be reconstructed.
        B = P[:-1,:] if self.bias else P
        W = np.linalg.solve(A.transpose(), B.transpose())
        return W

    def fit(self, X):
        '''
        Stack mDA layers on top of each other, using previous layer as input for the next
        :param X: input features, shape:(num_samples,num_features)
        :return: None
        '''
        # Same layer-wise training as fit_transform; the output is discarded.
        self.fit_transform(X)

    def transform(self, X):
        '''
        Should be called after fit!
        Stack mDA layers on top of each other, using previous layer as input for the next
        :param X: input features, shape:(num_samples,num_features)
        :return: reconstructed representation of the last layer.
        '''
        if self.Ws is None:
            raise ValueError('Please fit on some data first.')
        h = X
        for layer in range(0, self.l):
            _, h = self.mDA(h, self.Ws[layer])
        return h

    def fit_transform(self, X):
        '''
        Stack mDA layers on top of each other, using previous layer as input for the next
        :param X: input features, shape:(num_samples,num_features)
        :return: reconstructed representation of the last layer.
        '''
        Ws = list()
        h = X
        for layer in range(0, self.l):
            # each layer is trained on the output of the previous one
            W, h = self.mDA(h)
            Ws.append(W)
        self.Ws = Ws
        return h


In [None]:
# test implementation
# Compare an SVM trained on raw features with one trained on mSDA features.

# NOTE(review): the earlier cells unpack these pickles as
# [Xs, Ys, X_test, Y_test, Xt] and score on X_test, so the 3rd/4th entries of
# 'K-D.pkl' may be the *target* (D) test set and the 5th the target unlabeled
# data - confirm that the "Xs_*" / "Xt_*" names below match the pickle layout.
# NOTE: joblib.load unpickles the files; only load trusted data.

# load dataset1
[Xs_train, Ys_train, Xs_test, Ys_test, Xs_unlabeled]=joblib.load(
            os.path.join(basepath, 'K-D.pkl'))
# load dataset2
[Xt_train, Xt_train_label, Xt_test, Xt_test_label, Xt_unlabeled]=joblib.load(
            os.path.join(basepath, 'D-K.pkl'))

# Baseline: classifier trained on the raw features (svm is imported in the
# notebook's import cell).
clf = svm.SVC().fit(Xs_train, Ys_train)
preds_Xs = clf.predict(Xs_test)
acc = np.mean(preds_Xs == Ys_test)
print("Xs acc on regular X: ", acc)
preds_Xt = clf.predict(Xt_test)
acc = np.mean(preds_Xt == Xt_test_label)
print("Xt acc on regular X: ", acc)

# set corruption probability, number of layers and bias.
pp = 0.3
ll = 5
bias = True
# mSDA is unsupervised: fit it on all available rows from both pickles.
train_X=np.concatenate((Xs_train,Xs_unlabeled,Xt_train,Xt_unlabeled),axis=0)
# Pass the bias setting defined above instead of a hard-coded True so that
# changing it actually takes effect.
msda=mSDA(p=pp, l=ll,act=np.tanh, Ws=None, bias=bias)
msda.fit(train_X)
Xs_reps = msda.transform(Xs_train)
print("Shape of mSDA Xs_reps h: ", Xs_reps.shape)
Xs_test_reps = msda.transform(Xs_test)
print("Shape of mSDA Xs_test_reps h: ", Xs_test_reps.shape)
Xt_reps = msda.transform(Xt_test)
print("Shape of mSDA Xt_test_reps h: ", Xt_reps.shape)

# NOTE(review): the messages below say "linear SVM" but svm.SVC() is created
# with its default kernel - confirm which one is intended.
clf = svm.SVC().fit(Xs_reps, Ys_train)
preds_Xs=clf.predict(Xs_test_reps)
acc=np.mean(preds_Xs == Ys_test)
print("Xs acc with linear SVM on mSDA features: ", acc)
preds_Xt = clf.predict(Xt_reps)
acc = np.mean(preds_Xt == Xt_test_label)
print("Xt acc with linear SVM on mSDA features: ", acc)


Xs acc on regular X:  0.711
Xt acc on regular X:  0.9725
Shape of mSDA Xs_reps h:  (2000, 5000)
Shape of mSDA Xs_test_reps h:  (2000, 5000)
Shape of mSDA Xt_test_reps h:  (2000, 5000)
Xs acc with linear SVM on mSDA features:  0.8
Xt acc with linear SVM on mSDA features:  0.965
