# WANN experiments on Sentiment Analysis dataset

In [2]:
import copy
import sys

import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler, MaxAbsScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Input, Dropout
from tensorflow.keras.constraints import MinMaxNorm

# Make the project-local `wann` package importable. A forward slash is used
# because it is accepted on Windows as well as POSIX, whereas a backslash is
# a literal file-name character on Linux/macOS.
sys.path.append("../wann")
from utils import sa, BaggingModels, cross_val
from sa_experiments import run_sa_experiments
from methods import *

from warnings import filterwarnings
# Suppress every Python warning for the rest of the session.
filterwarnings('ignore')
# Restrict TensorFlow's (compat.v1) logger to messages of level ERROR.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

## Load Dataset

In [3]:
### 4 domains: electronics, books, dvd, kitchen
source = 'electronics'
target = 'dvd'

# Sample sizes used to build the index subsets below.
N = 50   # Number of labeled target data
m = 700  # Number of labeled source data
n = 700  # Number of unlabeled target data

X, y, src_index, tgt_index = sa(source, target)
shape = X.shape[1]

# Standardize the labels with the mean/std of the source labels only.
# The statistics are taken over the full source index, before subsampling.
mu = y[src_index].mean()
std = y[src_index].std()
y = (y - mu) / std

# Draw the index subsets. The order of the random calls is significant:
# no random_state is passed to train_test_split, so the split depends on
# NumPy's global RNG seeded on the next line.
np.random.seed(0)
src_index = np.random.choice(src_index, size=m, replace=False)
tgt_index, tgt_test_index = train_test_split(tgt_index, train_size=n, test_size=1000)
tgt_train_index = np.random.choice(tgt_index, size=N, replace=False)

# Labeled training set: sampled source rows followed by the labeled target rows.
train_index = np.concatenate((src_index, tgt_train_index))

## Base Estimator

In [11]:
def get_base_model(shape, activation=None, C=1, name="BaseModel"):
    """Build and compile the fully connected base network.

    Architecture: Dense(100, relu) -> Dropout(0.5) -> Dense(10, relu)
    -> Dropout(0.2) -> Dense(1, `activation`).

    Parameters
    ----------
    shape : int
        Number of input features (the model input has shape ``(shape,)``).
    activation : str or None
        Activation applied to the single output unit.
    C : number
        Second argument of ``MinMaxNorm(0, C)``, applied as kernel and bias
        constraint on every Dense layer.
    name : str
        Name given to the Keras model.

    Returns
    -------
    Model
        Keras model compiled with the 'adam' optimizer and
        'mean_squared_error' loss.
    """
    def constrained_dense(units, act):
        # Each layer gets its own constraint objects, as in a hand-written stack.
        return Dense(units, activation=act,
                     kernel_constraint=MinMaxNorm(0, C),
                     bias_constraint=MinMaxNorm(0, C))

    inputs = Input(shape=(shape,))
    hidden = constrained_dense(100, 'relu')(inputs)
    hidden = Dropout(0.5)(hidden)
    hidden = constrained_dense(10, 'relu')(hidden)
    hidden = Dropout(0.2)(hidden)
    outputs = constrained_dense(1, activation)(hidden)

    network = Model(inputs, outputs, name=name)
    network.compile(optimizer='adam', loss='mean_squared_error')
    return network


def get_encoder(shape, C=1, name="encoder"):
    """Build and compile the encoder (feature extractor) network.

    Architecture: Dense(100, relu) -> Dropout(0.5) -> Dense(10, relu)
    -> Dropout(0.2); the output therefore has 10 units.

    Parameters
    ----------
    shape : int
        Number of input features (the model input has shape ``(shape,)``).
    C : number
        Second argument of ``MinMaxNorm(0, C)``, applied as kernel and bias
        constraint on every Dense layer.
    name : str
        Accepted for signature compatibility; it is not forwarded to the
        Keras model, which keeps its automatically generated name.

    Returns
    -------
    Model
        Keras model compiled with the 'adam' optimizer and
        'mean_squared_error' loss.
    """
    inputs = Input(shape=(shape,))
    features = inputs
    # (units, dropout rate) for each hidden stage, applied in order.
    for units, drop_rate in ((100, 0.5), (10, 0.2)):
        features = Dense(units, activation='relu',
                         kernel_constraint=MinMaxNorm(0, C),
                         bias_constraint=MinMaxNorm(0, C))(features)
        features = Dropout(drop_rate)(features)

    encoder = Model(inputs, features)
    encoder.compile(optimizer="adam", loss='mean_squared_error')
    return encoder


def get_task(shape, C=1, activation=None, name="task"):
    """Build and compile the task head: a single constrained Dense(1) layer.

    Parameters
    ----------
    shape : int
        Number of input features (the model input has shape ``(shape,)``).
    C : number
        Second argument of ``MinMaxNorm(0, C)``, applied as kernel and bias
        constraint on the Dense layer.
    activation : str or None
        Activation applied to the single output unit.
    name : str
        Accepted for signature compatibility; it is not forwarded to the
        Keras model, which keeps its automatically generated name.

    Returns
    -------
    Model
        Keras model compiled with the 'adam' optimizer and
        'mean_squared_error' loss.
    """
    inputs = Input(shape=(shape,))
    output_layer = Dense(1, activation=activation,
                         kernel_constraint=MinMaxNorm(0, C),
                         bias_constraint=MinMaxNorm(0, C))
    task = Model(inputs, output_layer(inputs))
    task.compile(optimizer="adam", loss='mean_squared_error')
    return task



# Shared base estimator: a BaggingModels wrapper configured with a single
# model built by get_base_model; `shape` and `C` are passed as keyword
# arguments to the wrapper.
base_estimator = BaggingModels(
    func=get_base_model,
    n_models=1,
    n_jobs=None,
    shape=shape,
    C=1,
    random_state=0,
)
# Keyword arguments forwarded to every `.fit(...)` call in this notebook.
fit_params = {
    "epochs": 200,
    "batch_size": 64,
    "verbose": 0,
}

## Examples

### No reweight

In [4]:
# Baseline: fit a copy of the base estimator on the labeled source and
# labeled target rows together, with no instance reweighting.
no_reweight = copy.deepcopy(base_estimator)
no_reweight.fit(X[train_index], y[train_index], **fit_params)

# Predict on every row, then score only the held-out target test rows.
y_pred = no_reweight.predict(X)
score = mean_squared_error(y_true=y[tgt_test_index],
                           y_pred=y_pred[tgt_test_index])
print('Target score: %.3f' % score)

Target score: 0.997


### TrAdaBoostR2

In [6]:
# TwoStageTrAdaBoostR2 receives the model factory (get_base_model) plus the
# factory's keyword arguments (C, shape) rather than a pre-built estimator.
tradaboost = TwoStageTrAdaBoostR2(
    func=get_base_model,
    verbose=1,
    n_jobs=None,
    C=1,
    random_state=0,
    shape=X.shape[1],
)
# The index list holds the source rows and the labeled target rows.
tradaboost.fit(X, y, [src_index, tgt_train_index], **fit_params)

# Predict on every row, then score only the held-out target test rows.
y_pred = tradaboost.predict(X)
score = mean_squared_error(y_true=y[tgt_test_index],
                           y_pred=y_pred[tgt_test_index])
print('Target score: %.3f' % score)

cv error of estimator 0: 0.937 (0.2184746222)
cv error of estimator 1: 0.967 (0.2514102849)
cv error of estimator 2: 1.041 (0.3927827499)
cv error of estimator 3: 1.164 (0.5074053409)
cv error of estimator 4: 1.238 (0.6250339594)
cv error of estimator 5: 1.295 (0.7800075651)
cv error of estimator 6: 1.289 (0.8230790703)
cv error of estimator 7: 1.155 (0.7169947271)
cv error of estimator 8: 0.993 (0.5506487518)
cv error of estimator 9: 0.982 (0.4590033025)
Target score: 0.988


### KMM

In [7]:
# Hand KMM its own copy of the shared base estimator, as the "No reweight"
# cell does, so that fitting here cannot alter `base_estimator` for later
# cells. NOTE(review): whether KMM fits the estimator in place is not visible
# from this notebook; the copy is defensive.
kmm = KMM(copy.deepcopy(base_estimator))
# The index list holds the source rows, the target rows and the labeled
# target rows, in that order.
kmm.fit(X, y, index=[src_index, tgt_index, tgt_train_index], **fit_params)

# Predict on every row, then score only the held-out target test rows.
y_pred = kmm.predict(X)
score = mean_squared_error(y[tgt_test_index], y_pred[tgt_test_index])
print('Target score: %.3f' % score)

Target score: 1.087


### KLIEP

In [8]:
# KLIEP wraps the shared base estimator and is fitted with the same index
# triple as KMM: source rows, target rows, labeled target rows.
kliep = KLIEP(base_estimator)
kliep.fit(X, y,
          index=[src_index, tgt_index, tgt_train_index],
          **fit_params)

# Predict on every row, then score only the held-out target test rows.
y_pred = kliep.predict(X)
score = mean_squared_error(y_true=y[tgt_test_index],
                           y_pred=y_pred[tgt_test_index])
print('Target score: %.3f' % score)

Target score: 0.996


### DANN

In [12]:
# DANN is wrapped in BaggingModels (a single model) and built from the
# encoder and task factories defined in this notebook.
dann = BaggingModels(
    DANN,
    n_models=1,
    n_jobs=None,
    random_state=0,
    get_encoder=get_encoder,
    get_task=get_task,
    lambda_=0.02,
)
# The index list holds the source rows, the target rows and the labeled
# target rows, in that order.
dann.fit(X, y, index=[src_index, tgt_index, tgt_train_index], **fit_params)

# Predict on every row, then score only the held-out target test rows.
y_pred = dann.predict(X)
score = mean_squared_error(y_true=y[tgt_test_index],
                           y_pred=y_pred[tgt_test_index])
print('Target score: %.3f' % score)

Target score: 1.150


### WANN

In [13]:
# WANN is wrapped in BaggingModels (a single model) and uses get_base_model
# as its network factory; C_w=0.2 is the value selected by the
# cross-validation cell of this notebook.
wann = BaggingModels(
    WANN,
    n_models=1,
    n_jobs=None,
    random_state=0,
    get_base_model=get_base_model,
    C=1,
    C_w=0.2,
)
# Only the source rows and the labeled target rows are passed here.
wann.fit(X, y, index=[src_index, tgt_train_index], **fit_params)

# Predict on every row, then score only the held-out target test rows.
y_pred = wann.predict(X)
score = mean_squared_error(y_true=y[tgt_test_index],
                           y_pred=y_pred[tgt_test_index])
print('Target score: %.3f' % score)

Target score: 0.963


#### Cross Validation Cw Selection

In [14]:
# Candidate values tried by the 5-fold cross-validation.
cw_grid = [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1]

# The fifth positional argument is None (presumably the unlabeled target
# index, which the WANN cell does not use; confirm against
# utils.cross_val). The trailing semicolon hides the returned value.
cross_val("WANN", X, y, src_index, None, tgt_train_index,
          params=cw_grid,
          fit_params=fit_params,
          cv=5,
          get_base_model=get_base_model,
          C=1);

Cross Validation: param = 0.010 | score = 1.0933
Cross Validation: param = 0.020 | score = 1.0744
Cross Validation: param = 0.050 | score = 1.0392
Cross Validation: param = 0.100 | score = 1.0090
Cross Validation: param = 0.200 | score = 0.9978
Cross Validation: param = 0.500 | score = 1.0255
Cross Validation: param = 1.000 | score = 1.0535
Best: param = 0.200 | score = 0.9978


## Run Experiments

In [15]:
# Full configuration of the experiment run, gathered in one place.
wann_experiment_kwargs = dict(
    method="WANN",
    # Network factories defined in this notebook.
    get_base_model=get_base_model,
    get_task=get_task,
    get_encoder=get_encoder,
    # Method hyper-parameters.
    C=1,
    C_w=0.2,
    lambda_=0.1,
    sigma=0.1,
    # Training settings (same epochs / batch size as fit_params).
    epochs=200,
    batch_size=64,
    n_models=1,
    n_jobs=None,
    # Sample sizes (same values as m, n, N and the test size used when
    # loading the dataset).
    n_source=700,
    n_target_unlabeled=700,
    n_target_labeled=50,
    n_target_test=1000,
    random_state=0,
    save=False,
)

df = run_sa_experiments(**wann_experiment_kwargs)

Experiment for method: WANN


############# dvd #############
--------- books ----------
Target_score: 0.990
--------- electronics ----------
Target_score: 0.897
--------- kitchen ----------
Target_score: 0.977
############# books #############
--------- dvd ----------
Target_score: 1.039
--------- electronics ----------
Target_score: 1.046
--------- kitchen ----------
Target_score: 1.019
############# electronics #############
--------- dvd ----------
Target_score: 0.963
--------- books ----------
Target_score: 1.000
--------- kitchen ----------
Target_score: 0.823
############# kitchen #############
--------- dvd ----------
Target_score: 0.931
--------- books ----------
Target_score: 1.082
--------- electronics ----------
Target_score: 0.834


### Launch all experiments (all methods, 10 times)
Uncomment the cell below to launch the experiments.

In [None]:
# %run -i ../wann/sa_experiments