# Import Modules

In [1]:
import pandas as pd
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression

# Prepare Data

In [2]:
source_x = pd.read_csv("./data/2_X_train.csv")
source_y = pd.read_csv("./data/2_Y_train.csv")
source_x_y = pd.concat([source_x, source_y], axis=1)
source_x_y["Domain"] = 1

target_x = pd.read_csv("./data/5_X_train.csv")
target_y = pd.read_csv("./data/5_Y_train.csv")
target_x_y = pd.concat([target_x, target_y], axis=1)
target_x_y["Domain"] = 0

num_for_domain = 900
data_for_domain = pd.concat([source_x_y, target_x_y[:num_for_domain]], axis=0)
data_for_task = target_x_y[num_for_domain:]
data_for_task = data_for_task.drop(columns="Domain")

In [3]:
source_x_y

Unnamed: 0,Ratio,Mean,Max,Min,Std,Range,Lag-1_Ratio,Lag+1_Ratio,Season,Sunday,Am,Lunch,Pm,Occupancy,Domain
0,0.094933,2.878349e+05,380827.893,194841.812,92993.0405,185986.081,0.222852,0.045902,1,0,1,0,0,1,1
1,0.045902,1.391734e+05,155853.417,122493.428,16679.9945,33359.989,0.094933,0.049253,1,0,1,0,0,0,1
2,0.049253,1.493353e+05,158295.543,140374.991,8960.2760,17920.552,0.045902,0.069265,1,0,1,0,0,0,1
3,0.069265,1.500626e+05,168052.597,132072.549,17990.0240,35980.048,0.049253,0.082001,1,0,1,0,0,0,1
4,0.082001,1.582039e+05,170529.816,145877.968,12325.9240,24651.848,0.069265,0.066992,1,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1563,0.047140,1.304723e+05,132273.838,128670.815,1801.5115,3603.023,0.048721,0.119312,0,0,0,0,1,0,1
1564,0.119312,3.672752e+05,574214.753,160335.592,206939.5805,413879.161,0.047140,0.321851,0,0,0,0,1,0,1
1565,0.321851,9.907495e+05,996765.945,984733.152,6016.3965,12032.793,0.119312,0.356091,0,0,0,0,1,1,1
1566,0.356091,1.096151e+06,1305499.967,886801.848,209349.0595,418698.119,0.321851,0.263492,0,0,0,0,1,1,1


In [4]:
domain_discriminator = LogisticRegression()
task_classifier = LogisticRegression()

# Domain Discrimination

In [5]:
x_for_domain = data_for_domain.drop(columns=["Domain", "Occupancy"])
scaler = preprocessing.StandardScaler()
x_for_domain = scaler.fit_transform(x_for_domain)

y_for_domain = data_for_domain["Domain"]

In [6]:
domain_discriminator.fit(x_for_domain, y_for_domain.values)

LogisticRegression()

In [7]:
transferred_mask = domain_discriminator.predict(x_for_domain[:-num_for_domain]) == 0
num_transffered = sum(transferred_mask)
print(f"{num_transffered} samples transferred from Source to Target")

73 samples transferred from Source to Target


# Task Classification

In [8]:
transferred_data = pd.concat([data_for_domain[:-num_for_domain][transferred_mask], data_for_domain[-num_for_domain:]], axis=0) 
transferred_data = transferred_data.drop(columns=["Domain"])

transferred_x = transferred_data.drop(columns="Occupancy").values
transferred_x = scaler.fit_transform(transferred_x)
transferred_y = transferred_data["Occupancy"].values

task_x = data_for_task.drop(columns="Occupancy").values
task_y = data_for_task["Occupancy"].values

In [9]:
task_classifier.fit(transferred_x, transferred_y)
pred_y = task_classifier.predict(task_x)
acc = sum(pred_y == task_y)/task_y.shape[0]

print(f"+Adversarial Validation Accuracy:{acc}")

+Adversarial Validation Accuracy:0.8309859154929577


In [10]:
task_classifier.fit(transferred_x[num_transffered:], transferred_y[num_transffered:])
pred_y = task_classifier.predict(task_x)
acc = sum(pred_y == task_y)/task_y.shape[0]

print(f"-Adversarial Validation Accuracy:{acc}")

-Adversarial Validation Accuracy:0.8309859154929577
