In [16]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn
from torch import optim
from geomloss import SamplesLoss
from torch.autograd import Function
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch.nn.functional import normalize
#from torchmetrics.classification import BinaryAccuracy
from torchmetrics.classification import BinaryF1Score
torch.manual_seed(0)

<torch._C.Generator at 0x7f22c4234730>

In [17]:
class Data(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X.astype(np.float32))
        self.y = torch.from_numpy(y).type(torch.LongTensor)
        self.len = self.X.shape[0]
       
    def __getitem__(self, index):
        return self.X[index], self.y[index]
   
    def __len__(self):
        return self.len

In [18]:
def get_data(data_type,file_num):

    if(data_type=='train'):
        data=pd.read_csv(f"Dataset/IHDP_a/ihdp_npci_train_{file_num}.csv")
    else:
        data = pd.read_csv(f"Dataset/IHDP_a/ihdp_npci_test_{file_num}.csv")

    x_data=pd.concat([data.iloc[:,0], data.iloc[:, 1:30]], axis = 1)
    #x_data=data.iloc[:, 5:30]
    y_data=data.iloc[:, 0]
    return x_data,y_data

In [19]:
def get_dataloader(x_data,y_data,batch_size):

    x_train_sr=x_data[x_data['treatment']==0]
    y_train_sr=y_data[x_data['treatment']==0]
    x_train_tr=x_data[x_data['treatment']==1]
    y_train_tr=y_data[x_data['treatment']==1]


    train_data_sr = Data(np.array(x_train_sr), np.array(y_train_sr))
    train_dataloader_sr = DataLoader(dataset=train_data_sr, batch_size=batch_size)

    train_data_tr = Data(np.array(x_train_tr), np.array(y_train_tr))
    train_dataloader_tr = DataLoader(dataset=train_data_tr, batch_size=batch_size)


    return train_dataloader_sr, train_dataloader_tr

In [29]:
input_dim=25
hidden_layers=50
output_dim=2

In [30]:
class Discriminator(nn.Module):
  def __init__(self):
    super(Discriminator, self).__init__()
    self.linear1 = nn.Linear(input_dim, hidden_layers)
    self.linear2 = nn.Linear(hidden_layers, output_dim)
  def forward(self, x):
    x = torch.sigmoid(self.linear1(x))
    x = self.linear2(x)
    return x

In [31]:
clf = Discriminator()

In [32]:
epochs=200
batch_size=16
criterion = nn.CrossEntropyLoss()
#optimizer = torch.optim.SGD(clf.parameters(), lr=0.1)

In [33]:
pehe_lo = []
domain_lo=[]
s_reg_lo=[]
t_reg_lo=[]
d_accu=[]
metric=BinaryF1Score()
num_files=45
for nf in range(44,num_files):
    x_data,y_data=get_data('train',nf)

    #optimizer3 = optim.SGD(Network.parameters(),lr=0.001,momentum=1e-8)
    optimizer = torch.optim.SGD(clf.parameters(), lr=0.1)

    for ep in range(1,epochs+1 ):
        s_reg=0
        t_reg=0
        dl=0
        peh=0
        running_loss = 0.0
        train_dataloader_sr, train_dataloader_tr=get_dataloader(x_data,y_data,batch_size)

        for batch_idx, (train_source_data, train_target_data) in enumerate(zip(train_dataloader_sr, train_dataloader_tr)):
            xs,ys=train_source_data
            xt,yt=train_target_data
            xs_train=xs[:,5:30]
            xt_train=xt[:,5:30]
            inputs=torch.cat((xs_train,xt_train),0)
            labels=torch.cat((ys,yt),0)
            # set optimizer to zero grad to remove previous epoch gradients
            optimizer.zero_grad()
            # forward propagation
            outputs = clf(inputs)
            loss = criterion(outputs, labels)
            # backward propagation
            loss.backward()
            # optimize
            optimizer.step()
            running_loss += loss.item()
          # display statistics
        print(f'[{ep + 1}, {batch_idx + 1:5d}] loss: {running_loss / 2000:.5f}')

[2,     8] loss: 0.00278
[3,     8] loss: 0.00276
[4,     8] loss: 0.00274
[5,     8] loss: 0.00272
[6,     8] loss: 0.00270
[7,     8] loss: 0.00268
[8,     8] loss: 0.00266
[9,     8] loss: 0.00264
[10,     8] loss: 0.00262
[11,     8] loss: 0.00260
[12,     8] loss: 0.00258
[13,     8] loss: 0.00255
[14,     8] loss: 0.00253
[15,     8] loss: 0.00250
[16,     8] loss: 0.00247
[17,     8] loss: 0.00245
[18,     8] loss: 0.00242
[19,     8] loss: 0.00239
[20,     8] loss: 0.00236
[21,     8] loss: 0.00233
[22,     8] loss: 0.00230
[23,     8] loss: 0.00227
[24,     8] loss: 0.00224
[25,     8] loss: 0.00221
[26,     8] loss: 0.00218
[27,     8] loss: 0.00215
[28,     8] loss: 0.00212
[29,     8] loss: 0.00209
[30,     8] loss: 0.00206
[31,     8] loss: 0.00204
[32,     8] loss: 0.00201
[33,     8] loss: 0.00198
[34,     8] loss: 0.00195
[35,     8] loss: 0.00193
[36,     8] loss: 0.00190
[37,     8] loss: 0.00187
[38,     8] loss: 0.00185
[39,     8] loss: 0.00182
[40,     8] loss: 0.

In [34]:
metric=BinaryF1Score()
x_data,y_data=get_data('test',44)
test_dataloader_sr, test_dataloader_tr=get_dataloader(x_data,y_data,batch_size)
correct, total = 0, 0
# no need to calculate gradients during inferenSce
with torch.no_grad():
  for batch_idx, (test_source_data, test_target_data) in enumerate(zip(test_dataloader_sr, test_dataloader_tr)):
    xs,ys=test_source_data
    xt,yt=test_target_data
    xs_test=xs[:,5:30]
    xt_test=xt[:,5:30]
    inputs=torch.cat((xs_test,xt_test),0)
    labels=torch.cat((ys,yt),0)
    #inputs, labels = data
    # calculate output by running through the network
    outputs = clf(inputs)
    # get the predictions
    __, predicted = torch.max(outputs.data, 1)
    # update results
    total += labels.size(0)
    print('F1 score: ',round(metric(predicted,labels).item(),3))
    correct += (predicted == labels).sum().item()
print('Accuracy: ',100 * correct // total )

F1 score:  0.923
Accuracy:  93


In [15]:
print(labels.shape)

torch.Size([29])
