In [73]:
# load packages:
# package requirements: tensorflow-gpu 1.15.2, keras 2.3.1, munkres
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.patches as pathces
import sys

from mmDUFS import JointModel
from mmDUFS import DiffModel
from mmDUFS import DataSet

from baselines import *

from sklearn.metrics import f1_score
from scipy import stats

In [44]:
# Shell escape: print the NVIDIA driver's per-GPU status table (memory use, utilisation).
!nvidia-smi

Thu Jun  1 01:14:09 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 520.61.05    Driver Version: 520.61.05    CUDA Version: 11.8     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:1A:00.0 Off |                  N/A |
| 29%   29C    P8    16W / 250W |  10775MiB / 11264MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce ...  Off  | 00000000:1B:00.0 Off |                  N/A |
| 29%   30C    P8    11W / 250W |  10826MiB / 11264MiB |      0%      Default |
|       

In [45]:
import os

# Expose only the GPU with index 1 to CUDA for this process.
# NOTE(review): the import cell above already imports mmDUFS; confirm that no
# CUDA context has been created before this variable is set.
os.environ.update(CUDA_VISIBLE_DEVICES='1')

In [106]:
# Load the synthetic tree archive and unpack its arrays.
# CAUTION: allow_pickle=True lets np.load unpickle object arrays, which can run
# arbitrary code -- only use it on archives from a trusted source.
tree_data = np.load("./datasets/synthetic_tree.npz", allow_pickle=True)

# The two data matrices and the array stored under 'y'.
X_z, Y_z, y = [tree_data[key] for key in ("X", "Y", "y")]

# Ground-truth feature vectors used by the joint (shared) evaluation ...
gt_feat_joint_X, gt_feat_joint_Y = [
    tree_data[key] for key in ("gt_feat_joint_X", "gt_feat_joint_Y")
]
# ... and by the diff evaluation.
gt_feat_diff_X, gt_feat_diff_Y = [
    tree_data[key] for key in ("gt_feat_diff_X", "gt_feat_diff_Y")
]

In [107]:
# Run the baseline feature-selection methods on both data matrices.
# nx / ny are the number of entries equal to 1 in each ground-truth joint vector.
# NOTE(review): knn, fac and laplacian are passed straight to fs_eval; their
# meaning is defined in the `baselines` module -- confirm there.
baseline_f1 = fs_eval(
    X=X_z,
    Y=Y_z,
    label_true_X=gt_feat_joint_X,
    label_true_Y=gt_feat_joint_Y,
    baselines=["concat", "sum", "prod"],
    nx=np.sum(gt_feat_joint_X == 1),
    ny=np.sum(gt_feat_joint_Y == 1),
    knn=2,
    fac=5,
    laplacian="normalized",
)
   

In [109]:
# Display the result of fs_eval: one entry per baseline, each with an 'X' and a 'Y' score.
baseline_f1

{'concat': {'X': 0.6, 'Y': 0.78},
 'sum': {'X': 0.78, 'Y': 0.8000000000000002},
 'prod': {'X': 0.8399999999999999, 'Y': 0.82}}

In [49]:
# Run mmDUFS with the shared operator (JointModel)

In [110]:
# Hyper-parameters for the JointModel run.
shared_mmdufs_params = dict(
    lam1=1e-1,                # lambda x
    lam2=1e-1,                # lambda y
    batch_size=X_z.shape[0],  # full batch: one batch holds every row of X_z
    const2=1e3,
)

# Training-loop settings used by the JointModel training cell.
shared_mmdfus_learning_rate = 2
display_step, epochs = 1000, 25000

In [111]:
# Wrap the two data matrices in the DataSet container expected by the model.
dataset = DataSet(_data1=X_z, _data2=Y_z)

# Add the per-matrix feature counts (column counts) to the parameter dict.
shared_mmdufs_params.update(
    input_dim1=X_z.shape[1],
    input_dim2=Y_z.shape[1],
)

model = JointModel(**shared_mmdufs_params)

# Train the joint model. The ground-truth vectors are passed as feature labels;
# presumably they only feed the per-modality F1 values printed in the training
# log -- confirm in mmDUFS.
shared_result = model.train(
    dataset,
    learning_rate=shared_mmdfus_learning_rate,
    feature_label1=gt_feat_joint_X,
    feature_label2=gt_feat_joint_Y,
    display_step=display_step,
    num_epoch=epochs,
)

num_samples : 1000
Epoch: 1000 loss= -2.807910919 score1= -1.421749949 score2= -1.544219255 reg1= 0.829390168 reg2= 0.751193166 f1 - Mod1 =  0.3983 f1 - Mod2 =  0.4848
Epoch: 2000 loss= -2.939616203 score1= -1.474189878 score2= -1.579069018 reg1= 0.645253658 reg2= 0.491172075 f1 - Mod1 =  0.4585 f1 - Mod2 =  0.5311
Epoch: 3000 loss= -3.097668171 score1= -1.504732728 score2= -1.685703158 reg1= 0.515264153 reg2= 0.412414789 f1 - Mod1 =  0.4973 f1 - Mod2 =  0.5595
Epoch: 4000 loss= -3.365438461 score1= -1.594656110 score2= -1.848757505 reg1= 0.415918767 reg2= 0.363833547 f1 - Mod1 =  0.5380 f1 - Mod2 =  0.5987
Epoch: 5000 loss= -4.438420773 score1= -2.008433104 score2= -2.492308378 reg1= 0.322523326 reg2= 0.300684094 f1 - Mod1 =  0.6345 f1 - Mod2 =  0.6763
Epoch: 6000 loss= -5.524314404 score1= -2.446123362 score2= -3.134174585 reg1= 0.285236210 reg2= 0.274599820 f1 - Mod1 =  0.6815 f1 - Mod2 =  0.7121
Epoch: 7000 loss= -7.309978962 score1= -3.101500511 score2= -4.257998943 reg1= 0.237864

In [112]:
def _top_k_indicator(scores, k, n_features):
    """Return a length-``n_features`` float vector with 1 at the ``k`` highest-scoring indices.

    Assumes ``scores`` is one-dimensional with ``n_features`` entries.
    """
    indicator = np.zeros(n_features)
    ranked = np.argsort(scores)[::-1]  # indices ordered from highest to lowest score
    indicator[ranked[:k]] = 1
    return indicator


# For each data matrix, keep as many top-ranked features (by raw alpha) as
# there are entries equal to 1 in the corresponding ground-truth vector.
selected_feat_common_1 = _top_k_indicator(
    model.get_raw_alpha1(),
    np.sum(gt_feat_joint_X == 1),
    X_z.shape[1],
)

selected_feat_common_2 = _top_k_indicator(
    model.get_raw_alpha2(),
    np.sum(gt_feat_joint_Y == 1),
    Y_z.shape[1],
)


In [113]:
# F1 between the ground-truth joint features of X and the top-k selection.
f1_score(y_true=gt_feat_joint_X, y_pred=selected_feat_common_1)

0.9

In [114]:
# F1 between the ground-truth joint features of Y and the top-k selection.
f1_score(y_true=gt_feat_joint_Y, y_pred=selected_feat_common_2)

0.88

In [None]:
# Run mmDUFS with the diff operator (DiffModel)

In [115]:
# Hyper-parameters for the first DiffModel run (X_z as the first input).
diff_mmdufs_params = dict(
    lam1=4,                   # lambda x
    batch_size=X_z.shape[0],  # full batch: one batch holds every row of X_z
    const=1e-3,
    const2=1e-3,
)

# Training-loop settings; note that display_step and epochs are rebound here.
diff_mmdfus_learning_rate = 2
display_step, epochs = 1000, 10000

In [116]:
# Add the feature (column) counts: X_z is input 1, Y_z is input 2.
diff_mmdufs_params.update(
    input_dim1=X_z.shape[1],
    input_dim2=Y_z.shape[1],
)

In [117]:
# Build the dataset with X_z first and Y_z second.
dataset = DataSet(_data1=X_z, _data2=Y_z)

diff_model1 = DiffModel(**diff_mmdufs_params)

# Train the diff model. The flattened ground-truth vector is passed as the
# feature label; presumably it only feeds the F1 printed in the log -- confirm.
# NOTE(review): the original carried a trailing "#30000" after num_epoch,
# which suggests a longer run was tried at some point.
diff_result1 = diff_model1.train(
    dataset,
    learning_rate=diff_mmdfus_learning_rate,
    feature_label=gt_feat_diff_X.reshape(-1),
    display_step=display_step,
    num_epoch=epochs,
)
  

num_samples : 1000
Epoch: 1000 loss= -2.276521921 score1= -2.985136747 reg1= 0.177153707 f1= 0.9126
Epoch: 2000 loss= -2.275680780 score1= -2.983282089 reg1= 0.176900312 f1= 0.9126
Epoch: 3000 loss= -2.278060675 score1= -2.985318661 reg1= 0.176814482 f1= 0.9126
Epoch: 4000 loss= -2.278174400 score1= -2.985270977 reg1= 0.176774129 f1= 0.9126
Epoch: 5000 loss= -2.278333426 score1= -2.985330343 reg1= 0.176749215 f1= 0.9126
Epoch: 6000 loss= -2.278401852 score1= -2.985341787 reg1= 0.176734969 f1= 0.9126
Epoch: 7000 loss= -2.278467655 score1= -2.985363960 reg1= 0.176724106 f1= 0.9126
Epoch: 8000 loss= -2.278208971 score1= -2.985074043 reg1= 0.176716268 f1= 0.9126
Epoch: 9000 loss= -2.278416157 score1= -2.985258341 reg1= 0.176710561 f1= 0.9126
Epoch: 10000 loss= -2.278525591 score1= -2.985349894 reg1= 0.176706076 f1= 0.9126
Optimization Finished!


In [118]:
# F1 for the X diff features: a feature counts as selected only where the
# model's prob-alpha value is exactly 1.
f1_score(
    y_true=gt_feat_diff_X.reshape(-1).astype(int),
    y_pred=(diff_model1.get_prob_alpha1() == 1).reshape(-1).astype(int),
)

0.912621359223301

In [119]:
# Hyper-parameters for the second DiffModel run (Y_z as the first input).
# This rebinds diff_mmdufs_params, replacing the dict used for the X run.
diff_mmdufs_params = dict(
    lam1=2,                   # lambda y
    batch_size=Y_z.shape[0],  # full batch: one batch holds every row of Y_z
    const=1e-3,
    const2=1e-3,
)

# Training-loop settings for this run.
diff_mmdfus_learning_rate = 2
display_step, epochs = 1000, 10000

In [120]:
# Add the feature (column) counts with the roles swapped: Y_z is input 1, X_z is input 2.
diff_mmdufs_params.update(
    input_dim1=Y_z.shape[1],
    input_dim2=X_z.shape[1],
)

In [121]:
# Build the dataset with the order swapped: Y_z first and X_z second.
dataset = DataSet(_data1=Y_z, _data2=X_z)

diff_model2 = DiffModel(**diff_mmdufs_params)

# Train the diff model. The flattened ground-truth vector is passed as the
# feature label; presumably it only feeds the F1 printed in the log -- confirm.
# NOTE(review): the original carried a trailing "#30000" after num_epoch,
# which suggests a longer run was tried at some point.
diff_result2 = diff_model2.train(
    dataset,
    learning_rate=diff_mmdfus_learning_rate,
    feature_label=gt_feat_diff_Y.reshape(-1),
    display_step=display_step,
    num_epoch=epochs,
)
  

num_samples : 1000
Epoch: 1000 loss= -1.173797607 score1= -1.502956271 reg1= 0.164579302 f1= 0.9091
Epoch: 2000 loss= -1.180788398 score1= -1.508459568 reg1= 0.163835570 f1= 0.9091
Epoch: 3000 loss= -1.182477474 score1= -1.509740591 reg1= 0.163631558 f1= 0.9091
Epoch: 4000 loss= -1.182379007 score1= -1.509461164 reg1= 0.163541049 f1= 0.9091
Epoch: 5000 loss= -1.181143761 score1= -1.508136511 reg1= 0.163496390 f1= 0.9091
Epoch: 6000 loss= -1.183027983 score1= -1.509958506 reg1= 0.163465261 f1= 0.9091
Epoch: 7000 loss= -1.183069706 score1= -1.509956002 reg1= 0.163443148 f1= 0.9091
Epoch: 8000 loss= -1.183000445 score1= -1.509855390 reg1= 0.163427472 f1= 0.9091
Epoch: 9000 loss= -1.183033109 score1= -1.509863853 reg1= 0.163415387 f1= 0.9091
Epoch: 10000 loss= -1.183167338 score1= -1.509978414 reg1= 0.163405553 f1= 0.9091
Optimization Finished!


In [122]:
# F1 for the Y diff features: a feature counts as selected only where the
# model's prob-alpha value is exactly 1.
f1_score(
    y_true=gt_feat_diff_Y.reshape(-1).astype(int),
    y_pred=(diff_model2.get_prob_alpha1() == 1).reshape(-1).astype(int),
)

0.9090909090909091