# STAFF - CPD

In [1]:
import sys
sys.path.append('../src')
from read import *
from model import *
from train import *
from metric import *
import torch  # imported explicitly: the star-imports above are not guaranteed to re-export it

# Prefer the first GPU, but fall back to the CPU so the notebook still runs
# end-to-end (Restart Kernel -> Run All) on a machine without CUDA.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

### 1. Configuration of the dataset and run settings.

In [2]:
# Dataset to load and the directory it is read from.
data = 'lastfm_time'
data_path = '../data'

# Run-wide settings, attached to the config object in their original order.
# NOTE(review): `unfair` looks like the ratio used by the "sparsify the minority
# group" step reported in the loader log, and `random` looks like a seed / run id
# (both values show up in the cached sampling file names) -- confirm in `read`.
cfg = DotMap()
for _key, _value in (
    ('name', data),
    ('dpath', data_path),
    ('opath', '../output'),
    ('unfair', 0.05),
    ('bs', 1024),
    ('random', 1),
    ('device', device),
):
    setattr(cfg, _key, _value)

# Build the dataset object from the config, then copy its `sizes` onto the
# config so later cells can read it from `cfg`.
tensor = TensorDataset(
    cfg=cfg,
    path=cfg.dpath,
    name=cfg.name,
)
cfg.sizes = tensor.sizes

***********************************************************************************
[1] Read lastfm_time self...
[2] Read metadata...
[3] No normalization; values are already binary...
[4] Split the tensor into training/validation/test
 [4 - 1] Sparsify the minority group to make it more unfair
[5] Make statistics of group information
[6] Change the date type into torch
[7] Read lastfm_time tensor done...!
Tensor      || ['user', 'artist', 'time']; value
NNZ         || [861, 3066, 1586]; 76727 | 14311 | 14311
Sens. Attr  || user, gender: maj(['M']) min(['F'])
Entity      || Majority: 493 Minority: 368
NNZ         || Majority: [74740] Minority: [1987]
***********************************************************************************


### 2. Configuration of model hyperparameters and augmentation.

In [3]:
# Model / optimisation hyperparameters, assigned left-to-right in the same
# order as before.
# NOTE(review): presumably factorisation rank, learning rate, weight decay and
# iteration budget -- confirm against `model` / `train`.
cfg.rank, cfg.lr, cfg.wd, cfg.n_iters = 10, 0.01, 0.01, 10000

# Notebook-level flag; none of the cells shown in this notebook read it.
verbose = True

In [4]:
# `tf` and `aug_tf` share the same value ('cpd'); `tf` is still assigned first.
cfg.tf = cfg.aug_tf = 'cpd'

# Remaining augmentation settings, applied in their original order.
# NOTE(review): aug_modes "0" lines up with the 'user' mode named in the
# augmentation log, and K / gamma appear in the cached sampling file names --
# confirm the exact semantics in `read`.
for _key, _value in (
    ('sampling', 'knn'),
    ('aug_modes', "0"),
    ('K', 3),
    ('gamma', 0.9),
    ('wd2', 0.01),
    ('aug_training', False),
):
    setattr(cfg, _key, _value)

# Called only after every setting above is in place.
tensor.load_data()


In [5]:
# Run the augmentation-loading step for `tensor` with the settings in `cfg`.
# NOTE(review): judging by the log printed below, this reads cached
# dist/graph/df CSV files from under the output directory rather than
# recomputing them -- confirm in `read`.
read_augment(tensor, cfg)

***********************************************************************************
Augment entities with fair K-NN graph 
Augmentation for the 'user' mode 
Read a dist file from [../output/lastfm_time/sampling/0.05_cpd_0.9_3_1_dist.csv] (861, 4)
Read a graph file from [../output/lastfm_time/sampling/0.05_cpd_0.9_3_1_graph.csv] (861, 4)
Read a df file from [../output/lastfm_time/sampling/0.05_cpd_0.9_3_1_df.csv] (30749, 6)


### 3. Building a model

In [6]:
# Build the CPD model from the shared config, then rebind `model` to the
# result of moving it onto the configured device.
model = CPD(cfg)
model = model.to(cfg.device)

# Bare last expression so the notebook displays the module summary.
model

CPD(
  (factors): ParameterList(
      (0): Parameter containing: [torch.float32 of size 1722x10 (GPU 0)]
      (1): Parameter containing: [torch.float32 of size 3066x10 (GPU 0)]
      (2): Parameter containing: [torch.float32 of size 1586x10 (GPU 0)]
  )
)

### 4. Train a model

In [7]:
# Hand the model, dataset and config to a Trainer.
# NOTE(review): wandb=None presumably disables experiment logging -- confirm in `train`.
trainer = Trainer(model, tensor, cfg, wandb=None)
# Run training; the log below reports training loss, fair loss and
# train/valid RMSE for each iteration.
trainer.train()

Iters:   1 || training loss: 14469.57295	fair loss: 59458.50787	Train RMSE: 0.25727 Valid RMSE: 0.32735	
Iters:   2 || training loss: 6793.53444	fair loss: 17766.80487	Train RMSE: 0.22351 Valid RMSE: 0.32218	
Iters:   3 || training loss: 5552.17830	fair loss: 8174.27462	Train RMSE: 0.20616 Valid RMSE: 0.32131	
Iters:   4 || training loss: 4816.39394	fair loss: 5560.39015	Train RMSE: 0.19114 Valid RMSE: 0.31862	
Iters:   5 || training loss: 4179.72478	fair loss: 4779.35504	Train RMSE: 0.17673 Valid RMSE: 0.31600	
Iters:   6 || training loss: 3637.50501	fair loss: 4638.69456	Train RMSE: 0.16446 Valid RMSE: 0.31252	
Iters:   7 || training loss: 3217.08371	fair loss: 4662.86038	Train RMSE: 0.15443 Valid RMSE: 0.31023	
Iters:   8 || training loss: 2883.80267	fair loss: 4658.60665	Train RMSE: 0.14652 Valid RMSE: 0.30837	
Iters:   9 || training loss: 2645.47212	fair loss: 4693.80682	Train RMSE: 0.13995 Valid RMSE: 0.30776	
Iters:  10 || training loss: 2458.01136	fair loss: 4718.02899	Train RM

### 5. Evaluate the fairness and accuracy of the model for tensor completion

In [8]:
# Evaluate the trained model once; `res` is indexed by metric name.
res = evaluate_model(model, tensor)

# MSE is reported as the square of the test RMSE.
test_rmse = res['test_rmse']
print(f"MSE : {test_rmse * test_rmse:.4f}")

# Group-fairness summary: absolute MAD followed by the two per-group errors.
# NOTE(review): in the output below MAD equals the gap between the two group
# errors -- confirm the definition in `metric`.
mad_abs = abs(res['MAD_Error'])
group0_err = res['Group0_Error']
group1_err = res['Group1_Error']
print(f"MAD: {mad_abs:.5f} Error1 : {group0_err:.5f} Error2: {group1_err:.5f}")

Test NRE: 0.3200 Test RMSE: 0.2969
***********************************************************************************
Calculate group fairness...
***********************************************************************************
MSE : 0.0882
MAD: 0.10842 Error1 : 0.04964 Error2: 0.15806
