In [1]:
import torch
import numpy as np

def load_user_attributes_movielens(file, M):
    """Parse a MovieLens-style ``users.dat`` file into a per-user attribute map.

    Every non-blank line is split on ``'::'``; the first four fields are read
    as user id, gender, age and occupation. Any further fields are ignored.

    Args:
        file: Path of the text file to read.
        M: Unused. Kept so existing callers that pass it keep working.

    Returns:
        dict mapping ``int`` user id -> ``(gender, age, occupation)`` where
        ``gender`` is the raw string field and the other two are ``int``.

    Raises:
        ValueError: if an id, age or occupation field is not an integer.
        IndexError: if a non-blank line has fewer than four fields.
    """
    user_attributes = {}

    with open(file, 'r') as fin:
        # Stream the file instead of materialising every line up front.
        for line in fin:
            record = line.strip()
            if not record:
                # Blank or trailing empty lines carry no user; skip them
                # rather than crashing on int('').
                continue
            fields = record.split('::')
            user_idx = int(fields[0])
            user_attributes[user_idx] = (fields[1], int(fields[2]), int(fields[3]))

    return user_attributes


In [2]:
import argparse
from model import Grace
from aug import aug
from dataset import load

import numpy as np
import torch as th
import torch.nn as nn

from eval import label_classification, eval_unbiasedness_movielens
import warnings

warnings.filterwarnings('ignore')


def count_parameters(model):
    """Return the total element count over the model's trainable parameters.

    Only parameters with ``requires_grad`` set contribute; each contributes
    the product of its dimensions.
    """
    trainable_sizes = []
    for param in model.parameters():
        if param.requires_grad:
            trainable_sizes.append(np.prod(param.size()))
    return sum(trainable_sizes)


# Experiment configuration. argparse is used as a typed container of defaults:
# inside the notebook nothing is read from the real command line.
parser = argparse.ArgumentParser()
parser.add_argument('--dataname', type=str, default='movielens', help='Name of the dataset to load.')
parser.add_argument('--gpu', type=int, default=0, help='GPU index to use; -1 forces CPU.')
parser.add_argument('--split', type=str, default='random')
parser.add_argument('--debias_method', type=str, default='uge-w', choices=['uge-r', 'uge-w', 'uge-c', 'none'], help='debiasing method to apply')
parser.add_argument('--debias_attr', type=str, default='age', help='sensitive attribute to be debiased')
parser.add_argument('--reg_weight', type=float, default=0.2, help='weight for the regularization based debiasing term')

parser.add_argument('--epochs', type=int, default=100, help='Number of training periods.')
parser.add_argument('--lr', type=float, default=0.001, help='Learning rate.')
parser.add_argument('--wd', type=float, default=1e-5, help='Weight decay.')
parser.add_argument('--temp', type=float, default=1.0, help='Temperature.')

# Restrict to the activations the notebook knows how to build, so a typo is
# rejected here instead of surfacing later as a bare KeyError.
parser.add_argument('--act_fn', type=str, default='relu', choices=['relu', 'prelu'], help='Activation function.')

parser.add_argument("--hid_dim", type=int, default=256, help='Hidden layer dim.')
parser.add_argument("--out_dim", type=int, default=256, help='Output layer dim.')

parser.add_argument("--num_layers", type=int, default=2, help='Number of GNN layers.')
parser.add_argument('--der1', type=float, default=0.2, help='Drop edge ratio of the 1st augmentation.')
parser.add_argument('--der2', type=float, default=0.2, help='Drop edge ratio of the 2nd augmentation.')
parser.add_argument('--dfr1', type=float, default=0.2, help='Drop feature ratio of the 1st augmentation.')
parser.add_argument('--dfr2', type=float, default=0.2, help='Drop feature ratio of the 2nd augmentation.')

# Pass an explicit empty argument list so only the defaults above are used and
# the kernel's own sys.argv is never parsed.
args = parser.parse_args([])

# Use the requested CUDA device when one is available, otherwise fall back to CPU.
if args.gpu != -1 and th.cuda.is_available():
    args.device = 'cuda:{}'.format(args.gpu)
else:
    args.device = 'cpu'

# Step 1: Load hyperparameters =================================================================== #
# Optimiser / training-loop settings.
lr, wd = args.lr, args.wd
epochs, temp = args.epochs, args.temp

# Encoder shape and activation.
hid_dim, out_dim = args.hid_dim, args.out_dim
num_layers = args.num_layers
act_fn = {'relu': nn.ReLU(), 'prelu': nn.PReLU()}[args.act_fn]

# Edge / feature drop ratios for the two augmented views.
drop_edge_rate_1, drop_edge_rate_2 = args.der1, args.der2
drop_feature_rate_1, drop_feature_rate_2 = args.dfr1, args.dfr2

debias_method = args.debias_method

# Step 2: Prepare data =================================================================== #
# 'uge-w' and 'uge-c' use the attribute-specific dataset variant named
# '<dataname>_debias_<attr>'; every other method loads the plain dataset.
dataset = (
    '{}_debias_{}'.format(args.dataname, args.debias_attr)
    if debias_method in ['uge-w', 'uge-c']
    else args.dataname
)

graph = load(dataset)
# Input width is the second dimension of the node feature matrix.
in_dim = graph.ndata['feat'].shape[1]

Precomputed weights for weighting-based debiasing UGE-W Loaded
Creating DGL graph...
Finished data loading and preprocessing.
  NumNodes: 9992
  NumEdges: 2010410
  NumFeats: 18


In [3]:
from dataset import SENSITIVE_ATTR_DICT  # predefined sensitive attributes for different datasets
from dataset import DATA_FOLDER, RAW_FOLDER

# M is the exclusive upper bound for the user ids iterated below (ids start at 1).
# N is presumably the analogous bound for item ids -- TODO confirm; it is not used here.
M, N = 6040 + 1, 3952 + 1

user_attributes = load_user_attributes_movielens(
    '{}/ml-1m/users.dat'.format(RAW_FOLDER),
    M,
)

# One array per attribute, ordered by user id 1 .. M-1.
# Gender is encoded as 1 for 'M' and 0 otherwise.
genders = np.array([int(user_attributes[uid][0] == 'M') for uid in range(1, M)])
ages = np.array([int(user_attributes[uid][1]) for uid in range(1, M)])
occupations = np.array([int(user_attributes[uid][2]) for uid in range(1, M)])

NameError: name 'a' is not defined

In [9]:
# Sanity check: number of users for which a gender flag was extracted.
genders.shape[0]

6040

In [11]:
class FairAugMatr:
  """Per-edge indicator of whether both endpoints share a sensitive attribute.

  Attributes:
    sens_attr_list: the attribute values exactly as passed in.
    edges: ``(num_edges, 2)`` tensor of index pairs taken from the coalesced
      adjacency returned by ``graph.adj()``.
    weights: boolean tensor with one entry per row of ``edges``. An entry is
      True when both endpoint indices fall inside ``sens_attr_list`` and their
      attribute values are equal, and False otherwise.
  """

  def __init__(self, graph, sens_attr_list):
    """Build ``edges`` and ``weights`` for ``graph``.

    Args:
      graph: object whose ``adj()`` returns a sparse adjacency supporting
        ``coalesce().indices()``.
      sens_attr_list: 1-D sequence (numpy array, tensor or list) of attribute
        values indexed by node id. Nodes with an id at or beyond its length
        are treated as having no attribute.
    """
    adj = graph.adj()

    self.sens_attr_list = sens_attr_list
    self.edges = adj.coalesce().indices().t()

    # Do the lookup in numpy so any attribute dtype (ints, strings, ...) works.
    if torch.is_tensor(sens_attr_list):
      attrs = sens_attr_list.detach().cpu().numpy()
    else:
      attrs = np.asarray(sens_attr_list)
    num_attr_nodes = attrs.shape[0]

    endpoints = self.edges.detach().cpu().numpy()
    src, dst = endpoints[:, 0], endpoints[:, 1]

    # Only edges whose two endpoints both have an attribute can match.
    covered = (src < num_attr_nodes) & (dst < num_attr_nodes)
    same_attr = np.zeros(endpoints.shape[0], dtype=bool)
    same_attr[covered] = attrs[src[covered]] == attrs[dst[covered]]

    # Stored as a tensor (not a Python list) so reductions such as
    # ``weights.sum()`` work directly.
    self.weights = torch.from_numpy(same_attr).to(self.edges.device)


# Build the same-attribute edge indicator, using the binary gender flags as
# the sensitive attribute.
a = FairAugMatr(graph, genders)

In [16]:
# Count the edges flagged as joining two nodes with the same attribute value.
# NOTE(review): this needs `weights` to be a tensor/array exposing `.sum()`;
# a plain Python list does not provide it.
a.weights.sum()

tensor(6040)