In [1]:
import sys
import pickle
import numpy as np
import pandas as pd
import seaborn as sns
from collections import defaultdict

sys.path.insert(0, '../')
from relative_prevalence_benchmark.gpu_utils import restrict_GPU_pytorch
restrict_GPU_pytorch('1')

import torch

from wilds import get_dataset
from wilds.common.data_loaders import get_train_loader, get_eval_loader
from wilds.common.grouper import CombinatorialGrouper

import torch
import torchvision.transforms as transforms

sys.path.insert(0, '../wilds/examples/')
from transforms import initialize_transform, getBertTokenizer
from algorithms.initializer import initialize_algorithm

Using GPU:1


In [2]:
# Load the WILDS 'civilcomments' dataset (download enabled); every later cell
# takes its subsets, split names and grouper from this object.
full_dataset = get_dataset(dataset='civilcomments', download=True)

In [11]:
class Namespace:
    """Minimal attribute bag: each keyword argument becomes an instance attribute.

    Used to turn a plain config dictionary into an object whose entries can be
    read and written as ``obj.key``.
    """

    def __init__(self, **kwargs):
        # Attach every supplied key/value pair to the instance, in the order given.
        for attr_name, attr_value in kwargs.items():
            setattr(self, attr_name, attr_value)
        
# Load the pickled experiment configuration and expose its entries as attributes.
# The file is opened in a `with` block so the handle is closed as soon as the
# config has been read (the previous bare open() call never closed it).
# NOTE: pickle.load can execute arbitrary code; only load config files from a trusted source.
with open('../wilds/cc_config.pkl', 'rb') as config_file:
    config_dict = pickle.load(config_file)
config = Namespace(**config_dict)
# Override the algorithm stored in the config.
config.algorithm = 'ERM'

# Input transforms for the train and evaluation splits.
# NOTE(review): train_transform is built with is_training=False, exactly like
# eval_transform — confirm this is intended for this notebook.
train_transform = initialize_transform(transform_name=config.transform,
                                      config=config,
                                      dataset=full_dataset,
                                      is_training=False)
eval_transform = initialize_transform(transform_name=config.transform,
                                      config=config,
                                      dataset=full_dataset,
                                      is_training=False)


In [12]:
# Training subset of the full dataset, with train_transform applied.
train_data = full_dataset.get_subset('train', transform=train_transform)
# 'standard' train loader over that subset with a hard-coded batch size of 64.
# NOTE(review): train_data / train_loader are not referenced by the later cells
# visible here (the per-split loop builds its own loaders) — confirm they are still needed.
train_loader = get_train_loader('standard', train_data, batch_size=64)

# Grouper built from the config's groupby_fields; it is passed to the per-split
# loaders and to initialize_algorithm in later cells.
train_grouper = CombinatorialGrouper(dataset=full_dataset, groupby_fields=config.groupby_fields)

In [13]:
# Build, for every split of the dataset, a dict holding its subset ('dataset'),
# its data loader ('loader') and bookkeeping fields ('split', 'name', 'verbose').
#
# Fix: the eval-loader branch and the field assignments used to sit at column 0,
# which made `else:` a for/else clause. They then ran once, after the loop, for
# the last split only, leaving every other non-train split without a loader or
# fields. They are now part of the loop body so every split is fully populated.
datasets = defaultdict(dict)
for split in full_dataset.split_dict.keys():
    # 'train' uses the train transform; every other split uses the eval transform.
    # Only 'train' and 'val' are flagged verbose.
    if split == 'train':
        transform = train_transform
        verbose = True
    elif split == 'val':
        transform = eval_transform
        verbose = True
    else:
        transform = eval_transform
        verbose = False

    # Get subset
    datasets[split]['dataset'] = full_dataset.get_subset(
        split,
        frac=config.frac,
        transform=transform)

    # The train split gets a train loader; all other splits get an eval loader.
    if split == 'train':
        datasets[split]['loader'] = get_train_loader(
            loader=config.train_loader,
            dataset=datasets[split]['dataset'],
            batch_size=config.batch_size,
            uniform_over_groups=config.uniform_over_groups,
            grouper=train_grouper,
            distinct_groups=config.distinct_groups,
            n_groups_per_batch=config.n_groups_per_batch,
            **config.loader_kwargs)
    else:
        datasets[split]['loader'] = get_eval_loader(
            loader=config.eval_loader,
            dataset=datasets[split]['dataset'],
            grouper=train_grouper,
            batch_size=config.batch_size,
            **config.loader_kwargs)

    # Set fields
    datasets[split]['split'] = split
    datasets[split]['name'] = full_dataset.split_names[split]
    datasets[split]['verbose'] = verbose

    # Loggers (disabled in this notebook)
    # datasets[split]['eval_logger'] = BatchLogger(
    #     os.path.join(config.log_dir, f'{split}_eval.csv'), mode=mode, use_wandb=(config.use_wandb and verbose))
    # datasets[split]['algo_logger'] = BatchLogger(
    #     os.path.join(config.log_dir, f'{split}_algo.csv'), mode=mode, use_wandb=(config.use_wandb and verbose))

In [14]:
# Build the algorithm object from the config (config.algorithm is set to 'ERM'
# where the config is loaded), the per-split datasets dict and the grouper.
model = initialize_algorithm(config, datasets, train_grouper)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertClassifier: ['vocab_transform.bias', 'vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_projector.bias', 'vocab_layer_norm.weight']
- This IS expected if you are initializing DistilBertClassifier from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertClassifier from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertClassifier were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.weight', 'classifier.bias', 'pre_classifier.weight']
You should probably TRAIN t

In [20]:
# TODO: load the best trained weights here — this cell does not load anything yet;
# it only displays the wrapped network (model.model).
model.model

DistilBertClassifier(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
            (lin1): Li

In [None]:
# Format for content moderation experiments
### SETTING 1 

# Group attribute = subreddit
# Label = toxicity
# Note: toxicity can be either y or s 

### SETTING 2 
# Group attribute = ethnicity metadata
# Label = toxicity
# Note: these labels could be y or s, depending on which subset we work with

### Exploring examples

In [47]:
# NOTE(review): `dataset` is not assigned in any cell visible here, so this cell
# fails with a NameError on a fresh kernel (Restart & Run All). Presumably it was
# bound to full_dataset or one of its subsets in a since-removed cell — confirm
# and add the assignment.
idx = 3
# Show the example at position idx next to its entry in dataset.y_array.
dataset[idx], dataset.y_array[idx]

(("Blame men.  There's always an excuse to blame men for the failures of women.",
  tensor(1),
  tensor([1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1])),
 tensor(1))