In [None]:
# Start from a fresh checkout of the DeBERTa fork (any previous copy is deleted),
# then install the Python requirements listed in the repository.
!rm -rf DeBERTa
!git clone https://github.com/huberemanuel/DeBERTa.git
!pip install -r DeBERTa/requirements.txt

Cloning into 'DeBERTa'...
remote: Enumerating objects: 233, done.[K
remote: Counting objects: 100% (233/233), done.[K
remote: Compressing objects: 100% (165/165), done.[K
remote: Total 397 (delta 142), reused 129 (delta 63), pack-reused 164[K
Receiving objects: 100% (397/397), 434.81 KiB | 11.15 MiB/s, done.
Resolving deltas: 100% (197/197), done.
Collecting ujson
[?25l  Downloading https://files.pythonhosted.org/packages/f1/84/e039c6ffc6603f2dfe966972d345d4f650a4ffd74b18c852ece645de12ac/ujson-4.0.1-cp36-cp36m-manylinux1_x86_64.whl (179kB)
[K     |████████████████████████████████| 184kB 9.0MB/s 
[?25hCollecting seqeval
[?25l  Downloading https://files.pythonhosted.org/packages/9d/2d/233c79d5b4e5ab1dbf111242299153f3caddddbb691219f363ad55ce783d/seqeval-1.2.2.tar.gz (43kB)
[K     |████████████████████████████████| 51kB 5.6MB/s 
Collecting torch==1.3.0
[?25l  Downloading https://files.pythonhosted.org/packages/ae/05/50a05de5337f7a924bb8bd70c6936230642233e424d6a9747ef1cfbde353/tor

# Setup experiment

In [None]:
%%writefile DeBERTa/data/clean_data.py


import glob
import pandas as pd
import os

# Converts every CSV in the current working directory to a tab-separated file
# under `output_dir`. CSVs whose name contains "train" are additionally split
# into a train part and a held-out "dev" part.
output_dir = "output"
train_split = 0.8
# Fixed seed so that re-running the script reproduces the same train/dev split.
random_seed = 1234

# exist_ok makes the call safe when the directory is already there.
os.makedirs(output_dir, exist_ok=True)

for csv_data in glob.glob("*.csv"):
    df = pd.read_csv(csv_data)
    # splitext removes only the final extension, so names that contain extra
    # dots are not truncated (and train/dev outputs cannot collide).
    stem = os.path.splitext(csv_data)[0]

    if "train" in csv_data:
        df_train = df.sample(frac=train_split, random_state=random_seed)
        # Rows that were not sampled for training form the dev set.
        df_dev = df.drop(df_train.index)

        df_train.to_csv(os.path.join(output_dir, f"{stem}.tsv"), sep='\t', index=False)
        dev_stem = stem.replace("train", "dev")
        df_dev.to_csv(os.path.join(output_dir, f"{dev_stem}.tsv"), sep='\t', index=False)
    else:
        df.to_csv(os.path.join(output_dir, f"{stem}.tsv"), sep='\t', index=False)

Overwriting DeBERTa/data/clean_data.py


In [None]:
# Run the conversion script from inside the data directory: it globs "*.csv"
# relative to its working directory and writes to ./output there.
!cd DeBERTa/data && python clean_data.py

In [None]:
import pandas as pd

# Print the shape and sentiment-label distribution of the raw laptop and
# restaurant CSVs for each split, plus the size of the two combined.
for data_set in ["train", "test"]:
    df_lap = pd.read_csv(f"DeBERTa/data/laptop_{data_set}.csv")
    print("-"*20)
    print("laptop")
    print(df_lap.shape)
    print(df_lap.sentiment.value_counts())
    print("-"*20)
    df_rest = pd.read_csv(f"DeBERTa/data/restaurants_{data_set}.csv")
    print("-"*20)
    print("rest")
    print(df_rest.shape)
    print(df_rest.sentiment.value_counts())
    # pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4
    # and removed in pandas 2.0; the resulting frame is the same.
    df_combined = pd.concat([df_lap, df_rest])
    print("Combined samples: ", df_combined.shape)

--------------------
laptop
(2328, 3)
 1    994
-1    870
 0    464
Name: sentiment, dtype: int64
--------------------
--------------------
rest
(3608, 3)
 1    2164
-1     807
 0     637
Name: sentiment, dtype: int64
Combined samples:  (5936, 3)
--------------------
laptop
(638, 3)
 1    341
 0    169
-1    128
Name: sentiment, dtype: int64
--------------------
--------------------
rest
(1120, 3)
 1    728
-1    196
 0    196
Name: sentiment, dtype: int64
Combined samples:  (1758, 3)


In [None]:
import pandas as pd

# For each split, stack the laptop and restaurant TSVs and write the combined
# file as DeBERTa/data/output/<split>.tsv.
for data_set in ["train", "dev", "test"]:
    df_lap = pd.read_csv(f"DeBERTa/data/output/laptop_{data_set}.tsv", sep="\t")
    df_rest = pd.read_csv(f"DeBERTa/data/output/restaurants_{data_set}.tsv", sep="\t")
    # pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4
    # and removed in pandas 2.0. The index is not written, so row labels
    # repeated across the two inputs do not matter.
    df_combined = pd.concat([df_lap, df_rest])
    df_combined.to_csv(f"DeBERTa/data/output/{data_set}.tsv", index=False, sep="\t")
    print(data_set)
    print(df_combined.shape)

train
(4748, 3)
dev
(1188, 3)
test
(1758, 3)


In [None]:
# List every generated file (with line number) that contains a known review
# sentence -- presumably a spot check that a test review did not end up in the
# train/dev files.
!grep -rsn "The bread is top notch" DeBERTa/data/output/*.*

DeBERTa/data/output/restaurants_test.tsv:2:The bread is top notch as well .	bread	1
DeBERTa/data/output/test.tsv:640:The bread is top notch as well .	bread	1


The execution will fail, but the SemEval dataset will be downloaded.

In [None]:
# Place the combined train/dev/test TSVs in the QQP task directory, i.e. the
# data location that qqp_base.sh passes to the training script.
!mkdir -p /tmp/DeBERTa/glue_tasks/QQP/
!cp /content/DeBERTa/data/output/train.tsv /tmp/DeBERTa/glue_tasks/QQP/train.tsv
!cp /content/DeBERTa/data/output/dev.tsv /tmp/DeBERTa/glue_tasks/QQP/dev.tsv
!cp /content/DeBERTa/data/output/test.tsv /tmp/DeBERTa/glue_tasks/QQP/test.tsv

In [None]:
%%writefile DeBERTa/DeBERTa/training/dist_launcher.py

# Copyright (c) Microsoft, Inc. 2020
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
#
# Author: penhe@microsoft.com
# Date: 05/15/2019
#

import os
import time
import pdb
import signal
import torch
from multiprocessing import Process,Pool
from collections import defaultdict
import sys
import psutil
from ..utils import set_logger, get_logger
logger = get_logger()

def kill_children(proc=None, recursive = True):
  """Send SIGKILL to the children of a process, then briefly wait on them.

  Args:
    proc: object exposing `children(recursive=...)`; when None, a
      `psutil.Process()` for the current process is used.
    recursive: when true, each child's own children are killed (through a
      recursive call) before the child itself.

  Every failure is ignored on purpose: the routine is best effort and keeps
  going with the remaining children.
  """
  target = psutil.Process() if proc is None else proc
  direct_children = target.children(recursive=False)

  # Pass 1: kill, descendants first when requested.
  for child in direct_children:
    try:
      if recursive:
        kill_children(child, recursive=recursive)
      os.kill(child.pid, signal.SIGKILL)
    except BaseException:
      # Same reach as a bare `except:` -- nothing may abort the sweep.
      continue

  # Pass 2: give each direct child up to one second to be reaped.
  for child in direct_children:
    try:
      child.wait(1)
    except BaseException:
      continue

def gc(i):
  """Return the CUDA device count reported by torch.

  `i` is ignored; the one-argument signature lets the function be handed to
  mapping helpers such as `multiprocessing.Pool.map`.
  """
  device_total = torch.cuda.device_count()
  return device_total

def get_ngpu():
  """Return the number of GPUs to launch worker processes for.

  The value is hard-coded to 0. When `initialize_distributed` has to call this
  function, its computed world size therefore becomes 0 and it takes the
  single-process path instead of forking workers (presumably the reason for
  the override -- confirm before reusing this file elsewhere).
  """
  # The Pool-based query of torch.cuda.device_count() that used to follow this
  # return could never execute, so it has been removed.
  return 0

def _setup_distributed_group(args):
  """Pick the torch device for this process and, for multi-process runs, join the process group.

  Args:
    args: run configuration. `world_size` is overwritten with 1 here; the
      attributes `task_name`, `output_dir`, `rank`, `local_rank`, `n_gpu`,
      `master_ip`, `master_port` (and optionally `distributed_backend`) are
      only read on the multi-process path.

  Returns:
    The `torch.device` selected for this process.
  """
  torch.backends.cudnn.enabled = False
  # World size is pinned to 1 right here, so the multi-process branch below
  # is never taken.
  args.world_size = 1
  multi_process = args.world_size != 1
  if multi_process:
    log_path = os.path.join(args.output_dir, f'training_{args.task_name}_{args.rank}.log')
    set_logger(args.task_name, log_path, rank=args.rank, verbose=1 if args.local_rank==0 else 0)
    # Default device index comes from the global rank; an explicit local rank
    # takes precedence.
    device_id = 0
    if args.n_gpu > 0:
      device_id = args.rank % args.n_gpu
    if args.local_rank >= 0:
      device_id = args.local_rank
    device = torch.device("cuda", device_id)
    endpoint = 'tcp://' + args.master_ip + ':' + args.master_port
    backend = getattr(args, 'distributed_backend', 'nccl')
    torch.distributed.init_process_group(
      backend=backend,
      world_size=args.world_size, rank=args.rank,
      init_method=endpoint)
    torch.cuda.set_device(device)
  else:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  gpu_count = torch.cuda.device_count()
  logger.info("device=%s, n_gpu=%d, distributed training=%r, world_size=%d", device, gpu_count, multi_process, args.world_size)
  return device

def _get_world_size(args):
    """Return the total worker count: GPUs per node times the WORLD_SIZE env value.

    Args:
      args: object that may carry an `n_gpu` attribute. When the attribute is
        missing or None, the GPU count is obtained from `get_ngpu()`.

    Returns:
      `n_gpu * int(WORLD_SIZE)`, with WORLD_SIZE defaulting to 1 when unset.
    """
    world_size = int(os.getenv("WORLD_SIZE", '1'))
    # Use the caller-supplied GPU count when there is one. Previously `n_gpu`
    # was only assigned in the fallback branch, so a preset `args.n_gpu`
    # raised UnboundLocalError.
    n_gpu = getattr(args, 'n_gpu', None)
    if n_gpu is None:
      n_gpu = get_ngpu()
    return n_gpu * world_size

def initialize_distributed(args, join=True):
    """Configure `args` for (possibly multi-process) training and launch workers.

    Reads WORLD_SIZE, RANK, MASTER_ADDR and MASTER_PORT from the environment
    (defaults: 1, 0, 'localhost', '17006') and stores them on `args`. The
    effective world size is `args.n_gpu` times WORLD_SIZE, with `args.n_gpu`
    taken from `get_ngpu()` when missing or None.

    When the effective world size is greater than 1, one child is forked per
    local GPU. Otherwise the current process does the work itself.

    Args:
      args: mutable configuration object; must provide `seed`. The attributes
        `world_size`, `rank`, `master_ip`, `master_port`, `n_gpu`, `node_rank`
        and, on the forking path, `local_rank` and `seed` are written.
      join: when true, the parent waits for its children before returning.

    Returns:
      In a worker (or in the single-process case): whatever
      `_setup_distributed_group(args)` returns. In the forking parent: the list
      of child pids.

    Raises:
      ValueError: in the parent, when a waited child reports a non-zero status.
    """
    args.world_size = int(os.getenv("WORLD_SIZE", '1'))
    args.rank = int(os.getenv('RANK', '0'))
    args.master_ip = os.getenv('MASTER_ADDR', 'localhost')
    args.master_port = os.getenv('MASTER_PORT', '17006')
  
    # A single node ignores any RANK / MASTER_ADDR found in the environment.
    if args.world_size == 1:
      args.rank = 0
      args.master_ip = 'localhost'

    if not hasattr(args, 'n_gpu') or args.n_gpu is None:
      args.n_gpu = get_ngpu()

    # From here on `rank` becomes per-process; keep the node-level value.
    args.node_rank = args.rank
    args.world_size = args.n_gpu * args.world_size
    seed = args.seed
    is_child = False
    if args.world_size>1:
      children = []
      for r in range(args.n_gpu):
        # These assignments happen before the fork so that the child inherits
        # its own rank, local rank and seed. In the parent, `args` ends up
        # holding the values of the last child.
        args.rank = r + args.n_gpu*args.node_rank
        args.local_rank = r
        args.seed = seed + args.rank
        child = os.fork()
        if child>0:
          # Parent: remember the child's pid and keep forking.
          children.append(child)
        else:
          # Child: ignore Ctrl-C (the parent handles it) and stop forking.
          signal.signal(signal.SIGINT, signal.SIG_IGN)
          is_child = True
          break
    else:
      # World size of 0 or 1: no fork, this process acts as the only worker.
      is_child = True

    if is_child:
      return _setup_distributed_group(args)
    else:
      if join:
        try:
          for c in children:
            # NOTE(review): waitpid(0, 0) returns whichever child of the
            # process group changes state next, so `cid` need not equal `c`;
            # `ccode` is the raw wait status, not a decoded exit code.
            cid, ccode = os.waitpid(0,0)
            logger.debug(f'Worker {c} done with code {ccode}')
            if ccode != 0:
              logger.error(f'Worker {c} : {cid} failed with code {ccode}')
              kill_children()
              raise ValueError(f'Job failed. {cid}:{ccode}')
        except (KeyboardInterrupt, SystemExit):
          logger.warning('Keybord interrupt by user. Terminate all processes')
          kill_children(None)
      return children

def test_dist_launch():
  """Smoke test: build a minimal args object and hand it to `distributed_launch`.

  NOTE(review): `distributed_launch` is not defined in the visible part of this
  file; unless it is provided elsewhere, calling this function raises NameError.
  """
  def test_functions(args):
    # Worker payload: set up a per-node log file and log the arguments.
    global logger
    set_logger(args.task_name, os.path.join(args.output_dir, f'training_{args.task_name}_{args.node_rank}.log'), rank=args.rank)
    logger.info(args)

  class Args:
    # Bare attribute container whose repr shows all attributes set on it.
    def __init__(self):
      pass
    def __repr__(self):
      return str(self.__dict__)

  args = Args()
  args.task_name = 'test'
  args.seed = 0
  # None makes the launcher determine the GPU count itself.
  args.n_gpu = None
  args.no_cuda=False
  args.output_dir = '/tmp'
  distributed_launch(args, test_functions, (args,))

def test_init_dist():
  """Smoke test for `initialize_distributed`.

  Returns:
    0 when the call yields a `torch.device`, 1 otherwise (for instance when
    the parent gets back a list of child pids).
  """
  class Args:
    # Plain attribute bag; repr exposes every attribute set on the instance.
    def __repr__(self):
      return str(self.__dict__)

  args = Args()
  # Assigned in the same order as before so that repr(args) is unchanged.
  settings = (
    ('task_name', 'test'),
    ('seed', 0),
    ('n_gpu', None),
    ('no_cuda', False),
    ('output_dir', '/tmp'),
  )
  for attr_name, attr_value in settings:
    setattr(args, attr_name, attr_value)

  outcome = initialize_distributed(args)
  return 0 if isinstance(outcome, torch.device) else 1




Overwriting DeBERTa/DeBERTa/training/dist_launcher.py


In [None]:
# Make the experiment script executable, then run it.
!chmod +x /content/DeBERTa/experiments/glue/qqp_base.sh
!/content/DeBERTa/experiments/glue/qqp_base.sh

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  8459  100  8459    0     0  18881      0 --:--:-- --:--:-- --:--:-- 18881
Downloading and extracting QQP...
Traceback (most recent call last):
  File "<stdin>", line 172, in <module>
  File "<stdin>", line 168, in main
  File "<stdin>", line 57, in download_and_extract
  File "/usr/lib/python3.6/urllib/request.py", line 248, in urlretrieve
    with contextlib.closing(urlopen(url, data)) as fp:
  File "/usr/lib/python3.6/urllib/request.py", line 223, in urlopen
    return opener.open(url, data, timeout)
  File "/usr/lib/python3.6/urllib/request.py", line 532, in open
    response = meth(req, response)
  File "/usr/lib/python3.6/urllib/request.py", line 642, in http_response
    'http', request, response, code, msg, hdrs)
  File "/usr/lib/python3.6/urllib/request.py", line 570, in error
    return self._call_chain(*args)
  File

Running the experiment for a specific dataset (laptop, restaurants)

In [None]:
dataset = "laptop"
!cp /content/DeBERTa/data/output/{dataset}_train_clean.tsv /tmp/DeBERTa/glue_tasks/QQP/train.tsv
!cp /content/DeBERTa/data/output/{dataset}_dev_clean.tsv /tmp/DeBERTa/glue_tasks/QQP/dev.tsv
# !cp /content/DeBERTa/data/output/{dataset}_test_clean.tsv /tmp/DeBERTa/glue_tasks/QQP/test.tsv
!cp /content/DeBERTa/data/output/test.tsv /tmp/DeBERTa/glue_tasks/QQP/test.tsv

Running the experiment with the combined datasets

In [None]:
# Stage the combined (laptop + restaurants) train/dev/test TSVs as the QQP task data.
!cp /content/DeBERTa/data/output/train.tsv /tmp/DeBERTa/glue_tasks/QQP/train.tsv
!cp /content/DeBERTa/data/output/dev.tsv /tmp/DeBERTa/glue_tasks/QQP/dev.tsv
!cp /content/DeBERTa/data/output/test.tsv /tmp/DeBERTa/glue_tasks/QQP/test.tsv

In [None]:
# Disabled: re-running the conversion script would regenerate the output TSVs.
# !cd DeBERTa/data && python clean_data.py
# Preview the header and first rows of the combined training file.
!head DeBERTa/data/output/train.tsv


review	aspect	sentiment
-4 RAM slots , 2 HDD Bays * , 16GB RAM support - No Wireless Issues , at least for me .	Wireless	1
Quality Display I was surprised with the performance and quality of this HP Laptop .	Quality Display	1
-LRB- The SATA controller is the motherboard chip that lets the CPU talk to the hard drive . -RRB-	CPU	0
Fan vents to the side , so no cooling pad needed , great feature !	Fan	1
Everything is so easy to use , Mac software is just so much simpler than Microsoft software .	use	1
Also , HDD secures inside using rails , and there is only one set on the main hard drive .	HDD	1
The display is incredibly bright , much brighter than my PowerBook and very crisp .	display	1
Cords coming out the right for power plus cords coming out front for headphones/mic plus network connection on left make for a very messy setup with cords going every direction .	Cords	-1
The board has a bad connector with the power supply and shortly after warrenty expires the power supply will start ha

Experiment execution

In [None]:
%%writefile /content/DeBERTa/experiments/glue/qqp_base.sh
#!/bin/bash
# Fine-tunes the DeBERTa base model on the data found in the QQP task
# directory, downloading the GLUE QQP data first if no train.tsv is present.

# Run from the directory containing this script so that the relative path to
# ../utils/train.sh resolves regardless of the caller's working directory.
SCRIPT=$(readlink -f "$0")
SCRIPT_DIR=$(dirname "$SCRIPT")
cd "$SCRIPT_DIR" || exit 1

cache_dir=/tmp/DeBERTa/

# setup_glue_data TASK
# Downloads the GLUE data for TASK into $cache_dir/glue_tasks unless a
# train.tsv for that task already exists there.
function setup_glue_data(){
	# `local` keeps the task name from leaking into the rest of the script.
	local task=$1
	mkdir -p "$cache_dir"
	if [[ ! -e "$cache_dir/glue_tasks/${task}/train.tsv" ]]; then
		curl -J -L https://raw.githubusercontent.com/nyu-mll/jiant/v1.3.2/scripts/download_glue_data.py | python3 - --data_dir "$cache_dir/glue_tasks" --tasks "$task"
	fi
}

init=base

tag=Base
Task=QQP
setup_glue_data "$Task"
# The output directory is built from $Task; it previously relied on the
# lower-case $task that setup_glue_data happened to leave behind as a global.
# NOTE(review): the run log reports warmup_proportion=500.0 for `--warmup 500`;
# confirm that train.sh treats values above 1 as a step count.
../utils/train.sh -i "$init" -p --config config.json -t "$Task" --data "$cache_dir/glue_tasks/$Task" --tag "$tag" -o "/tmp/ttonly/$tag/$Task" -- --num_train_epochs 10 --accumulative_update 1 --warmup 500 --learning_rate 1e-5 --train_batch_size 16 --max_seq_length 256 --dump 5000 --cls_drop 0.15 --fp16 True


Overwriting /content/DeBERTa/experiments/glue/qqp_base.sh


In [None]:
# Delete the output directory of any previous run (the path passed to train.sh
# via -o) so that old checkpoints and reports are not mixed with new ones.
!rm -rf /tmp/ttonly/Base/QQP/

In [None]:
# Make the freshly written experiment script executable and start training.
!chmod +x /content/DeBERTa/experiments/glue/qqp_base.sh
!/content/DeBERTa/experiments/glue/qqp_base.sh

12/15/2020 01:19:28|INFO|QQP|00| Namespace(accumulative_update=1, adam_beta1=0.9, adam_beta2=0.999, cls_drop_out=0.15, data_dir='/tmp/DeBERTa//glue_tasks/QQP', debug=False, do_eval=True, do_predict=True, do_train=True, dump_interval=5000, epsilon=1e-06, eval_batch_size=128, fp16=True, init_model='base', learning_rate=1e-05, local_rank=-1, lookahead_alpha=0.5, lookahead_k=-1, loss_scale=16384.0, lr_schedule='warmup_linear', lr_schedule_ends=0, max_grad_norm=1, max_seq_length=256, model_config='/tmp/ttonly/Base/QQP/model_config.json', num_train_epochs=10.0, opt_type='adam', output_dir='/tmp/ttonly/Base/QQP', pre_trained=None, predict_batch_size=128, scale_steps=250, seed=1234, tag='deberta_Base', task_name='QQP', train_batch_size=16, warmup_proportion=500.0, weight_decay=0.01, with_radam=False, workers=1)
12/15/2020 01:19:28|INFO|QQP|00| Total corpus examples: 1188
12/15/2020 01:19:28|INFO|QQP|00|   Evaluation batch size = 128
12/15/2020 01:19:29|INFO|QQP|00| Total corpus examples: 1758


Results export

In [None]:
# Disabled: would delete every file with an extension in the run directory.
# !rm -rf /tmp/ttonly/Base/QQP/*.*
# List the saved model checkpoints with human-readable sizes.
!ls -loh /tmp/ttonly/Base/QQP/pytorch*

-rw-r--r-- 1 root 532M Dec  3 17:50 /tmp/ttonly/Base/QQP/pytorch.model-000297.bin
-rw-r--r-- 1 root 532M Dec  3 17:52 /tmp/ttonly/Base/QQP/pytorch.model-000594.bin
-rw-r--r-- 1 root 532M Dec  3 17:55 /tmp/ttonly/Base/QQP/pytorch.model-000891.bin
-rw-r--r-- 1 root 532M Dec  3 17:58 /tmp/ttonly/Base/QQP/pytorch.model-001188.bin
-rw-r--r-- 1 root 532M Dec  3 18:01 /tmp/ttonly/Base/QQP/pytorch.model-001485.bin
-rw-r--r-- 1 root 532M Dec  3 18:03 /tmp/ttonly/Base/QQP/pytorch.model-001782.bin
-rw-r--r-- 1 root 532M Dec  3 18:06 /tmp/ttonly/Base/QQP/pytorch.model-002079.bin
-rw-r--r-- 1 root 532M Dec  3 18:09 /tmp/ttonly/Base/QQP/pytorch.model-002376.bin
-rw-r--r-- 1 root 532M Dec  3 18:12 /tmp/ttonly/Base/QQP/pytorch.model-002673.bin
-rw-r--r-- 1 root 532M Dec  3 18:14 /tmp/ttonly/Base/QQP/pytorch.model-002967.bin


In [None]:
# Disabled alternative: archive the whole run directory, checkpoints included.
# !zip -r -9 /content/laptop_b32_v256.zip /tmp/ttonly/Base/QQP

# Rebuild the export directory from scratch and copy the run's *.txt, *.json,
# *.tsv and *.log files into it.
!rm -rf  /content/output/
!mkdir /content/output/
!cp /tmp/ttonly/Base/QQP/*.txt /content/output/
!cp /tmp/ttonly/Base/QQP/*.json /content/output/
!cp /tmp/ttonly/Base/QQP/*.tsv /content/output/
!cp /tmp/ttonly/Base/QQP/*.log /content/output/
# Only one checkpoint is exported. Its step number is hard-coded and has to
# match a file that actually exists in the run directory.
# NOTE(review): update the file name whenever the number of training steps changes.
!cp /tmp/ttonly/Base/QQP/pytorch.model-002967.bin /content/output/

In [None]:
# Pack the export directory into one archive at maximum compression (-9).
!zip -r -9 /content/all_b64_v256.zip /content/output

  adding: content/output/ (stored 0%)
  adding: content/output/eval_results_dev_deberta_Base.txt (deflated 28%)
  adding: content/output/eval_results_dev_001485-2967.txt (deflated 24%)
  adding: content/output/eval_results_dev_001782-2967.txt (deflated 25%)
  adding: content/output/submit-dev-001188-2967.tsv (deflated 64%)
  adding: content/output/eval_results_dev_002079-2967.txt (deflated 26%)
  adding: content/output/eval_results_dev_002673-2967.txt (deflated 28%)
  adding: content/output/submit-dev-000891-2967.tsv (deflated 64%)
  adding: content/output/submit-dev-deberta_Base.tsv (deflated 68%)
  adding: content/output/pytorch.model-002967.bin (deflated 13%)
  adding: content/output/submit-dev-000297-2967.tsv (deflated 64%)
  adding: content/output/submit-dev-002673-2967.tsv (deflated 64%)
  adding: content/output/eval_results_dev_002967-2967.txt (deflated 25%)
  adding: content/output/eval_results_dev_002376-2967.txt (deflated 27%)
  adding: content/output/submit-dev-001782-2967.t