### Before running this notebook, make sure to upload the `CommonsenseQA` folder as a zipped file to the working directory
- `/home/jupyter/` in case of Google AI platform notebooks.
- `/content/` in case of Google Colab

In [None]:
# Confirm GPU
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime → "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

In [1]:
# Unzip code
!unzip CommonsenseQA.zip

Archive:  CommonsenseQA.zip
   creating: CommonsenseQA/
   creating: CommonsenseQA/data/
   creating: CommonsenseQA/data/CommonsenseQA/
  inflating: CommonsenseQA/data/CommonsenseQA/dict.txt  
  inflating: CommonsenseQA/data/CommonsenseQA/test.jsonl  
  inflating: CommonsenseQA/data/CommonsenseQA/train.jsonl  
  inflating: CommonsenseQA/data/CommonsenseQA/valid.jsonl  
   creating: CommonsenseQA/fairseq/
   creating: CommonsenseQA/fairseq/examples/
   creating: CommonsenseQA/fairseq/examples/roberta/
   creating: CommonsenseQA/fairseq/examples/roberta/commonsense_qa/
  inflating: CommonsenseQA/fairseq/examples/roberta/commonsense_qa/commonsense_qa_task.py  
  inflating: CommonsenseQA/fairseq/examples/roberta/commonsense_qa/download_cqa_data.sh  
  inflating: CommonsenseQA/fairseq/examples/roberta/commonsense_qa/README.md  
  inflating: CommonsenseQA/fairseq/examples/roberta/commonsense_qa/__init__.py  
  inflating: CommonsenseQA/finetune.sh  


In [2]:
!pip install fairseq

Collecting fairseq
[?25l  Downloading https://files.pythonhosted.org/packages/67/bf/de299e082e7af010d35162cb9a185dc6c17db71624590f2f379aeb2519ff/fairseq-0.9.0.tar.gz (306kB)
[K     |████████████████████████████████| 307kB 2.8MB/s eta 0:00:01
Collecting sacrebleu
  Downloading https://files.pythonhosted.org/packages/31/67/a895f0aa46891a6af4bd00e98009eae59c065f2b220de663ca3e948da453/sacrebleu-1.4.4-py3-none-any.whl
Collecting portalocker
  Downloading https://files.pythonhosted.org/packages/91/db/7bc703c0760df726839e0699b7f78a4d8217fdc9c7fcb1b51b39c5a22a4e/portalocker-1.5.2-py2.py3-none-any.whl
Collecting typing
  Downloading https://files.pythonhosted.org/packages/fe/2e/b480ee1b75e6d17d2993738670e75c1feeb9ff7f64452153cf018051cc92/typing-3.7.4.1-py3-none-any.whl
Building wheels for collected packages: fairseq
  Building wheel for fairseq (setup.py) ... [?25ldone
[?25h  Created wheel for fairseq: filename=fairseq-0.9.0-cp37-cp37m-linux_x86_64.whl size=2017047 sha256=ecbc9265858f5e1fac

In [3]:
# Download roberta model
!wget -O roberta.large.tar.gz https://dl.fbaipublicfiles.com/fairseq/models/roberta.large.tar.gz
!tar -xvzf roberta.large.tar.gz

# !wget -O /content/CommonsenseQA/roberta.base.tar.gz https://dl.fbaipublicfiles.com/fairseq/models/roberta.base.tar.gz
# !tar -xvzf /content/CommonsenseQA/roberta.base.tar.gz

--2020-03-13 06:14:29--  https://dl.fbaipublicfiles.com/fairseq/models/roberta.large.tar.gz
Resolving dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)... 104.20.22.166, 104.20.6.166, 2606:4700:10::6814:6a6, ...
Connecting to dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)|104.20.22.166|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 655283069 (625M) [application/gzip]
Saving to: ‘roberta.large.tar.gz’


2020-03-13 06:14:51 (29.0 MB/s) - ‘roberta.large.tar.gz’ saved [655283069/655283069]

roberta.large/
roberta.large/dict.txt
roberta.large/model.pt
roberta.large/NOTE


In [4]:
%cd /home/jupyter/CommonsenseQA
!pwd

/home/jupyter/CommonsenseQA
/home/jupyter/CommonsenseQA


In [1]:
## Write the finetuning part to a bash script file
# Modified following from the original script to get it to run on Google AI platform and Colab
# - Set MAX_SENTENCES=8
# - Added --update-freq 4

%%writefile finetune.sh
#!/bin/bash

MAX_UPDATES=3000      # Number of training steps.
WARMUP_UPDATES=150    # Linearly increase LR over this many steps.
LR=1e-05              # Peak LR for polynomial LR scheduler.
MAX_SENTENCES=8      # Batch size.
SEED=23                # Random seed.

BASEDIR=/home/jupyter
# CQA_PATH=/content/CommonsenseQA # For Google Colab
CQA_PATH=$BASEDIR/CommonsenseQA # For Kaggle
ROBERTA_PATH=${BASEDIR}/roberta.large/model.pt
DATA_DIR=${CQA_PATH}/data/CommonsenseQA

# we use the --user-dir option to load the task from
# the examples/roberta/commonsense_qa directory:
FAIRSEQ_PATH=${CQA_PATH}/fairseq
FAIRSEQ_USER_DIR=${FAIRSEQ_PATH}/examples/roberta/commonsense_qa

cd $FAIRSEQ_PATH
CUDA_VISIBLE_DEVICES=0 fairseq-train --fp16 --ddp-backend=no_c10d \
    $DATA_DIR \
    --update-freq 4 \
    --save-dir ./checkpoints \
    --user-dir $FAIRSEQ_USER_DIR \
    --restore-file $ROBERTA_PATH \
    --reset-optimizer --reset-dataloader --reset-meters \
    --no-epoch-checkpoints --no-last-checkpoints --no-save-optimizer-state \
    --best-checkpoint-metric accuracy --maximize-best-checkpoint-metric \
    --task commonsense_qa --init-token 0 --bpe gpt2 \
    --arch roberta_large --max-positions 512 \
    --dropout 0.1 --attention-dropout 0.1 --weight-decay 0.01 \
    --criterion sentence_ranking --num-classes 5 \
    --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-06 --clip-norm 0.0 \
    --lr-scheduler polynomial_decay --lr $LR \
    --warmup-updates $WARMUP_UPDATES --total-num-update $MAX_UPDATES \
    --max-sentences $MAX_SENTENCES \
    --max-update $MAX_UPDATES \
    --log-format simple --log-interval 25 \
    --seed $SEED

Overwriting finetune.sh


In [2]:
# Finetune
!bash finetune.sh

Namespace(activation_dropout=0.0, activation_fn='gelu', adam_betas='(0.9, 0.98)', adam_eps=1e-06, arch='roberta_large', attention_dropout=0.1, best_checkpoint_metric='accuracy', bpe='gpt2', bucket_cap_mb=25, clip_norm=0.0, cpu=False, criterion='sentence_ranking', curriculum=0, data='/home/jupyter/CommonsenseQA/data/CommonsenseQA', dataset_impl=None, ddp_backend='no_c10d', device_id=0, disable_validation=False, distributed_backend='nccl', distributed_init_method=None, distributed_no_spawn=False, distributed_port=-1, distributed_rank=0, distributed_world_size=1, dropout=0.1, empty_cache_freq=0, encoder_attention_heads=16, encoder_embed_dim=1024, encoder_ffn_embed_dim=4096, encoder_layerdrop=0, encoder_layers=24, encoder_layers_to_keep=None, end_learning_rate=0.0, fast_stat_sync=False, find_unused_parameters=False, fix_batches_to_gpus=False, fixed_validation_seed=None, force_anneal=None, fp16=True, fp16_init_scale=128, fp16_scale_tolerance=0.0, fp16_scale_window=None, gpt2_encoder_json='h

In [2]:
%cd /home/jupyter/CommonsenseQA/fairseq
!pwd

/home/jupyter/CommonsenseQA/fairseq
/home/jupyter/CommonsenseQA/fairseq


In [1]:
# Try to resolve import path issues

%cd /home/jupyter/CommonsenseQA/fairseq/examples/roberta/commonsense_qa
import sys
# sys.path.insert(0, '/home/jupyter/CommonsenseQA/fairseq')
# sys.path.insert(0, '/home/jupyter/CommonsenseQA/fairseq/examples/roberta/commonsense_qa')
print(sys.path)
# import examples
# from examples.roberta import commonsense_qa
import commonsense_qa_task

/home/jupyter/CommonsenseQA/fairseq/examples/roberta/commonsense_qa
['/home/jupyter/CommonsenseQA', '/opt/anaconda3/lib/python37.zip', '/opt/anaconda3/lib/python3.7', '/opt/anaconda3/lib/python3.7/lib-dynload', '', '/opt/anaconda3/lib/python3.7/site-packages', '/opt/anaconda3/lib/python3.7/site-packages/IPython/extensions', '/home/jupyter/.ipython']


In [3]:
%cd /home/jupyter/CommonsenseQA/fairseq/examples/roberta/commonsense_qa

import json
import torch
from fairseq.models.roberta import RobertaModel
# from examples.roberta import commonsense_qa  # load the Commonsense QA task
import commonsense_qa_task  # load the Commonsense QA task
roberta = RobertaModel.from_pretrained('/home/jupyter/CommonsenseQA/fairseq/checkpoints', 'checkpoint_best.pt', '/home/jupyter/CommonsenseQA/data/CommonsenseQA')
print(0)
roberta.eval()  # disable dropout
print(1)
roberta.cuda()  # use the GPU (optional)
nsamples, ncorrect = 0, 0
wrong = []
with open('/home/jupyter/CommonsenseQA/data/CommonsenseQA/valid.jsonl') as h:
    print(3)
    for line in h:
        example = json.loads(line)
        scores = []
        for choice in example['question']['choices']:
            input = roberta.encode(
                'Q: ' + example['question']['stem'],
                'A: ' + choice['text'],
                no_separator=True
            )
            score = roberta.predict('sentence_classification_head', input, return_logits=True)
            scores.append(score)
#             print(choice['label'], score.data.item())

        pred = torch.cat(scores).argmax()
#         print('pred: ', chr(ord('A') + pred), 'correct: ', example['answerKey'])
        answer = ord(example['answerKey']) - ord('A')
        nsamples += 1
        if pred == answer:
            ncorrect += 1
        else:
            example['predicted'] = chr(ord('A') + pred)
            example['scores'] = {chr(ord('A') + i): s.data.item() for (i, s) in enumerate(scores)}
            wrong.append(json.dumps(example))

print(4)
# Write a file with JSON lines for wrong predictions
with open('/home/jupyter/CommonsenseQA/wrong_preds.jsonl', 'w') as f:
    f.write('\n'.join(wrong))

print(5)
print(f'Accuracy: {ncorrect}/{nsamples} = {ncorrect / float(nsamples)}')
# Accuracy: 0.7846027846027847

/home/jupyter/CommonsenseQA/fairseq/examples/roberta/commonsense_qa
loading archive file /home/jupyter/CommonsenseQA/fairseq/checkpoints
loading archive file /home/jupyter/CommonsenseQA/data/CommonsenseQA
| dictionary: 50265 types
Accuracy: 0.782964782964783


In [None]:
#Accuracy: 0.782964782964783

In [None]:
# wrong_preds jsonl2tsv
import json

choice_chars = ['A', 'B', 'C', 'D', 'E']
tsvlines = ['id\tquestion_concept\tquestion\tchoiceA\tscoreA\tchoiceB\tscoreB\tchoiceC\tscoreC\tchoiceD\tscoreD\tchoiceE\tscoreE\tanswer\tpredicted']
with open('/home/jupyter/CommonsenseQA/wrong_preds.jsonl') as f:
    for line in f:
        q = json.loads(line)
        l = []
        l.append(q['id'])
        l.append(q['question']['question_concept'])
        l.append(q['question']['stem'])

        choices = {}
        for c in q['question']['choices']:
            choices[c['label']] = f"{c['text']}\t{round(q['scores'][c['label']], 4)}"
        # To make sure TSV has choices in the order A,B,C,D,E
        for c in choice_chars:
            l.append(choices[c])

        l.append(q['answerKey'])
        l.append(q['predicted'])
        tsvlines.append('\t'.join(l))
        # print('\n'.join(tsvlines))
        # break

with open('/home/jupyter/CommonsenseQA/wrong_preds.tsv', 'w') as f:
    f.write('\n'.join(tsvlines))

In [3]:
# analysing proper nouns in the validation dataset
!pip install spacy[cuda100]
!python -m spacy download en_core_web_lg

You are using pip version 19.0.3, however version 20.0.2 is available.
You should consider upgrading via the 'python -m pip install --upgrade pip' command.
Collecting en_core_web_lg==2.2.5 from https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-2.2.5/en_core_web_lg-2.2.5.tar.gz#egg=en_core_web_lg==2.2.5
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-2.2.5/en_core_web_lg-2.2.5.tar.gz (827.9MB)
Installing collected packages: en-core-web-lg
  Running setup.py install for en-core-web-lg: started
    Running setup.py install for en-core-web-lg: finished with status 'done'
Successfully installed en-core-web-lg-2.2.5
✔ Download and installation successful
You can now load the model via spacy.load('en_core_web_lg')
You are using pip version 19.0.3, however version 20.0.2 is available.
You should consider upgrading via the 'python -m pip install --upgrade pip' command.


In [9]:
import spacy
import en_core_web_lg

spacy.prefer_gpu()
nlp = en_core_web_lg.load()

In [24]:
# analysing proper nouns in the validation dataset
import json
from spacy.matcher import Matcher

# def on_match(matcher, doc, id, matches):
#     print('Matched!', matches)

pattern = [{'POS': 'PROPN'}]  # look for proper nouns
matcher = Matcher(nlp.vocab)
# matcher.add("PropNounsInCQA", [pattern], on_match=on_match)  # matcher.add expects a list of list
matcher.add("PropNounsInCQA", [pattern])  # matcher.add expects a list of list

new_valid = []
base_path = "D:/workspace/ASU/Courses/Spring-2020/CSE-576-Topics-in-Natural-Language-Processing/Project-COMMONSENSEQA"
with open(base_path + '/CommonsenseQA/data/CommonsenseQA/valid.jsonl') as f:
    for line in f:
        q = json.loads(line)
        l = []
        l.append(q['question']['question_concept'])
        l.append(q['question']['stem'])
        l += [c['text'] for c in q['question']['choices']]
        doc = nlp(' '.join(l))  # get POS tags for concept + question + choices
        matches = matcher(doc)
        if matches:
            # q['has_propn'] = True
            q['proper_nouns'] = []
        print(doc[:])
        for m in matches:
            print(doc[m[1]:m[2]])
            q['proper_nouns'].append(doc[m[1]:m[2]])

        new_valid.append(json.dumps(q))

with open(base_path + '/CommonsenseQA/data/CommonsenseQA/valid-propn.jsonl', 'w') as f:
    f.write('\n'.join(new_valid))


bank
library
department
store
mall
new
york


TypeError: Object of type Span is not JSON serializable