# **COMET-atomic-2020 사용법**


#### **Google Drive Mount**

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


#### **Install and Import**

In [2]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.24.0-py3-none-any.whl (5.5 MB)
[K     |████████████████████████████████| 5.5 MB 4.3 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.10.1-py3-none-any.whl (163 kB)
[K     |████████████████████████████████| 163 kB 97.9 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 66.0 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.10.1 tokenizers-0.13.2 transformers-4.24.0


In [3]:
import os
import json
import torch
import argparse

import pandas as pd

from tqdm import tqdm
from pathlib import Path
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

#### **Device Setting**

In [4]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
!nvidia-smi

Tue Nov 15 03:44:03 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   42C    P8    10W /  70W |      3MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

#### **Install COMET-ATOMIC2020 with BART**

In [5]:
!git clone https://github.com/allenai/comet-atomic-2020.git

Cloning into 'comet-atomic-2020'...
remote: Enumerating objects: 190, done.[K
remote: Counting objects: 100% (77/77), done.[K
remote: Compressing objects: 100% (38/38), done.[K
remote: Total 190 (delta 56), reused 42 (delta 39), pack-reused 113[K
Receiving objects: 100% (190/190), 7.15 MiB | 6.40 MiB/s, done.
Resolving deltas: 100% (74/74), done.


In [None]:
%cd /content/comet-atomic-2020/models/comet_atomic2020_bart
!pip install -r requirements.txt
!bash download_model.sh

In [31]:
!python ./generation_example.py

model loading ...
model loaded
data loading ...
data loaded
Data Preprocessing with COMET:   0% 0/51 [00:00<?, ?it/s]
---- AtLocation processing ----


0it [00:00, ?it/s][A
2644it [00:00, 26434.98it/s][A
5288it [00:00, 26338.44it/s][A
9989it [00:00, 26499.53it/s]

---- AtLocation completed ----

Data Preprocessing with COMET:   2% 1/51 [55:15<46:02:46, 3315.33s/it]
---- CapableOf processing ----


0it [00:00, ?it/s][A
2601it [00:00, 26008.35it/s][A
5300it [00:00, 26580.31it/s][A
9989it [00:00, 26615.27it/s]

---- CapableOf completed ----

Data Preprocessing with COMET:   4% 2/51 [1:52:51<46:15:23, 3398.45s/it]
---- Causes processing ----


0it [00:00, ?it/s][A
2583it [00:00, 25823.62it/s][A
5275it [00:00, 26465.94it/s][A
9989it [00:00, 26464.25it/s]

---- Causes completed ----

Data Preprocessing with COMET:   6% 3/51 [2:48:07<44:48:20, 3360.43s/it]
---- CausesDesire processing ----


0it [00:00, ?it/s][A
2543it [00:00, 25421.24it/s][A
5216it [00:00, 26187.11it/s][A
9989it 

## **파일 내용 수정**
- 모델을 불러오는 방법을 알아내기 힘들어서 불가피하게 아래 내용을 복사합니다.
- 'generate_example.py' 에 붙여 넣으시면 됩니다.

In [None]:
"""
import os
import json
import torch
import argparse

import pandas as pd

from tqdm import tqdm
from pathlib import Path
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from utils import calculate_rouge, use_task_specific_params, calculate_bleu_score, trim_batch


def chunks(lst, n):
    """Yield successive n-sized chunks from lst."""
    for i in range(0, len(lst), n):
        yield lst[i : i + n]


class Comet:
    def __init__(self, model_path):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_path).to(self.device)
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        task = "summarization"
        use_task_specific_params(self.model, task)
        self.batch_size = 1
        self.decoder_start_token_id = None

    def generate(
            self, 
            queries,
            decode_method="beam", 
            num_generate=5, 
            ):

        with torch.no_grad():
            examples = queries

            decs = []
            for batch in list(chunks(examples, self.batch_size)):

                batch = self.tokenizer(batch, return_tensors="pt", truncation=True, padding="max_length").to(self.device)
                input_ids, attention_mask = trim_batch(**batch, pad_token_id=self.tokenizer.pad_token_id)

                summaries = self.model.generate(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    decoder_start_token_id=self.decoder_start_token_id,
                    num_beams=num_generate,
                    num_return_sequences=num_generate,
                    )

                dec = self.tokenizer.batch_decode(summaries, skip_special_tokens=True, clean_up_tokenization_spaces=False)
                decs.append(dec)

            return decs


all_relations = [
    "AtLocation",
    "CapableOf",
    "Causes",
    "CausesDesire",
    "CreatedBy",
    "DefinedAs",
    "DesireOf",
    "Desires",
    "HasA",
    "HasFirstSubevent",
    "HasLastSubevent",
    "HasPainCharacter",
    "HasPainIntensity",
    "HasPrerequisite",
    "HasProperty",
    "HasSubEvent",
    "HasSubevent",
    "HinderedBy",
    "InheritsFrom",
    "InstanceOf",
    "IsA",
    "LocatedNear",
    "LocationOfAction",
    "MadeOf",
    "MadeUpOf",
    "MotivatedByGoal",
    "NotCapableOf",
    "NotDesires",
    "NotHasA",
    "NotHasProperty",
    "NotIsA",
    "NotMadeOf",
    "ObjectUse",
    "PartOf",
    "ReceivesAction",
    "RelatedTo",
    "SymbolOf",
    "UsedFor",
    "isAfter",
    "isBefore",
    "isFilledBy",
    "oEffect",
    "oReact",
    "oWant",
    "xAttr",
    "xEffect",
    "xIntent",
    "xNeed",
    "xReact",
    "xReason",
    "xWant",
    ]

if __name__ == "__main__":
    # sample usage (reproducing AAAI)
    print("model loading ...")
    comet = Comet("./comet-atomic_2020_BART_aaai")
    comet.model.zero_grad()
    print("model loaded")

    print("data loading ...")
    data_path = '/content/drive/MyDrive/dacon_sentiment_analysis/dataset'
    data_csv = pd.read_csv(os.path.join(data_path, 'train.csv'))

    print('data loaded')

    queries = []

    for rel in tqdm(all_relations, desc=f'Data Preprocessing with COMET'):
        print(f'\n---- {rel} processing ----\n')
        queries = []

        for i, row in tqdm(data_csv.iterrows()):
            query = '{} {}'.format(row['Utterance'], rel)
            queries.append(query)

        results = comet.generate(queries, decode_method='greedy', num_generate=1)
        results = [r[0] for r in results]

        data_csv[rel] = results

        print(f'\n---- {rel} completed ----\n')

    data_csv.to_csv('./comet_train.csv', index=False)
"""