In [1]:
%pip install datasets transformers[sentencepiece] torch

Collecting torch
  Using cached torch-2.0.0-cp39-cp39-manylinux1_x86_64.whl (619.9 MB)
Collecting nvidia-nccl-cu11==2.14.3
  Using cached nvidia_nccl_cu11-2.14.3-py3-none-manylinux1_x86_64.whl (177.1 MB)
Collecting nvidia-cublas-cu11==11.10.3.66
  Using cached nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl (317.1 MB)
Collecting nvidia-cudnn-cu11==8.5.0.96
  Using cached nvidia_cudnn_cu11-8.5.0.96-2-py3-none-manylinux1_x86_64.whl (557.1 MB)
Collecting nvidia-cusolver-cu11==11.4.0.1
  Using cached nvidia_cusolver_cu11-11.4.0.1-2-py3-none-manylinux1_x86_64.whl (102.6 MB)
Collecting nvidia-cuda-cupti-cu11==11.7.101
  Using cached nvidia_cuda_cupti_cu11-11.7.101-py3-none-manylinux1_x86_64.whl (11.8 MB)
Collecting nvidia-cuda-runtime-cu11==11.7.99
  Using cached nvidia_cuda_runtime_cu11-11.7.99-py3-none-manylinux1_x86_64.whl (849 kB)
Collecting triton==2.0.0
  Using cached triton-2.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (63.3 MB)
Collecting nvidia-nvtx-cu1

In [2]:
model_dir="/home/g1-s23/dev/nlp-research/bloom/bloom-3b"

In [2]:
from transformers import AutoTokenizer, AutoModel
class Model():
    def __init__(self, model_name,use_gpu=True):
        self.device = "cuda" if use_gpu else "cpu"
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name).to(self.device)

    def save_model(self, path):
        self.model.save_pretrained(path)
    def save_tokenizer(self, path):
        self.tokenizer.save_pretrained(path)
      

    def __call__(self, text):
        inputs=self.tokenizer(text, padding=True, truncation=True, return_tensors="pt").to(self.device)
        return self.model(**inputs)





In [3]:
from transformers import BloomTokenizerFast, BloomForCausalLM
class BloomModel():
    def __init__(self, model_name="bigscience/bloom",use_gpu=True):
        self.device = "cuda" if use_gpu else "cpu"
        self.tokenizer = BloomTokenizerFast.from_pretrained(model_name)
        self.model = BloomForCausalLM.from_pretrained(model_name).to(self.device)
        self.output= None 

    def save_model(self, path):
        self.model.save_pretrained(path)
    def save_tokenizer(self, path):
        self.tokenizer.save_pretrained(path)
    def decode(self, text):
        return self.tokenizer.decode(text)
      

    def __call__(self, text):
        inputs=self.tokenizer(text, padding=True, truncation=True, return_tensors="pt").to(self.device)
        return self.model(**inputs)





In [4]:
# from datasets import load_dataset

# dataset = load_dataset("glue", "mrpc")
model = BloomModel(model_dir,False)
print(model)





<__main__.BloomModel object at 0x7fcbacaad310>


========================
# Model without class
========================

## Preparation
### Models

In [17]:
from transformers import BloomForQuestionAnswering,BloomForCausalLM
casual_lm_model = BloomForCausalLM.from_pretrained(model_dir)
QA_model = BloomForQuestionAnswering.from_pretrained(model_dir)
models={"qa_model":QA_model,"casual":casual_lm_model}

Some weights of BloomForQuestionAnswering were not initialized from the model checkpoint at /home/g1-s23/dev/nlp-research/bloom/bloom-3b and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### Tokenizer

In [8]:
from transformers import BloomTokenizerFast
tokenizer = BloomTokenizerFast.from_pretrained(model_dir)

Some weights of BloomForQuestionAnswering were not initialized from the model checkpoint at /home/g1-s23/dev/nlp-research/bloom/bloom-3b and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Experimenting

In [18]:
prompts = ["Q: I would like to order pizza, what types of pizzas do you serve?\n A:"]
result_length = 50
inputs = tokenizer(prompts, return_tensors="pt")

In [19]:
model=casual_lm_model

In [12]:
import torch.nn as nn
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"  # specify which GPU(s) to be used
device = "cpu"#torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model = nn.DataParallel(model, device_ids=[0,1])
model.to(device)
inputs["input_ids"] = inputs["input_ids"].to(device)
output = model(**inputs)



In [14]:
output

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[-20.1740, -17.2592, -22.8611, -24.7784, -18.9583, -26.7688, -24.1478,
         -19.7191, -19.6957, -25.0035, -16.9611, -22.8246, -20.6821, -15.8995,
         -21.0736, -13.0851, -23.4245]], grad_fn=<CloneBackward0>), end_logits=tensor([[ 1.0633, 20.5332, 22.2439, 19.7627, 23.1308, 16.8226, 17.1016, 16.6538,
         20.5550, 24.4846, 21.8530, 22.5303, 18.4941, 20.3002, 18.6649, 10.9742,
         14.6803]], grad_fn=<CloneBackward0>), hidden_states=None, attentions=None)

In [16]:
answer = tokenizer.decode(output.start_logits, skip_special_tokens=True)
print(answer)

TypeError: argument 'ids': 'list' object cannot be interpreted as an integer

# Question answering

In [61]:
from datasets import load_dataset

squad = load_dataset("squad", split="train[:5000]")

Downloading builder script:   0%|          | 0.00/5.27k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/2.36k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/7.67k [00:00<?, ?B/s]

Downloading and preparing dataset squad/plain_text to /home/g1-s23/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453...


Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/8.12M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.05M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/87599 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/10570 [00:00<?, ? examples/s]

Dataset squad downloaded and prepared to /home/g1-s23/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453. Subsequent calls will reuse this data.


In [63]:
squad = squad.train_test_split(test_size=0.2)

AttributeError: 'DatasetDict' object has no attribute 'train_test_split'

In [65]:
squad["train"][0]

{'id': '56ce2b2daab44d1400b884f4',
 'title': 'Sino-Tibetan_relations_during_the_Ming_dynasty',
 'context': 'China Daily, a CCP-controlled news organization since 1981, states in a 2008 article that although there were dynastic changes after Tibet was incorporated into the territory of Yuan dynasty\'s China in the 13th century, "Tibet has remained under the jurisdiction of the central government of China." It also states that the Ming dynasty "inherited the right to rule Tibet" from the Yuan dynasty, and repeats the claims in the Mingshi about the Ming establishing two itinerant high commands over Tibet. China Daily states that the Ming handled Tibet\'s civil administration, appointed all leading officials of these administrative organs, and punished Tibetans who broke the law. The party-controlled People\'s Daily, the state-controlled Xinhua News Agency, and the state-controlled national television network China Central Television posted the same article that China Daily had, the only 

In [86]:
def preprocess_function(examples):
    questions = [q.strip() for q in examples["question"]]
    inputs = tokenizer(
        questions,
        examples["context"],
        max_length=384,
        truncation="only_second",
        return_offsets_mapping=True,
        padding="max_length",
    )

    offset_mapping = inputs.pop("offset_mapping")
    answers = examples["answers"]
    start_positions = []
    end_positions = []

    for i, offset in enumerate(offset_mapping):
        answer = answers[i]
        start_char = answer["answer_start"][0]
        end_char = answer["answer_start"][0] + len(answer["text"][0])
        sequence_ids = inputs.sequence_ids(i)

        # Find the start and end of the context
        idx = 0
        while sequence_ids[idx] != 1:
            idx += 1
        context_start = idx
        print("IDX: ",idx,len(sequence_ids))
        while (idx<len(sequence_ids)-1)  and sequence_ids[idx] == 1:
            idx += 1
        context_end = idx - 1

        # If the answer is not fully inside the context, label it (0, 0)
        if offset[context_start][0] > end_char or offset[context_end][1] < start_char:
            start_positions.append(0)
            end_positions.append(0)
        else:
            # Otherwise it's the start and end token positions
            idx = context_start
            while idx <= context_end and offset[idx][0] <= start_char:
                idx += 1
            start_positions.append(idx - 1)

            idx = context_end
            while idx >= context_start and offset[idx][1] >= end_char:
                idx -= 1
            end_positions.append(idx + 1)

    inputs["start_positions"] = start_positions
    inputs["end_positions"] = end_positions
    return inputs

In [87]:
tokenized_squad = squad.map(preprocess_function, batched=True, remove_columns=squad["train"].column_names)

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

IDX:  10 384
IDX:  14 384
IDX:  13 384
IDX:  7 384
IDX:  15 384
IDX:  12 384
IDX:  13 384
IDX:  7 384
IDX:  13 384
IDX:  14 384
IDX:  14 384
IDX:  14 384
IDX:  10 384
IDX:  11 384
IDX:  9 384
IDX:  12 384
IDX:  11 384
IDX:  9 384
IDX:  13 384
IDX:  10 384
IDX:  15 384
IDX:  13 384
IDX:  7 384
IDX:  13 384
IDX:  14 384
IDX:  11 384
IDX:  14 384
IDX:  12 384
IDX:  9 384
IDX:  28 384
IDX:  10 384
IDX:  9 384
IDX:  18 384
IDX:  14 384
IDX:  11 384
IDX:  19 384
IDX:  9 384
IDX:  10 384
IDX:  10 384
IDX:  16 384
IDX:  12 384
IDX:  10 384
IDX:  15 384
IDX:  9 384
IDX:  13 384
IDX:  13 384
IDX:  15 384
IDX:  9 384
IDX:  6 384
IDX:  6 384
IDX:  9 384
IDX:  18 384
IDX:  12 384
IDX:  21 384
IDX:  9 384
IDX:  10 384
IDX:  11 384
IDX:  15 384
IDX:  11 384
IDX:  9 384
IDX:  13 384
IDX:  14 384
IDX:  14 384
IDX:  17 384
IDX:  14 384
IDX:  7 384
IDX:  10 384
IDX:  11 384
IDX:  13 384
IDX:  19 384
IDX:  13 384
IDX:  10 384
IDX:  23 384
IDX:  18 384
IDX:  11 384
IDX:  14 384
IDX:  14 384
IDX:  10 384
ID

IDX:  16 384
IDX:  12 384
IDX:  9 384
IDX:  21 384
IDX:  10 384
IDX:  12 384
IDX:  10 384
IDX:  8 384
IDX:  8 384
IDX:  12 384
IDX:  11 384
IDX:  11 384
IDX:  10 384
IDX:  18 384
IDX:  17 384
IDX:  9 384
IDX:  9 384
IDX:  9 384
IDX:  18 384
IDX:  13 384
IDX:  17 384
IDX:  21 384
IDX:  11 384
IDX:  8 384
IDX:  12 384
IDX:  12 384
IDX:  14 384
IDX:  9 384
IDX:  18 384
IDX:  9 384
IDX:  7 384
IDX:  17 384
IDX:  11 384
IDX:  16 384
IDX:  16 384
IDX:  13 384
IDX:  15 384
IDX:  9 384
IDX:  10 384
IDX:  11 384
IDX:  13 384
IDX:  10 384
IDX:  9 384
IDX:  9 384
IDX:  10 384
IDX:  14 384
IDX:  11 384
IDX:  12 384
IDX:  16 384
IDX:  27 384
IDX:  8 384
IDX:  8 384
IDX:  10 384
IDX:  17 384
IDX:  12 384
IDX:  9 384
IDX:  16 384
IDX:  20 384
IDX:  7 384
IDX:  23 384
IDX:  14 384
IDX:  11 384
IDX:  6 384
IDX:  14 384
IDX:  11 384
IDX:  9 384
IDX:  19 384
IDX:  10 384
IDX:  10 384
IDX:  11 384
IDX:  12 384
IDX:  15 384
IDX:  7 384
IDX:  11 384
IDX:  11 384
IDX:  12 384
IDX:  14 384
IDX:  16 384
IDX:  

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

IDX:  12 384
IDX:  10 384
IDX:  12 384
IDX:  8 384
IDX:  10 384
IDX:  8 384
IDX:  16 384
IDX:  14 384
IDX:  18 384
IDX:  13 384
IDX:  9 384
IDX:  10 384
IDX:  17 384
IDX:  12 384
IDX:  14 384
IDX:  11 384
IDX:  14 384
IDX:  18 384
IDX:  11 384
IDX:  10 384
IDX:  6 384
IDX:  13 384
IDX:  24 384
IDX:  15 384
IDX:  10 384
IDX:  13 384
IDX:  22 384
IDX:  16 384
IDX:  11 384
IDX:  13 384
IDX:  12 384
IDX:  19 384
IDX:  16 384
IDX:  8 384
IDX:  8 384
IDX:  19 384
IDX:  9 384
IDX:  5 384
IDX:  18 384
IDX:  14 384
IDX:  12 384
IDX:  21 384
IDX:  12 384
IDX:  13 384
IDX:  18 384
IDX:  6 384
IDX:  9 384
IDX:  14 384
IDX:  10 384
IDX:  10 384
IDX:  10 384
IDX:  11 384
IDX:  12 384
IDX:  16 384
IDX:  13 384
IDX:  10 384
IDX:  13 384
IDX:  6 384
IDX:  16 384
IDX:  19 384
IDX:  11 384
IDX:  16 384
IDX:  11 384
IDX:  19 384
IDX:  10 384
IDX:  8 384
IDX:  11 384
IDX:  17 384
IDX:  5 384
IDX:  14 384
IDX:  13 384
IDX:  12 384
IDX:  15 384
IDX:  10 384
IDX:  16 384
IDX:  11 384
IDX:  13 384
IDX:  11 384

In [38]:
import torch
torch.cuda.is_available()

True

In [105]:
torch.cuda.empty_cache()