## **Checking the Python version**

In [None]:
# Print the version of the `python` executable that the shell resolves.
!python --version

Python 3.9.16


## **Creating a new conda environment and installing the Anaconda distribution of Python version 3.9 into it**

In [None]:
# Create a conda environment named `py39` with Python 3.9 and the `anaconda` package, without prompting.
!conda create -n py39 python=3.9 anaconda --yes
# Activate `py39` and run `conda install python` inside the same shell invocation.
# NOTE(review): `-c` selects a channel, and `py39` is the environment name used above —
# confirm that a channel called `py39` is really intended here.
!source /opt/conda/bin/activate py39 && conda install -c py39 python -y

Traceback (most recent call last):
  File "/opt/conda/bin/conda", line 14, in <module>
    from conda.cli import main
ModuleNotFoundError: No module named 'conda'
Traceback (most recent call last):
  File "/opt/conda/bin/conda", line 14, in <module>
    from conda.cli import main
ModuleNotFoundError: No module named 'conda'


In [None]:
# Point /opt/conda/bin/python3 at the py39 environment's interpreter.
# NOTE(review): this removes the existing file in place; the notebook has no step that restores it.
!sudo rm /opt/conda/bin/python3
!sudo ln -sf /opt/conda/envs/py39/bin/python3 /opt/conda/bin/python3

In [None]:
# Point /opt/conda/bin/python3.7 at the py39 environment's interpreter, so the
# `python3.7` name now resolves to the py39 `python3` binary.
# NOTE(review): this removes the existing file in place; the notebook has no step that restores it.
!sudo rm /opt/conda/bin/python3.7
!sudo ln -sf /opt/conda/envs/py39/bin/python3 /opt/conda/bin/python3.7

In [None]:
# Point /opt/conda/bin/python at the py39 environment's interpreter.
# Unlike the two cells above, the link is created with `-s` (no `-f`), so it relies on the `rm` succeeding first.
!sudo rm /opt/conda/bin/python
!sudo ln -s /opt/conda/envs/py39/bin/python3 /opt/conda/bin/python

In [None]:
# Re-check which Python version `python` resolves to after the symlinks were replaced.
!python --version

Python 3.9.16


## **Installing the requirements**

In [None]:
# Install the third-party packages imported in the "Importing libraries" cell.
# NOTE(review): no versions are pinned, so a later re-run may install different releases.
!pip install transformers
!pip install bitsandbytes
!pip install accelerate

[0m

## **Importing libraries**

In [None]:
import numpy as np
import pandas as pd
import os

import transformers

import torch
import torch.nn.functional as F
from torch import nn
from torch.cuda.amp import custom_fwd, custom_bwd

from bitsandbytes.functional import quantize_blockwise, dequantize_blockwise

from tqdm.auto import tqdm
from accelerate import init_empty_weights


Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 7.5
CUDA SETUP: Detected CUDA version 113
CUDA SETUP: Loading binary /opt/conda/envs/py39/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cuda113.so...


  warn(msg)


## **Traversing through the directory path '/kaggle/input' and printing out the names of all files within the directory**

In [None]:
# Walk /kaggle/input recursively and print the full path of every file found.
for root, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        full_path = os.path.join(root, file_name)
        print(full_path)

/kaggle/input/nlu-data/data.csv


## **FrozenBNBLinear represents a linear transformation layer that uses quantized weights and biases**


In [None]:
class FrozenBNBLinear(nn.Module):
    """Linear layer whose quantized weight is stored in non-trainable buffers.

    The forward pass delegates to ``DequantizeAndLinear`` and, when an adapter
    module has been attached to ``self.adapter``, adds the adapter's output.
    """

    def __init__(self, weight, absmax, code, bias=None):
        """Store the quantized weight and its quantization state.

        Args:
            weight: quantized weight tensor; its shape is read as
                ``(out_features, in_features)``.
            absmax: quantization state forwarded to ``DequantizeAndLinear``.
            code: quantization state forwarded to ``DequantizeAndLinear``.
            bias: optional ``nn.Parameter``; any other non-None type fails the assertion.
        """
        assert isinstance(bias, nn.Parameter) or bias is None
        super().__init__()
        self.out_features, self.in_features = weight.shape
        # Buffers (not parameters): they follow .to(device) and state_dict but are never optimized.
        self.register_buffer("weight", weight.requires_grad_(False))
        self.register_buffer("absmax", absmax.requires_grad_(False))
        self.register_buffer("code", code.requires_grad_(False))
        self.adapter = None
        self.bias = bias

    def forward(self, input):
        """Return the frozen linear output, plus ``self.adapter(input)`` if an adapter is set."""
        output = DequantizeAndLinear.apply(input, self.weight, self.absmax, self.code, self.bias)
        # Compare against None explicitly: the truthiness of a module depends on
        # ``__len__`` (an empty container module is falsy), which would silently
        # skip an attached adapter.
        if self.adapter is not None:
            return torch.clone(output + self.adapter(input))
        return output

    @classmethod
    def from_linear(cls, linear: nn.Linear) -> "FrozenBNBLinear":
        """Quantize ``linear.weight`` with ``quantize_blockise_lowmemory`` and reuse ``linear.bias``."""
        weights_int8, state = quantize_blockise_lowmemory(linear.weight)
        return cls(weights_int8, *state, linear.bias)

    def __repr__(self):
        return f"{self.__class__.__name__}({self.in_features}, {self.out_features})"


## **DequantizeAndLinear performs the dequantization step for the FrozenBNBLinear layer**

In [None]:
class DequantizeAndLinear(torch.autograd.Function):
    """Autograd function: dequantize the stored weight, then apply ``F.linear``.

    Only the quantized tensors are saved for backward; the dense weight is
    rebuilt in ``backward`` instead of being kept alive between the two passes.
    """

    @staticmethod
    @custom_fwd
    def forward(ctx, input: torch.Tensor, weights_quantized: torch.ByteTensor,
                absmax: torch.FloatTensor, code: torch.FloatTensor, bias: torch.FloatTensor):
        """Return ``F.linear(input, dequantized_weight, bias)``."""
        ctx._has_bias = bias is not None
        ctx.save_for_backward(input, weights_quantized, absmax, code)
        dense_weight = dequantize_blockwise(weights_quantized, absmax=absmax, code=code)
        return F.linear(input, dense_weight, bias)

    @staticmethod
    @custom_bwd
    def backward(ctx, grad_output: torch.Tensor):
        """Return gradients for ``(input, weights_quantized, absmax, code, bias)``.

        The three quantization tensors never receive a gradient (asserted below).
        """
        # Positions 1..3 of needs_input_grad are weights_quantized, absmax and code.
        assert not any(ctx.needs_input_grad[1:4])
        _saved_input, quantized, saved_absmax, saved_code = ctx.saved_tensors
        # grad_output: [*batch, out_features]
        dense_weight = dequantize_blockwise(quantized, absmax=saved_absmax, code=saved_code)
        grad_input = grad_output @ dense_weight
        if ctx._has_bias:
            # Collapse every leading dimension and sum, leaving one value per output feature.
            grad_bias = grad_output.flatten(0, -2).sum(dim=0)
        else:
            grad_bias = None
        return grad_input, None, None, None, grad_bias

## **FrozenBNBEmbedding represents an embedding layer. In fact, the forward method first dequantizes the quantized weights and then applies the embedding**

In [None]:
class FrozenBNBEmbedding(nn.Module):
    """Embedding layer whose quantized table is stored in non-trainable buffers.

    The forward pass dequantizes the table, looks up the indices and, when an
    adapter module has been attached to ``self.adapter``, adds the adapter's output.
    """

    def __init__(self, weight, absmax, code):
        """Store the quantized table and its quantization state.

        Args:
            weight: quantized table; its shape is read as
                ``(num_embeddings, embedding_dim)``.
            absmax: quantization state passed to ``dequantize_blockwise``.
            code: quantization state passed to ``dequantize_blockwise``.
        """
        super().__init__()
        self.num_embeddings, self.embedding_dim = weight.shape
        self.register_buffer("weight", weight.requires_grad_(False))
        self.register_buffer("absmax", absmax.requires_grad_(False))
        self.register_buffer("code", code.requires_grad_(False))
        self.adapter = None

    def forward(self, input, **kwargs):
        """Return the embeddings for ``input``; extra ``kwargs`` go to ``F.embedding``."""
        with torch.no_grad():
            # note: both quantized weights and input indices are *not* differentiable
            weight_deq = dequantize_blockwise(self.weight, absmax=self.absmax, code=self.code)
            output = F.embedding(input, weight_deq, **kwargs)
        # Compare against None explicitly: the truthiness of a module depends on
        # ``__len__`` (an empty container module is falsy), which would silently
        # skip an attached adapter.
        if self.adapter is not None:
            return torch.clone(output + self.adapter(input))
        return output

    @classmethod
    def from_embedding(cls, embedding: nn.Embedding) -> "FrozenBNBEmbedding":
        """Quantize ``embedding.weight`` with ``quantize_blockise_lowmemory`` and wrap the result."""
        weights_int8, state = quantize_blockise_lowmemory(embedding.weight)
        return cls(weights_int8, *state)

    def __repr__(self):
        return f"{self.__class__.__name__}({self.num_embeddings}, {self.embedding_dim})"


## **quantize_blockise_lowmemory is a function that quantizes a PyTorch tensor using blockwise quantization**

In [None]:
def quantize_blockise_lowmemory(matrix: torch.Tensor, chunk_size: int = 2 ** 20):
    """Quantize ``matrix`` with ``quantize_blockwise``, one chunk of elements at a time.

    Args:
        matrix: tensor to quantize; it is flattened with ``view(-1)``.
        chunk_size: number of elements handed to ``quantize_blockwise`` per call;
            must be a multiple of 4096.

    Returns:
        ``(matrix_i8, (absmax, code))``: the concatenated quantized chunks reshaped
        like ``matrix``, the concatenated per-chunk absmax tensors, and the code
        returned by the last ``quantize_blockwise`` call.
    """
    assert chunk_size % 4096 == 0
    flattened = matrix.view(-1)
    num_chunks = (matrix.numel() - 1) // chunk_size + 1

    code = None  # the first call receives None; each later call reuses the code returned before it
    quantized_parts, absmax_parts = [], []
    for start in range(0, num_chunks * chunk_size, chunk_size):
        piece = flattened[start:start + chunk_size].clone()
        quantized_piece, (absmax_piece, code) = quantize_blockwise(piece, code=code)
        quantized_parts.append(quantized_piece)
        absmax_parts.append(absmax_piece)

    return torch.cat(quantized_parts).reshape_as(matrix), (torch.cat(absmax_parts), code)

## **convert_to_int8 is a function that converts all linear and embedding layers in a given PyTorch model to 8-bit with optional adapters**

In [None]:
def convert_to_int8(model):
    """Convert linear and embedding modules to 8-bit with optional adapters"""
    # Snapshot the module list first: children are replaced while we iterate.
    for parent in list(model.modules()):
        for child_name, child in parent.named_children():
            if isinstance(child, nn.Linear):
                print(child_name, child)
                # Zero-filled placeholders with the right shapes; one absmax entry per 4096 weights.
                replacement = FrozenBNBLinear(
                    weight=torch.zeros(child.out_features, child.in_features, dtype=torch.uint8),
                    absmax=torch.zeros((child.weight.numel() - 1) // 4096 + 1),
                    code=torch.zeros(256),
                    bias=child.bias,
                )
            elif isinstance(child, nn.Embedding):
                replacement = FrozenBNBEmbedding(
                    weight=torch.zeros(child.num_embeddings, child.embedding_dim, dtype=torch.uint8),
                    absmax=torch.zeros((child.weight.numel() - 1) // 4096 + 1),
                    code=torch.zeros(256),
                )
            else:
                # Every other module type is left as it is.
                continue
            setattr(parent, child_name, replacement)


## **GPTJBlock represents a single block of the GPT-J transformer model**

In [None]:
class GPTJBlock(transformers.models.gptj.modeling_gptj.GPTJBlock):
    """GPT-J block whose attention and MLP sub-modules are passed through ``convert_to_int8``."""

    def __init__(self, config):
        # Build the stock block first so that self.attn and self.mlp exist.
        super().__init__(config)

        # Replace the nn.Linear / nn.Embedding children of each sub-module with frozen 8-bit placeholders.
        convert_to_int8(self.attn)
        convert_to_int8(self.mlp)

## **GPTJModel subclasses the GPTJModel class provided by the transformers library and converts its linear and embedding layers to 8-bit**

In [None]:
class GPTJModel(transformers.models.gptj.modeling_gptj.GPTJModel):
    """GPT-J model that runs ``convert_to_int8`` over all of its modules after construction."""

    def __init__(self, config):
        super().__init__(config)
        # Convert every nn.Linear / nn.Embedding child found anywhere under this model.
        convert_to_int8(self)


## **GPTJForCausalLM is a class created based on an existing class GPTJForCausalLM in the transformers library and it is used for generating text using a GPT-J model**

In [None]:
class GPTJForCausalLM(transformers.models.gptj.modeling_gptj.GPTJForCausalLM):
    """GPT-J causal-LM wrapper that runs ``convert_to_int8`` over all of its modules after construction."""

    def __init__(self, config):
        super().__init__(config)
        # Convert every nn.Linear / nn.Embedding child found anywhere under this model.
        convert_to_int8(self)


# Monkey-patch the library module so that its `GPTJBlock` name now refers to the subclass defined in this notebook.
transformers.models.gptj.modeling_gptj.GPTJBlock = GPTJBlock

## **Importing the configuration and tokenizer for the EleutherAI GPT-J 6B model using the Hugging Face Transformers library**

In [None]:
# Fetch the GPT-J 6B configuration and tokenizer published under "EleutherAI/gpt-j-6B".
# NOTE(review): `config` is not referenced again in the cells visible in this notebook.
config = transformers.GPTJConfig.from_pretrained("EleutherAI/gpt-j-6B")
tokenizer = transformers.AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")

## **Loading the 8-bit GPT-J 6B checkpoint (`hivemind/gpt-j-6B-8bit`) into the patched GPTJForCausalLM class and moving it to the available device**

In [None]:
# Load the "hivemind/gpt-j-6B-8bit" checkpoint through the GPTJForCausalLM subclass defined above.
gpt = GPTJForCausalLM.from_pretrained("hivemind/gpt-j-6B-8bit", low_cpu_mem_usage=True)

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
dev = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(dev)

# Move the model to the selected device (the returned module is displayed as the cell output).
gpt.to(device)

k_proj Linear(in_features=4096, out_features=4096, bias=False)
v_proj Linear(in_features=4096, out_features=4096, bias=False)
q_proj Linear(in_features=4096, out_features=4096, bias=False)
out_proj Linear(in_features=4096, out_features=4096, bias=False)
fc_in Linear(in_features=4096, out_features=16384, bias=True)
fc_out Linear(in_features=16384, out_features=4096, bias=True)
k_proj Linear(in_features=4096, out_features=4096, bias=False)
v_proj Linear(in_features=4096, out_features=4096, bias=False)
q_proj Linear(in_features=4096, out_features=4096, bias=False)
out_proj Linear(in_features=4096, out_features=4096, bias=False)
fc_in Linear(in_features=4096, out_features=16384, bias=True)
fc_out Linear(in_features=16384, out_features=4096, bias=True)
k_proj Linear(in_features=4096, out_features=4096, bias=False)
v_proj Linear(in_features=4096, out_features=4096, bias=False)
q_proj Linear(in_features=4096, out_features=4096, bias=False)
out_proj Linear(in_features=4096, out_features=4096, 

GPTJForCausalLM(
  (transformer): GPTJModel(
    (wte): FrozenBNBEmbedding(50400, 4096)
    (drop): Dropout(p=0.0, inplace=False)
    (h): ModuleList(
      (0-27): 28 x GPTJBlock(
        (ln_1): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
        (attn): GPTJAttention(
          (attn_dropout): Dropout(p=0.0, inplace=False)
          (resid_dropout): Dropout(p=0.0, inplace=False)
          (k_proj): FrozenBNBLinear(4096, 4096)
          (v_proj): FrozenBNBLinear(4096, 4096)
          (q_proj): FrozenBNBLinear(4096, 4096)
          (out_proj): FrozenBNBLinear(4096, 4096)
        )
        (mlp): GPTJMLP(
          (fc_in): FrozenBNBLinear(4096, 16384)
          (fc_out): FrozenBNBLinear(16384, 4096)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.0, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): FrozenBNBLinear(4096, 50400)
)

## **Loading data**

In [None]:
from sklearn.model_selection import train_test_split
import pandas as pd

# Read the question/answer table and fold each row into one training string of the form
# "[questions]:<question>\n[answers]:<answer>"; `data` ends up as the resulting `sentence` Series.
data = pd.read_csv('/kaggle/input/nlu-data/data.csv').assign(
    sentence=lambda frame: '[questions]:' + frame['questions'] + '\n[answers]:' + frame['answers']
)['sentence']

# Show one formatted example.
print(data.iloc[1])


[questions]:Comment les gens contractent-ils le virus ?
[answers]:Le virus se transmet d’homme à homme aussi facilement que celui de la grippe saisonnière normale, lorsque des personnes infectées toussent ou éternuent et que les gouttelettes infectées sont inhalées ou contaminent les mains ou des surfaces.
Pour prévenir la propagation de la maladie, les personnes malades doivent se couvrir le nez et la bouche lorsqu’elles toussent ou éternuent, rester chez elles si elles ne se sentent pas bien, se laver les mains régulièrement et se tenir autant que possible à l’écart des personnes bien portantes.
On n’a connaissance d’aucun cas d’infection humaine consécutive à une exposition à des porcs ou à d’autres animaux.


## **Split the data into train and test sets**

In [None]:
# Hold out 1% of the sentences for testing. A fixed `random_state` makes the split
# reproducible, so re-running the notebook trains and evaluates on the same rows.
train, test = train_test_split(data, test_size=0.01, random_state=42)

# Persist both splits as CSV files; the next cell reads them back with `load_dataset`.
train.to_csv('/train.csv', index=False)
test.to_csv('/test.csv', index=False)

In [None]:
from datasets import load_dataset

# Build a DatasetDict with `train` and `test` splits from the two CSV files written above.
dataset = load_dataset(
    'csv',
    data_files={'train': '/train.csv', 'test': '/test.csv'},
)

Downloading and preparing dataset csv/default to /root/.cache/huggingface/datasets/csv/default-99f953c4a20e12da/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1...


Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Dataset csv downloaded and prepared to /root/.cache/huggingface/datasets/csv/default-99f953c4a20e12da/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1. Subsequent calls will reuse this data.


  0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Display the splits, their columns and their row counts.
dataset

DatasetDict({
    train: Dataset({
        features: ['sentence'],
        num_rows: 144
    })
    test: Dataset({
        features: ['sentence'],
        num_rows: 2
    })
})

In [None]:
device # "cuda:0" when CUDA is available, otherwise "cpu" (chosen in the model-loading cell)

device(type='cuda', index=0)

## **Setting the padding token of the tokenizer to be the end-of-sequence token**

In [None]:
# Reuse the end-of-sequence token for padding, so the `padding=True` calls below have a pad token.
# NOTE(review): presumably needed because this tokenizer ships without a pad token — confirm.
tokenizer.pad_token = tokenizer.eos_token

## **Preparing the input data for feeding into the GPT-J model by converting the text into tokens that can be used as input to the model**

In [None]:
def tokenize_function(examples):
    """Tokenize the ``sentence`` column of a batch with padding and truncation at 128 tokens."""
    return tokenizer(
        examples["sentence"],
        padding=True,
        truncation=True,
        max_length=128,
    )


# Tokenize both splits in batches and drop the raw text column, keeping only the tokenizer outputs.
tokenized_datasets = dataset.map(tokenize_function, batched=True).remove_columns(["sentence"])
# Make indexing return torch tensors.
tokenized_datasets.set_format("torch")

Map:   0%|          | 0/144 [00:00<?, ? examples/s]

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

## **Creating a PyTorch DataLoader object named train_dataloader from a tokenized dataset named full_train_dataset**

In [None]:
from torch.utils.data import DataLoader

# Batch the tokenized training split, 8 examples at a time.
# NOTE(review): shuffle=False means every epoch sees the batches in the same order — confirm this is intended for training.
full_train_dataset = tokenized_datasets["train"]
train_dataloader = DataLoader(full_train_dataset, shuffle=False, batch_size=8)

## **Adding adapter layers to a given GPTJ model**

In [None]:
def add_adapters(model, adapter_dim=4, p = 0.1):
    """Attach trainable low-rank adapters to the frozen 8-bit layers of ``model``.

    Every ``FrozenBNBLinear`` whose qualified name contains "attn", "mlp" or "head",
    and every ``FrozenBNBEmbedding``, receives an ``nn.Sequential`` adapter of the
    form (down-projection -> dropout -> up-projection). The up-projection weight is
    zero-initialized.

    Args:
        model: module tree to scan with ``named_modules()``.
        adapter_dim: width of the adapter bottleneck; must be positive.
        p: dropout probability used inside each adapter.
    """
    assert adapter_dim > 0

    def attach(name, module, down_projection, out_dim):
        # Shared wiring for both layer kinds; only the first layer and the output width differ.
        print("Adding adapter to", name)
        module.adapter = nn.Sequential(
            down_projection,
            nn.Dropout(p=p),
            nn.Linear(adapter_dim, out_dim, bias=False),
        )
        print("Initializing", name)
        nn.init.zeros_(module.adapter[2].weight)

    for name, module in model.named_modules():
        if isinstance(module, FrozenBNBLinear):
            if any(tag in name for tag in ("attn", "mlp", "head")):
                attach(
                    name,
                    module,
                    nn.Linear(module.in_features, adapter_dim, bias=False),
                    module.out_features,
                )
            else:
                print("Not adding adapter to", name)
        elif isinstance(module, FrozenBNBEmbedding):
            attach(
                name,
                module,
                nn.Embedding(module.num_embeddings, adapter_dim),
                module.embedding_dim,
            )

# Attach adapters with the default settings (adapter_dim=4, p=0.1), then move the model,
# including the newly created adapter layers, to `device`.
add_adapters(gpt)
gpt.to(device)

Adding adapter to transformer.wte
Initializing transformer.wte
Adding adapter to transformer.h.0.attn.k_proj
Initializing transformer.h.0.attn.k_proj
Adding adapter to transformer.h.0.attn.v_proj
Initializing transformer.h.0.attn.v_proj
Adding adapter to transformer.h.0.attn.q_proj
Initializing transformer.h.0.attn.q_proj
Adding adapter to transformer.h.0.attn.out_proj
Initializing transformer.h.0.attn.out_proj
Adding adapter to transformer.h.0.mlp.fc_in
Initializing transformer.h.0.mlp.fc_in
Adding adapter to transformer.h.0.mlp.fc_out
Initializing transformer.h.0.mlp.fc_out
Adding adapter to transformer.h.1.attn.k_proj
Initializing transformer.h.1.attn.k_proj
Adding adapter to transformer.h.1.attn.v_proj
Initializing transformer.h.1.attn.v_proj
Adding adapter to transformer.h.1.attn.q_proj
Initializing transformer.h.1.attn.q_proj
Adding adapter to transformer.h.1.attn.out_proj
Initializing transformer.h.1.attn.out_proj
Adding adapter to transformer.h.1.mlp.fc_in
Initializing transfor

GPTJForCausalLM(
  (transformer): GPTJModel(
    (wte): FrozenBNBEmbedding(50400, 4096)
    (drop): Dropout(p=0.0, inplace=False)
    (h): ModuleList(
      (0-27): 28 x GPTJBlock(
        (ln_1): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
        (attn): GPTJAttention(
          (attn_dropout): Dropout(p=0.0, inplace=False)
          (resid_dropout): Dropout(p=0.0, inplace=False)
          (k_proj): FrozenBNBLinear(4096, 4096)
          (v_proj): FrozenBNBLinear(4096, 4096)
          (q_proj): FrozenBNBLinear(4096, 4096)
          (out_proj): FrozenBNBLinear(4096, 4096)
        )
        (mlp): GPTJMLP(
          (fc_in): FrozenBNBLinear(4096, 16384)
          (fc_out): FrozenBNBLinear(16384, 4096)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.0, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): FrozenBNBLinear(4096, 50400)
)

In [None]:
from bitsandbytes.optim import Adam8bit

# enable gradient checkpointing (the training cell below calls this again)
gpt.gradient_checkpointing_enable()
# Adam8bit over everything returned by gpt.parameters(), with learning rate 1e-5 and weight decay 0.01
optimizer = Adam8bit(gpt.parameters(), lr=1e-5, weight_decay=0.01)

In [None]:
# Total number of optimizer steps: one per batch, for each of the 5 epochs.
num_epochs = 5
num_training_steps = num_epochs * len(train_dataloader)

## **Initializing a learning rate scheduler that linearly increases the learning rate from 0 to its maximum value over the first 10% of the training steps, then linearly decreases it to 0 by the end of training**

In [None]:
# Arguments after the optimizer: number of warmup steps (10% of all steps, truncated to an int),
# then the total number of training steps.
lr_scheduler = transformers.get_linear_schedule_with_warmup(
    optimizer, int(num_training_steps*0.1), num_training_steps
)

In [None]:
# Destination of the periodic checkpoint written by the training loop.
filepath = '/kaggle/working/model.pt'

## **Training a GPT model using PyTorch and the Hugging Face Transformers library**

In [None]:
from tqdm.auto import tqdm

# Number of optimizer steps between two checkpoints written to `filepath`.
CHECKPOINT_EVERY = 500

scaler = torch.cuda.amp.GradScaler()
progress_bar = tqdm(range(num_training_steps))
gpt.train()
gpt.gradient_checkpointing_enable()
global_step = 0

for epoch in range(num_epochs):
    for batch in train_dataloader:
        global_step += 1
        if global_step % CHECKPOINT_EVERY == 0:
            print(global_step)
            state = {
                'k': global_step,
                # Store the epoch being run (the original stored the constant `num_epochs`).
                'epoch': epoch,
                'lr_scheduler': lr_scheduler.state_dict(),
                'state_dict': gpt.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            torch.save(state, filepath)

        batch = {name: tensor.to(device) for name, tensor in batch.items()}

        optimizer.zero_grad()

        with torch.autograd.profiler.record_function("model_inference"):
            with torch.cuda.amp.autocast():
                # Call the module itself rather than .forward() so registered hooks run.
                out = gpt(**batch)

                # Next-token targets. A target is kept only when the token it is predicted
                # from is a real (attended) token; all other positions get -100 and are
                # ignored by the loss. With right padding this keeps the first pad/EOS
                # token after each sequence as a target and drops the pad -> pad
                # predictions that used to be averaged into the loss.
                targets = batch['input_ids'][:, 1:].masked_fill(
                    batch['attention_mask'][:, :-1] == 0, -100
                )
                loss = F.cross_entropy(
                    out.logits[:, :-1, :].flatten(0, -2),
                    targets.flatten(),
                    reduction='mean',
                    label_smoothing=0.1,
                    ignore_index=-100,
                )

        # Report the loss on the progress bar instead of printing one tensor per step.
        progress_bar.set_postfix(loss=f"{loss.item():.4f}")

        scaler.scale(loss).backward()
        # Unscale before clipping so the norm threshold applies to the true gradients.
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(gpt.parameters(), 1.0)
        scaler.step(optimizer)
        scaler.update()

        lr_scheduler.step()
        progress_bar.update(1)


  0%|          | 0/90 [00:00<?, ?it/s]

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


tensor(3.6727, device='cuda:0', grad_fn=<AddBackward0>)
tensor(4.2753, device='cuda:0', grad_fn=<AddBackward0>)
tensor(5.2208, device='cuda:0', grad_fn=<AddBackward0>)
tensor(5.9448, device='cuda:0', grad_fn=<AddBackward0>)
tensor(4.0377, device='cuda:0', grad_fn=<AddBackward0>)
tensor(4.9986, device='cuda:0', grad_fn=<AddBackward0>)
tensor(4.1293, device='cuda:0', grad_fn=<AddBackward0>)
tensor(6.3937, device='cuda:0', grad_fn=<AddBackward0>)
tensor(5.2045, device='cuda:0', grad_fn=<AddBackward0>)
tensor(4.4952, device='cuda:0', grad_fn=<AddBackward0>)
tensor(5.3439, device='cuda:0', grad_fn=<AddBackward0>)
tensor(4.6936, device='cuda:0', grad_fn=<AddBackward0>)
tensor(4.2281, device='cuda:0', grad_fn=<AddBackward0>)
tensor(4.3033, device='cuda:0', grad_fn=<AddBackward0>)
tensor(4.2001, device='cuda:0', grad_fn=<AddBackward0>)
tensor(4.1796, device='cuda:0', grad_fn=<AddBackward0>)
tensor(5.0952, device='cuda:0', grad_fn=<AddBackward0>)
tensor(5.1793, device='cuda:0', grad_fn=<AddBack

## **Using GPT-J to generate text given a prompt**

In [None]:
# Switch to evaluation mode and sample an answer for every held-out example.
gpt.eval()
with torch.no_grad():
    for sentence in test.values:
        print("**************************************************************")
        # Keep only the text before the first "[answers]:" marker as the prompt.
        question_text = sentence.partition('[answers]:')[0].strip()
        print(question_text)

        encoded = tokenizer(question_text, truncation=True, padding=True, max_length=128, return_tensors='pt')
        prompt = {field: tensor.to(device) for field, tensor in encoded.items()}
        out = gpt.generate(
            **prompt,
            max_length=512,
            top_k=50,
            top_p=0.9,
            temperature=1.0,
            do_sample=True,
            repetition_penalty = 1.2,
            num_beams=1,
        )
        print('\n')
        print("GPT-J :" , tokenizer.decode(out[0]))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


**************************************************************
[questions]:Quel est le taux de la retenue à la source à appliquer sur la TVA relative aux montants supérieurs à 1000 DT payés par les services de l’Etat à partir du 01/01/2016?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.




GPT-J : [questions]:Quel est le taux de la retenue à la source à appliquer sur la TVA relative aux montants supérieurs à 1000 DT payés par les services de l’Etat à partir du 01/01/2016?
comments.answers:Commentaires : Aucun commentaire...<|endoftext|>
**************************************************************
[questions]:Quelles sont les obligations relatives aux factures et aux titres de mouvement prévues par la législation fiscale?


GPT-J : [questions]:Quelles sont les obligations relatives aux factures et aux titres de mouvement prévues par la législation fiscale?[/questions]

I would like to know, in what case a customer is not required to pay any taxes during the payment of an installment contract? Example: The seller and buyer are under the same taxation jurisdiction; it seems there should be no taxes on this transaction ; does anybody has more informations about the subject or is their some document explaining the rights for tax evasion in such type situation.<|endoftext|

In [None]:
# Save the complete state dict of the model (everything returned by gpt.state_dict()) to the Kaggle working directory.
torch.save(gpt.state_dict(), '/kaggle/working/gpt-j-6B.pt')

In [None]:
# Set the model to evaluation mode
gpt.eval()
# Generate text for one hand-written prompt, with gradient tracking disabled
with torch.no_grad():
  # Encode the prompt (truncated to at most 128 tokens) and move every tensor to the device
  prompt = tokenizer("[questions]:Who is the health minister of Tunisia ?", truncation=True, padding=True, max_length=128, return_tensors='pt')
  prompt = {key: value.to(device) for key, value in prompt.items()}
  # Sample a continuation (top-k / top-p sampling, repetition penalty 1.2, at most 512 tokens in total)
  out = gpt.generate(**prompt, max_length=512, top_k=50, top_p=0.9, temperature=1.0, do_sample=True, repetition_penalty = 1.2, num_beams=1)
  # Decode the first generated sequence back to text and print it
  print(tokenizer.decode(out[0]))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[questions]:Who is the health minister of Tunisia?
1 : Is it Mr.Driss Mhirsi?He was a physician who died in 2014.[link]http://enews7news.com/driss-mhiri... [question source: askapremedicineforme.com][hr]<|endoftext|>


In [None]:
# Evaluation mode, no gradient tracking.
gpt.eval()
with torch.no_grad():
  # Encode the hand-written prompt and move every tensor to the device.
  prompt = tokenizer("[questions]:How does the tax office check??", truncation=True, padding=True, max_length=128, return_tensors='pt')
  prompt = {key: value.to(device) for key, value in prompt.items()}
  # Sample a continuation and print the first generated sequence as text.
  out = gpt.generate(**prompt, max_length=512, top_k=50, top_p=0.9, temperature=1.0, do_sample=True, repetition_penalty = 1.2, num_beams=1)
  print(tokenizer.decode(out[0]))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[questions]:How does the tax office check??.The question is not about my situation but how it works?
I am paying already only half of the total price...<|endoftext|>


In [None]:
# Evaluation mode, no gradient tracking.
gpt.eval()
with torch.no_grad():
  # Encode the hand-written prompt and move every tensor to the device.
  prompt = tokenizer("[questions]:What is influenza A(H1N1)?", truncation=True, padding=True, max_length=128, return_tensors='pt')
  prompt = {key: value.to(device) for key, value in prompt.items()}
  # Sample a continuation and print the first generated sequence as text.
  out = gpt.generate(**prompt, max_length=512, top_k=50, top_p=0.9, temperature=1.0, do_sample=True, repetition_penalty = 1.2, num_beams=1)
  print(tokenizer.decode(out[0]))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[questions]:What is influenza A(H1N1)?How does one contract it?
Is there any cure for Influenza a (H1 N1)<|endoftext|>
