In [1]:
from transformers import BartTokenizer, BartModel, BartForConditionalGeneration
import json
from dotenv import load_dotenv

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
load_dotenv()

False

In [3]:
with open('config.json', 'r') as jsonfile:
    token_data = json.load(jsonfile)
token = token_data['token']

In [4]:
tokenizer = BartTokenizer.from_pretrained('facebook/bart-large')
model = BartForConditionalGeneration.from_pretrained('facebook/bart-large')

inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
outputs = model(**inputs)


In [5]:
user_query = "Who won the FIFA World Cup in the year 1994?"
inputs = tokenizer("<s>" + user_query + "</s>", return_tensors="pt", max_length=1024, truncation=True)

outputs = model.generate(
    **inputs, 
    max_length=100, 
    num_beams=5, 
    early_stopping=True,
    temperature=0.9, 
    no_repeat_ngram_size=2,
    top_k=50, 
    top_p=0.95,
    do_sample=True
)

# Decode the generated response
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)

Who won the FIFA World Cup in the year 1994?


In [6]:
from transformers import DistilBertTokenizer, DistilBertModel
import torch
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased-distilled-squad')
model = DistilBertModel.from_pretrained('distilbert-base-cased-distilled-squad')

question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"

inputs = tokenizer(question, text, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

print(outputs)

BaseModelOutput(last_hidden_state=tensor([[[ 1.1810, -0.4073,  0.9986,  ..., -0.7445,  0.0380, -0.5510],
         [ 1.6172, -0.6785,  1.6932,  ..., -0.8216, -0.2387, -0.6187],
         [ 2.0840, -0.5496,  1.3313,  ..., -0.7791,  0.1698, -0.3950],
         ...,
         [ 0.2879, -0.1813,  1.2631,  ..., -0.2022,  0.4699,  0.5535],
         [ 0.6069, -0.1943,  0.7584,  ..., -0.5106, -0.4027, -0.4910],
         [ 1.0183, -0.8215,  0.9088,  ..., -0.8094,  0.8372, -0.2027]]]), hidden_states=None, attentions=None)


In [7]:
from transformers import T5ForConditionalGeneration, T5Tokenizer

model_name = 't5-small'  # You can replace 't5-small' with 't5-base', 't5-large', 't5-3b', or 't5-11b' based on your needs and computational resources
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

question = "Who won the FIFA World Cup in 1994?"

input_text = f"translate English to French: {question}"

inputs = tokenizer(input_text, return_tensors='pt', max_length=512, truncation=True)

# Generate the answer
output_sequences = model.generate(
    input_ids=inputs['input_ids'],
    attention_mask=inputs['attention_mask'],
    max_length=256,  # You can adjust this based on the length of the response you expect
    temperature=0.7,  # Adjusting temperature controls the randomness of the output
    num_return_sequences=1  # Number of output sequences to generate
)

answer = tokenizer.decode(output_sequences[0], skip_special_tokens=True)

print(answer)


You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Qui a gagné la Coupe du monde de la FIFA en 1994?
