In [1]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Загрузка токенизатора и модели
tokenizer = T5Tokenizer.from_pretrained("potsawee/t5-large-generation-squad-QuestionAnswer")
model = T5ForConditionalGeneration.from_pretrained("potsawee/t5-large-generation-squad-QuestionAnswer")

# Пример использования
input_text = "question: What is AI? context: Artificial intelligence is a branch of computer science."
input_ids = tokenizer.encode(input_text, return_tensors="pt")

# Генерация ответа
outputs = model.generate(input_ids)
answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

print("Answer:", answer)

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Answer: Artificial intelligence


In [3]:
import torch
torch.cuda.is_available()

True

In [4]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch

# Проверка доступности GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Загрузка токенизатора и модели
tokenizer = T5Tokenizer.from_pretrained("potsawee/t5-large-generation-squad-QuestionAnswer")
model = T5ForConditionalGeneration.from_pretrained("potsawee/t5-large-generation-squad-QuestionAnswer").to(device)

# Пример использования
input_text = "question: What is AI? context: Artificial intelligence is a branch of computer science."
input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)  # Перенос входных данных на GPU

# Генерация ответа
outputs = model.generate(input_ids)
answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

print("Answer:", answer)


Using device: cuda
Answer: Artificial intelligence


In [5]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch

# Проверка доступности GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Загрузка токенизатора и модели
tokenizer = T5Tokenizer.from_pretrained("potsawee/t5-large-generation-squad-QuestionAnswer")
model = T5ForConditionalGeneration.from_pretrained("potsawee/t5-large-generation-squad-QuestionAnswer").to(device)

# Тестовые данные: пары "контекст" и "эталонный ответ"
samples = [
    {
        "context": "Artificial intelligence is a branch of computer science that aims to create systems capable of intelligent behavior.",
        "expected_answer": "Artificial intelligence is a branch of computer science"
    },
    {
        "context": "The Eiffel Tower is located in Paris and was built in 1889 as the entrance arch for the World's Fair.",
        "expected_answer": "Paris"
    },
    {
        "context": "Water boils at 100 degrees Celsius under standard atmospheric pressure.",
        "expected_answer": "100 degrees Celsius"
    },
    {
        "context": "The mitochondria is often referred to as the powerhouse of the cell.",
        "expected_answer": "mitochondria"
    },
    {
        "context": "The Great Wall of China is over 13,000 miles long and was built to protect Chinese states from invasions.",
        "expected_answer": "over 13,000 miles"
    },
    {
        "context": "Isaac Newton formulated the laws of motion and universal gravitation in the 17th century.",
        "expected_answer": "Isaac Newton"
    },
    {
        "context": "The Amazon rainforest produces more than 20% of the world's oxygen supply.",
        "expected_answer": "Amazon rainforest"
    },
    {
        "context": "Python is a high-level, interpreted programming language known for its readability and wide range of libraries.",
        "expected_answer": "Python"
    },
    {
        "context": "Photosynthesis is the process by which green plants use sunlight to synthesize nutrients from carbon dioxide and water.",
        "expected_answer": "Photosynthesis"
    },
    {
        "context": "The speed of light in a vacuum is approximately 299,792 kilometers per second.",
        "expected_answer": "299,792 kilometers per second"
    }
]

correct = 0

for i, sample in enumerate(samples):
    input_text = f"context: {sample['context']}"
    input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)
    outputs = model.generate(input_ids, max_length=100)
    output = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Разделим на вопрос и ответ
    if "<sep>" in output:
        try:
            question, predicted_answer = output.split("<sep>")
        except:
            predicted_answer = output.strip()
    else:
        predicted_answer = output.strip()

    expected = sample['expected_answer'].lower()
    predicted = predicted_answer.lower()

    is_correct = expected in predicted or predicted in expected
    correct += int(is_correct)

    print(f"\nSample {i + 1}")
    print("Generated Answer:", predicted_answer)
    print("Expected Answer :", sample['expected_answer'])
    print("✅ Correct" if is_correct else "❌ Incorrect")

accuracy = correct / len(samples) * 100
print(f"\nAccuracy: {accuracy:.2f}%")


Using device: cuda

Sample 1
Generated Answer: What is artificial intelligence? a branch of computer science
Expected Answer : Artificial intelligence is a branch of computer science
❌ Incorrect

Sample 2
Generated Answer: When was the Eiffel Tower built? 1889
Expected Answer : Paris
❌ Incorrect

Sample 3
Generated Answer: What is the standard atmospheric pressure? atmospheric pressure
Expected Answer : 100 degrees Celsius
❌ Incorrect

Sample 4
Generated Answer: What is the powerhouse of the cell? The mitochondria
Expected Answer : mitochondria
✅ Correct

Sample 5
Generated Answer: How many miles long is the Great Wall of China? over 13,000 miles
Expected Answer : over 13,000 miles
✅ Correct

Sample 6
Generated Answer: What century did Isaac Newton write his laws? 17th
Expected Answer : Isaac Newton
✅ Correct

Sample 7
Generated Answer: What percentage of the world's oxygen supply does the Amazon produce? 20%
Expected Answer : Amazon rainforest
❌ Incorrect

Sample 8
Generated Answer: W

In [None]:
lecture_notes = """
Artificial Intelligence (AI) is a branch of computer science focused on creating intelligent machines that can perform tasks typically requiring human intelligence.
These tasks include learning, reasoning, problem-solving, perception, and language understanding.
One of the earliest successful AI applications is expert systems, which mimic the decision-making abilities of human experts.
Modern AI heavily relies on machine learning, especially deep learning, where artificial neural networks are used to model complex patterns in data.
AI has a wide range of applications, including self-driving cars, virtual assistants, medical diagnostics, and financial forecasting.
"""


In [6]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
import random

device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = T5Tokenizer.from_pretrained("potsawee/t5-large-generation-squad-QuestionAnswer")
model = T5ForConditionalGeneration.from_pretrained("potsawee/t5-large-generation-squad-QuestionAnswer").to(device)

lecture_notes = """
Artificial Intelligence (AI) is a branch of computer science focused on creating intelligent machines that can perform tasks typically requiring human intelligence.
These tasks include learning, reasoning, problem-solving, perception, and language understanding.
One of the earliest successful AI applications is expert systems, which mimic the decision-making abilities of human experts.
Modern AI heavily relies on machine learning, especially deep learning, where artificial neural networks are used to model complex patterns in data.
AI has a wide range of applications, including self-driving cars, virtual assistants, medical diagnostics, and financial forecasting.
"""

# Разделим на 3-4 предложения (логически)
contexts = lecture_notes.strip().split("\n")

questions = []

for i, context in enumerate(contexts):
    input_text = f"context: {context}"
    input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)
    outputs = model.generate(input_ids, max_length=100)
    qa = tokenizer.decode(outputs[0], skip_special_tokens=True)

    if "?" in qa:
        question, answer = qa.split("?")
        question = question.strip() + "?"
        answer = answer.strip()
    else:
        continue  # если нет корректного вопроса

    # Генерация distractors вручную (можно заменить на генерацию через другую модель)
    distractors = [
        "robotics", "mathematics", "physics", "statistics",
        "chemistry", "economics", "quantum computing", "human resources"
    ]
    options = [answer] + random.sample(distractors, 3)
    random.shuffle(options)
    correct_option = ["A", "B", "C", "D"][options.index(answer)]

    questions.append({
        "question": question,
        "options": {
            "A": options[0],
            "B": options[1],
            "C": options[2],
            "D": options[3]
        },
        "answer": correct_option
    })

# Выводим
for idx, q in enumerate(questions):
    print(f"\nQuestion {idx+1}: {q['question']}")
    for opt, val in q["options"].items():
        print(f"  {opt}) {val}")
    print(f"Correct Answer: {q['answer']}")



Question 1: What is the branch of computer science focused on creating intelligent machines?
  A) mathematics
  B) physics
  C) chemistry
  D) Artificial Intelligence
Correct Answer: D

Question 2: What are some of the tasks that students are asked to do in a context?
  A) mathematics
  B) physics
  C) robotics
  D) learning, reasoning, problem-solving, perception, and language understanding
Correct Answer: D

Question 3: What is one of the earliest successful AI applications?
  A) human resources
  B) quantum computing
  C) chemistry
  D) expert systems
Correct Answer: D

Question 4: What is deep learning used for?
  A) mathematics
  B) to model complex patterns in data
  C) chemistry
  D) statistics
Correct Answer: B

Question 5: What is one of the applications of AI?
  A) quantum computing
  B) self-driving cars
  C) human resources
  D) chemistry
Correct Answer: B


In [7]:
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Загрузка модели и токенизатора
model_name = "potsawee/t5-large-generation-race-Distractor"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Входной текст для генерации отвлекающих вариантов
input_text = "question: What is the capital of France? context: France is a country in Europe. Its capital is Paris."

# Токенизация входного текста
inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)

# Генерация отвлекающих вариантов
outputs = model.generate(
    inputs["input_ids"],
    max_length=50,
    num_return_sequences=3,  # Количество отвлекающих вариантов
    num_beams=5,            # Использование beam search
    early_stopping=True
)

# Декодирование и вывод результатов
distractors = [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]
print("Generated distractors:")
for i, distractor in enumerate(distractors, 1):
    print(f"{i}: {distractor}")

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Generated distractors:
1: Paris
2: Paris.
3: New York
