# Uploading materials

In [1]:
!wget https://qasper-dataset.s3.us-west-2.amazonaws.com/qasper-train-dev-v0.3.tgz

--2024-06-18 10:49:51--  https://qasper-dataset.s3.us-west-2.amazonaws.com/qasper-train-dev-v0.3.tgz
Resolving qasper-dataset.s3.us-west-2.amazonaws.com (qasper-dataset.s3.us-west-2.amazonaws.com)... 52.92.204.242, 52.218.233.241, 52.218.217.137, ...
Connecting to qasper-dataset.s3.us-west-2.amazonaws.com (qasper-dataset.s3.us-west-2.amazonaws.com)|52.92.204.242|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 10835856 (10M) [application/gzip]
Saving to: ‘qasper-train-dev-v0.3.tgz’


2024-06-18 10:49:52 (24.5 MB/s) - ‘qasper-train-dev-v0.3.tgz’ saved [10835856/10835856]



In [2]:
!gunzip /content/qasper-train-dev-v0.3.tgz

In [3]:
!tar -xvf /content/qasper-train-dev-v0.3.tar

qasper-train-v0.3.json
qasper-dev-v0.3.json
README.md


In [4]:
import json
import pandas as pd

In [5]:
with open('/content/qasper-train-v0.3.json', 'r') as js_file:
  train_data = json.load(js_file)

In [6]:
documents = list(train_data.keys())

In [7]:
titles = []
abstracts = []
full_texts = []

for id in documents:
  one_document = train_data[id]
  titles.append(one_document['title'])
  abstracts.append(one_document['abstract'])
  text = []
  for text_element in one_document['full_text']:
    text.extend(text_element['paragraphs'])

  full_texts.append(' '.join(text))

In [8]:
data_texts = {
    'ids': documents,
    'titles': titles,
    'abstracts': abstracts,
    'full_texts': full_texts
}
df_texts = pd.DataFrame(data_texts)

In [9]:
df_texts

Unnamed: 0,ids,titles,abstracts,full_texts
0,1909.00694,Minimally Supervised Learning of Affective Eve...,Recognizing affective events that trigger posi...,Affective events BIBREF0 are events that typic...
1,2003.07723,"PO-EMO: Conceptualization, Annotation, and Mod...",Most approaches to emotion analysis regarding ...,"1.1em 1.1.1em 1.1.1.1em Thomas Haider$^{1,3}$,..."
2,1705.09665,Community Identity and User Engagement in a Mu...,A community's identity defines and shapes its ...,"“If each city is like a game of chess, the day..."
3,1908.06606,Question Answering based Clinical Text Structu...,Clinical text structuring is a critical and fu...,Clinical text structuring (CTS) is a critical ...
4,1811.00942,Progress and Tradeoffs in Neural Language Models,"In recent years, we have witnessed a dramatic ...",Deep learning has unquestionably advanced the ...
...,...,...,...,...
883,1702.03274,Hybrid Code Networks: practical and efficient ...,End-to-end learning of recurrent neural networ...,Task-oriented dialog systems help a user to ac...
884,1610.03112,Leveraging Recurrent Neural Networks for Multi...,Social norms are shared rules that govern and ...,Social norms are informal understandings that ...
885,1607.03542,Open-Vocabulary Semantic Parsing with both Dis...,Traditional semantic parsers map language onto...,Semantic parsing is the task of mapping a phra...
886,1812.10860,Can You Tell Me How to Get Past Sesame Street?...,Natural language understanding has recently se...,State-of-the-art models for natural language p...


In [10]:
def get_questions(data, id, id_list, questions, evidences, extractive_spans,
                  free_form_answer, answerability, highlighted_evidences):
  for element in data[id]['qas']:
    question = element['question']
    ans = element['answers']
    for one_answer in ans:
      answer = one_answer['answer']
      answerability.append(answer['unanswerable'])
      extractive_spans.append(answer['extractive_spans'])
      free_form_answer.append(answer['free_form_answer'])
      evidences.append(answer['evidence'])
      highlighted_evidences.append(answer['highlighted_evidence'])
      questions.append(question)
    ids = [id] * len(ans)
    id_list.extend(ids)

  return

In [11]:
questions = []
evidences = []
extractive_spans = []
free_form_answer = []
answerability = []
highlighted_evidences = []
id_list = []
for id in documents:
  get_questions(train_data, id, id_list, questions, evidences, extractive_spans,
                  free_form_answer, answerability, highlighted_evidences)

In [12]:
data_questions = {
    'text_id': id_list,
    'questions': questions,
    'answerability': answerability,
    'extractive_spans': extractive_spans,
    'free_form_answer': free_form_answer,
    'evidences': evidences,
    'highlighted_evidences': highlighted_evidences
}

In [13]:
df_questions = pd.DataFrame(data_questions)

In [14]:
df_questions

Unnamed: 0,text_id,questions,answerability,extractive_spans,free_form_answer,evidences,highlighted_evidences
0,1909.00694,What is the seed lexicon?,False,[],a vocabulary of positive and negative predicat...,[The seed lexicon consists of positive and neg...,[The seed lexicon consists of positive and neg...
1,1909.00694,What is the seed lexicon?,False,[seed lexicon consists of positive and negativ...,,[The seed lexicon consists of positive and neg...,[The seed lexicon consists of positive and neg...
2,1909.00694,What are the results?,False,[],Using all data to train: AL -- BiGRU achieved ...,[FLOAT SELECTED: Table 3: Performance of vario...,[FLOAT SELECTED: Table 3: Performance of vario...
3,1909.00694,How are relations used to propagate polarity?,False,[],"based on the relation between events, the sugg...","[In this paper, we propose a simple and effect...","[As illustrated in Figure FIGREF1, our key ide..."
4,1909.00694,How are relations used to propagate polarity?,False,[],cause relation: both events in the relation sh...,"[In this paper, we propose a simple and effect...","[As illustrated in Figure FIGREF1, our key ide..."
...,...,...,...,...,...,...,...
2670,1607.03542,What task do they evaluate on?,False,[],Fill-in-the-blank natural language questions,[We demonstrate our approach on the task of an...,[We demonstrate our approach on the task of an...
2671,1812.10860,Do some pretraining objectives perform better ...,False,[],,"[Looking to other target tasks, the grammar-re...","[Looking to other target tasks, the grammar-re..."
2672,1712.02121,Did the authors try stacking multiple convolut...,False,[],,"[Recently, convolutional neural networks (CNNs...","[In ConvE, only $v_h$ and $v_r$ are reshaped a..."
2673,1712.02121,How many feature maps are generated for a give...,False,[],3 feature maps for a given tuple,[FLOAT SELECTED: Figure 1: Process involved in...,[FLOAT SELECTED: Figure 1: Process involved in...


In [15]:
df_questions = df_questions[df_questions['answerability'] != True]

# Model

In [None]:
import torch

In [16]:
import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

tokenizer = AutoTokenizer.from_pretrained("Intel/dynamic_tinybert")
model = AutoModelForQuestionAnswering.from_pretrained("Intel/dynamic_tinybert")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/351 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/268M [00:00<?, ?B/s]

In [None]:
df_questions.head(4)

Unnamed: 0,text_id,questions,answerability,extractive_spans,free_form_answer,evidences,highlighted_evidences
0,1909.00694,What is the seed lexicon?,False,[],a vocabulary of positive and negative predicat...,[The seed lexicon consists of positive and neg...,[The seed lexicon consists of positive and neg...
1,1909.00694,What is the seed lexicon?,False,[seed lexicon consists of positive and negativ...,,[The seed lexicon consists of positive and neg...,[The seed lexicon consists of positive and neg...
2,1909.00694,What are the results?,False,[],Using all data to train: AL -- BiGRU achieved ...,[FLOAT SELECTED: Table 3: Performance of vario...,[FLOAT SELECTED: Table 3: Performance of vario...
3,1909.00694,How are relations used to propagate polarity?,False,[],"based on the relation between events, the sugg...","[In this paper, we propose a simple and effect...","[As illustrated in Figure FIGREF1, our key ide..."


In [17]:
# попытка использовать bert

In [18]:
from transformers import AutoTokenizer

model_checkpoint = "bert-base-cased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

In [None]:
inputs = tokenizer(
    df_questions[2:6]["questions"].tolist(),
    contexts,
    max_length=100,
    truncation="only_second",
    stride=50,
    return_overflowing_tokens=True,
    return_offsets_mapping=True,
)

# T5

In [19]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Загрузка модели для генерации текста
model_name = "google-t5/t5-base"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

In [None]:
!pip install accelerate -U

Collecting accelerate
  Downloading accelerate-0.31.0-py3-none-any.whl (309 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/309.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.9/309.4 kB[0m [31m2.2 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m309.4/309.4 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from t

In [None]:
!pip install transformers[torch]

In [None]:
from transformers import TrainingArguments

In [None]:
# !pip install "transformers==4.27.1" "datasets==2.9.0" "accelerate==0.17.1" "evaluate==0.4.0" tensorboard scikit-learn

In [None]:
training_args = TrainingArguments(
                  output_dir="./results",  # Directory to save checkpoints
                  num_train_epochs=3,  # Number of training epochs
                  per_device_train_batch_size=16,  # Batch size per GPU/TPU core
                  per_device_eval_batch_size=8,  # Batch size for evaluation
                  evaluation_strategy="epoch",  # Evaluate after each epoch
                  save_strategy="epoch",  # Save checkpoints after each epoch
                  load_best_model_at_end=True,  # Load the best model after training
                  learning_rate=2e-5,
                  warmup_steps=500,
                  logging_steps=100,
                  save_total_limit=2
                        )

In [None]:
!pip show accelerate

Name: accelerate
Version: 0.31.0
Summary: Accelerate
Home-page: https://github.com/huggingface/accelerate
Author: The HuggingFace team
Author-email: zach.mueller@huggingface.co
License: Apache
Location: /usr/local/lib/python3.10/dist-packages
Requires: huggingface-hub, numpy, packaging, psutil, pyyaml, safetensors, torch
Required-by: 


In [None]:
extracted = df_questions[df_questions['extractive_spans'].apply(len) > 0]

extracted.head(3)

Unnamed: 0,text_id,questions,answerability,extractive_spans,free_form_answer,evidences,highlighted_evidences
1,1909.00694,What is the seed lexicon?,False,[seed lexicon consists of positive and negativ...,,[The seed lexicon consists of positive and neg...,[The seed lexicon consists of positive and neg...
7,1909.00694,What are labels available in dataset for super...,False,"[negative, positive]",,[Affective events BIBREF0 are events that typi...,"[In this paper, we work on recognizing the pol..."
11,1909.00694,How large is raw corpus used for training?,False,[100 million sentences],,"[As a raw corpus, we used a Japanese web corpu...","[As a raw corpus, we used a Japanese web corpu..."


In [None]:
input_text = []

In [None]:
from sklearn.model_selection import train_test_split
train, test = train_test_split(extracted, test_size=0.2, random_state=42)

In [None]:
def create_input(row):
  question = row['questions']
  answer = ' '.join(row['extractive_spans'])
  context = ' '.join(row['evidences'])
  input_text.append({
      'question': question,
      'context': context,
      'answer': answer
  })

In [None]:
train.apply(lambda x: create_input(x), axis=1)

1863    None
2316    None
1953    None
721     None
2179    None
        ... 
802     None
809     None
1125    None
1908    None
689     None
Length: 1090, dtype: object

In [None]:
questions_train = [f"question: {x['question']} context: {x['context']}" for x in input_text]
answers_train = [x['answer'] for x in input_text]

In [None]:
input_text = []
test.apply(lambda x: create_input(x), axis=1)

704     None
820     None
141     None
2109    None
1636    None
        ... 
771     None
1317    None
152     None
857     None
2468    None
Length: 273, dtype: object

In [None]:
questions_test = [f"question: {x['question']} context: {x['context']}" for x in input_text]
answers_test = [x['answer'] for x in input_text]

In [None]:
inputs_train = tokenizer(questions_train,
                   padding='max_length',
                   truncation=True,
                   max_length=512,
                   return_tensors='pt')

labels_train = tokenizer(answers_train,
                  padding='max_length',
                  truncation=True,
                  max_length=512,
                  return_tensors='pt').input_ids

In [None]:
inputs_test = tokenizer(questions_test,
                   padding='max_length',
                   truncation=True,
                   max_length=512,
                   return_tensors='pt')

labels_test = tokenizer(answers_test,
                  padding='max_length',
                  truncation=True,
                  max_length=512,
                  return_tensors='pt').input_ids

In [None]:
train_dataset = {
           'input_ids': inputs_train.input_ids,
           'attention_mask': inputs_train.attention_mask,
           'labels': labels_train
       }

test_dataset = {
           'input_ids': inputs_test.input_ids,
           'attention_mask': inputs_test.attention_mask,
           'labels': labels_test
       }

In [None]:
!pip install sacrebleu

Collecting sacrebleu
  Downloading sacrebleu-2.4.2-py3-none-any.whl (106 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/106.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━[0m [32m71.7/106.7 kB[0m [31m1.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.7/106.7 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting portalocker (from sacrebleu)
  Downloading portalocker-2.8.2-py3-none-any.whl (17 kB)
Collecting colorama (from sacrebleu)
  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Installing collected packages: portalocker, colorama, sacrebleu
Successfully installed colorama-0.4.6 portalocker-2.8.2 sacrebleu-2.4.2


In [None]:
!pip install rouge

Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1


In [None]:
import numpy as np
from sklearn.metrics import f1_score
import rouge
import sacrebleu

In [None]:
# def compute_metrics(pred):
#           labels = pred.label_ids
#           preds = pred.predictions.argmax(-1)
#           # bleu = sacrebleu.corpus_bleu(preds, labels)
#           # rouge = rouge_score.rouge_n(preds, labels, n=2)
#           em = np.mean(preds == labels)
#           f1 = f1_score(preds, labels, average='macro')
#           return {
#               "bleu": bleu.score,
#               "rouge": rouge,
#               "em": em,
#               "f1": f1
#           }

def compute_metrics(pred):
          labels = pred.label_ids
          preds = pred.predictions.argmax(-1)
          em = np.mean(preds == labels)
          f1 = f1_score(preds, labels, average='macro')
          return {
              "em": em,
              "f1": f1
          }

In [None]:
train_dataset

{'input_ids': array([[ 822,   10,  363, ...,    0,    0,    0],
        [ 822,   10,  363, ...,    0,    0,    0],
        [ 822,   10,  571, ...,    0,    0,    0],
        ...,
        [ 822,   10, 4073, ...,    0,    0,    0],
        [ 822,   10,  571, ...,    0,    0,    0],
        [ 822,   10,  363, ...,    0,    0,    0]]),
 'attention_mask': array([[1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0],
        ...,
        [1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0]]),
 'labels': array([[  391, 26260,   427, ...,     0,     0,     0],
        [  262,  7259,  4501, ...,     0,     0,     0],
        [   94,  4396,    38, ...,     0,     0,     0],
        ...,
        [    3,  5091,  5365, ...,     0,     0,     0],
        [  975,  2138,    35, ...,     0,     0,     0],
        [  679,    51,   427, ...,     0,     0,     0]])}

In [None]:
from transformers import Trainer
trainer = Trainer(
       model=model,
       args=training_args,
       train_dataset=train_dataset,
       eval_dataset=test_dataset,
       compute_metrics=compute_metrics
   )

trainer.train()



In [None]:
trainer.evaluate()

# Retry

In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments

In [None]:
extracted.head(2)

Unnamed: 0,text_id,questions,answerability,extractive_spans,free_form_answer,evidences,highlighted_evidences
1,1909.00694,What is the seed lexicon?,False,[seed lexicon consists of positive and negativ...,,[The seed lexicon consists of positive and neg...,[The seed lexicon consists of positive and neg...
7,1909.00694,What are labels available in dataset for super...,False,"[negative, positive]",,[Affective events BIBREF0 are events that typi...,"[In this paper, we work on recognizing the pol..."


In [None]:
extracted['answers'] = extracted['extractive_spans'].apply(lambda x: ' '.join(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  extracted['answers'] = extracted['extractive_spans'].apply(lambda x: ' '.join(x))


In [None]:
extracted.head(2)

Unnamed: 0,text_id,questions,answerability,extractive_spans,free_form_answer,evidences,highlighted_evidences,answers
1,1909.00694,What is the seed lexicon?,False,[seed lexicon consists of positive and negativ...,,[The seed lexicon consists of positive and neg...,[The seed lexicon consists of positive and neg...,seed lexicon consists of positive and negative...
7,1909.00694,What are labels available in dataset for super...,False,"[negative, positive]",,[Affective events BIBREF0 are events that typi...,"[In this paper, we work on recognizing the pol...",negative positive


In [None]:
data = list(zip(extracted['questions'], extracted['answers']))

In [None]:
from sklearn.model_selection import train_test_split
train_set, test_set = train_test_split(data, test_size=0.2)

In [None]:
tokenizer = T5Tokenizer.from_pretrained('t5-base')

def preprocess_function(examples):
    inputs = tokenizer.batch_encode_plus(
        examples[0],
        padding="max_length",
        truncation=True,
        max_length=512,
        return_tensors="pt"
    )
    labels = tokenizer.batch_encode_plus(
        examples[1],
        padding="max_length",
        truncation=True,
        max_length=512,
        return_tensors="pt"
    )

    inputs['labels'] = labels['input_ids']

    return inputs

train_data = preprocess_function(train_set)
test_data = preprocess_function(test_set)

In [None]:
model = T5ForConditionalGeneration.from_pretrained('t5-base')

In [None]:
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    learning_rate=3e-5,
    evaluation_strategy="steps",
    eval_steps=1000,
    save_strategy="steps",
    save_steps=1000,
    logging_dir="./logs",
    load_best_model_at_end=True,
)

In [None]:
train_data

{'input_ids': tensor([[ 363,  119, 1477,  ...,    0,    0,    0],
        [4047,   18,  476,  ...,    0,    0,    0]]), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]]), 'labels': tensor([[  571,    19,    48,  ...,     0,     0,     0],
        [    3,  8775, 12062,  ...,     0,     0,     0]])}

In [None]:
train = pd.DataFrame.from_dict(train_data)
test = pd.DataFrame.from_dict(test_data)
test

Unnamed: 0,0
0,input_ids
1,attention_mask
2,labels


In [None]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train,
    eval_dataset=test,
    tokenizer=tokenizer,
)

trainer.train()





# Another Try

In [None]:
question_answers = extracted[['questions', 'answers']]
question_answers.head(2)

Unnamed: 0,questions,answers
1,What is the seed lexicon?,seed lexicon consists of positive and negative...
7,What are labels available in dataset for super...,negative positive


In [None]:
!pip install simplet5

Collecting simplet5
  Downloading simplet5-0.1.4.tar.gz (7.3 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting transformers==4.16.2 (from simplet5)
  Downloading transformers-4.16.2-py3-none-any.whl (3.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.5/3.5 MB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pytorch-lightning==1.5.10 (from simplet5)
  Downloading pytorch_lightning-1.5.10-py3-none-any.whl (527 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m527.7/527.7 kB[0m [31m22.1 MB/s[0m eta [36m0:00:00[0m
Collecting torchmetrics>=0.4.1 (from pytorch-lightning==1.5.10->simplet5)
  Downloading torchmetrics-1.4.0.post0-py3-none-any.whl (868 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m868.8/868.8 kB[0m [31m39.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pyDeprecate==0.3.1 (from pytorch-lightning==1.5.10->simplet5)
  Downloading pyDeprecate-0.3.1-py3-none-any.whl (10 kB)
Collecting

In [None]:
question_answers.columns = ['source_text', 'target_text']

In [None]:
from simplet5 import SimpleT5

model = SimpleT5()
model.from_pretrained(model_type="t5", model_name="t5-base")
model.train(train_df=question_answers,
            eval_df=question_answers,
            source_max_token_len=128,
            target_max_token_len=50,
            batch_size=16, max_epochs=3, use_gpu=False,
            outputdir = 'trained_t5')

INFO:pytorch_lightning.utilities.seed:Global seed set to 42


Downloading:   0%|          | 0.00/773k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.32M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/850M [00:00<?, ?B/s]

INFO:pytorch_lightning.utilities.distributed:GPU available: False, used: False
INFO:pytorch_lightning.utilities.distributed:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.distributed:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name  | Type                       | Params
-----------------------------------------------------
0 | model | T5ForConditionalGeneration | 222 M 
-----------------------------------------------------
222 M     Trainable params
0         Non-trainable params
222 M     Total params
891.614   Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

  self.pid = os.fork()
INFO:pytorch_lightning.utilities.seed:Global seed set to 42


Training: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")
  self.pid = os.fork()


In [None]:
from transformers import T5ForQuestionAnswering

ImportError: cannot import name 'T5ForQuestionAnswering' from 'transformers' (/usr/local/lib/python3.10/dist-packages/transformers/__init__.py)

In [None]:
!pip install transformers

