## Loading dataset and preprocessing

In [1]:
# setting squad attributes

squad_v2 = False

In [2]:
# loading dataset
from datasets import load_dataset, load_metric
squad_dataset = load_dataset('squad_v2' if squad_v2 else 'squad')

Found cached dataset squad (/home/ap/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453)


  0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
#exploring dataset

squad_dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 87599
    })
    validation: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 10570
    })
})

In [4]:
# exploring dataset

squad_dataset['train'][100]

{'id': '573387acd058e614000b5cb5',
 'title': 'University_of_Notre_Dame',
 'context': 'One of the main driving forces in the growth of the University was its football team, the Notre Dame Fighting Irish. Knute Rockne became head coach in 1918. Under Rockne, the Irish would post a record of 105 wins, 12 losses, and five ties. During his 13 years the Irish won three national championships, had five undefeated seasons, won the Rose Bowl in 1925, and produced players such as George Gipp and the "Four Horsemen". Knute Rockne has the highest winning percentage (.881) in NCAA Division I/FBS football history. Rockne\'s offenses employed the Notre Dame Box and his defenses ran a 7–2–2 scheme. The last game Rockne coached was on December 14, 1930 when he led a group of Notre Dame all-stars against the New York Giants in New York City.',
 'question': 'In what year did the team lead by Knute Rockne win the Rose Bowl?',
 'answers': {'text': ['1925'], 'answer_start': [354]}}

In [5]:
# flattening dataset

flattened_data = squad_dataset.flatten()

flattened_data

DatasetDict({
    train: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers.text', 'answers.answer_start'],
        num_rows: 87599
    })
    validation: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers.text', 'answers.answer_start'],
        num_rows: 10570
    })
})

In [6]:
# exploring flattened dataset

L = iter(flattened_data['train'])
example = []

for l in range(3):
    print(next(L))

{'id': '5733be284776f41900661182', 'title': 'University_of_Notre_Dame', 'context': 'Architecturally, the school has a Catholic character. Atop the Main Building\'s gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.', 'question': 'To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?', 'answers.text': ['Saint Bernadette Soubirous'], 'answers.answer_start': [515]}
{'id': '5733be284776f4190066117f', 'title': 'Univer

In [7]:
flattened_data['train'][100]['answers.text'][0]

'1925'

In [8]:
# function for processing data

def questions(story):
    dic = {}
    dic['questions'] = story['question']
    dic['sentences'] = story['context']
    dic['answer'] = story['answers.text'][0]
    return dic

In [9]:
processed = flattened_data.map(questions)

Loading cached processed dataset at /home/ap/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453/cache-c89ebdf5b9e19c9a.arrow
Loading cached processed dataset at /home/ap/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453/cache-26eafb999c28f671.arrow


In [10]:
processed['train'][1]

{'id': '5733be284776f4190066117f',
 'title': 'University_of_Notre_Dame',
 'context': 'Architecturally, the school has a Catholic character. Atop the Main Building\'s gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.',
 'question': 'What is in front of the Notre Dame Main Building?',
 'answers.text': ['a copper statue of Christ'],
 'answers.answer_start': [188],
 'questions': 'What is in front of the Notre Dame Main Building?',
 '

In [11]:
processed['validation'][2]

{'id': '56be4db0acb8001400a502ee',
 'title': 'Super_Bowl_50',
 'context': 'Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi\'s Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the "golden anniversary" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as "Super Bowl L"), so that the logo could prominently feature the Arabic numerals 50.',
 'question': 'Where did Super Bowl 50 take place?',
 'answers.text': ['Santa Clara, California',
  "Levi's Stadium",
  "Levi's Stadium in the San 

In [12]:
# finding index for answer in sentence

def get_start_end_idx(story):
    
    answer_str = story['answer']
    str_idx = story['sentences'].find(answer_str)
    end_idx = str_idx + len(answer_str)
    return {'str_idx' : str_idx, 
            'end_idx' : end_idx}

In [13]:
processed = processed.map(get_start_end_idx)

Loading cached processed dataset at /home/ap/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453/cache-78923f8dbd2e702f.arrow
Loading cached processed dataset at /home/ap/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453/cache-8bc1ba70ac3cfcaa.arrow


In [14]:
num = 2
print(processed['validation'][num])
start_idx = processed['validation'][num]['str_idx']
end_idx = processed['validation'][num]['end_idx']
print('answer:', processed['validation'][num]['sentences'][start_idx:end_idx])

{'id': '56be4db0acb8001400a502ee', 'title': 'Super_Bowl_50', 'context': 'Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi\'s Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the "golden anniversary" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as "Super Bowl L"), so that the logo could prominently feature the Arabic numerals 50.', 'question': 'Where did Super Bowl 50 take place?', 'answers.text': ['Santa Clara, California', "Levi's Stadium", "Levi's Stadium in the San Francisc

## Tokenize sentences

In [15]:
# import DistilBertTokenizer

from transformers import DistilBertTokenizerFast
tokenizer = DistilBertTokenizerFast.from_pretrained('tokenizer/')

In [16]:
# tokenize and align

def tokenize_align(example):
    encoding = tokenizer(example['sentences'], example['questions'], truncation=True, max_length=tokenizer.model_max_length, padding="max_length")
    start_positions = encoding.char_to_token(example['str_idx'])
    end_positions = encoding.char_to_token(example['end_idx']-1)
    if start_positions is None:
        start_positions = tokenizer.model_max_length
    if end_positions is None:
        end_positions = tokenizer.model_max_length
    return {'input_ids': encoding['input_ids'],
            'attention_mask': encoding['attention_mask'],
            'start_positions': start_positions,
            'end_positions': end_positions}

In [17]:
qa_dataset = processed.map(tokenize_align)

Map:   0%|          | 0/87599 [00:00<?, ? examples/s]

Loading cached processed dataset at /home/ap/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453/cache-92870b1866b6bbb0.arrow


In [18]:
#exploring tokenized dataset

qa_dataset['train'][1]

{'id': '5733be284776f4190066117f',
 'title': 'University_of_Notre_Dame',
 'context': 'Architecturally, the school has a Catholic character. Atop the Main Building\'s gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.',
 'question': 'What is in front of the Notre Dame Main Building?',
 'answers.text': ['a copper statue of Christ'],
 'answers.answer_start': [188],
 'questions': 'What is in front of the Notre Dame Main Building?',
 '

In [19]:
# removing columns no longer required

qa_dataset = qa_dataset.remove_columns(['id', 'title', 'context', 'question', 'answers.text', 'answers.answer_start'])
qa_dataset['train'][1]

{'questions': 'What is in front of the Notre Dame Main Building?',
 'sentences': 'Architecturally, the school has a Catholic character. Atop the Main Building\'s gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.',
 'answer': 'a copper statue of Christ',
 'str_idx': 188,
 'end_idx': 213,
 'input_ids': [101,
  6549,
  2135,
  1010,
  1996,
  2082,
  2038,
  1037,
  3234,
  2839,
  1012,
  10234,
  1996,
  2364,
  2311,
  1005,
  10

## Training

In [20]:
# train test data

from torch.utils.data import DataLoader
train_ds = qa_dataset['train']
test_ds = qa_dataset['validation']


columns_to_return = ['input_ids', 'attention_mask', 'start_positions', 'end_positions']
train_ds.set_format(type='pt', columns=columns_to_return)
test_ds.set_format(type='pt', columns=columns_to_return)

In [21]:
#exploring training data
train_ds[1]

{'input_ids': tensor([  101,  6549,  2135,  1010,  1996,  2082,  2038,  1037,  3234,  2839,
          1012, 10234,  1996,  2364,  2311,  1005,  1055,  2751,  8514,  2003,
          1037,  3585,  6231,  1997,  1996,  6261,  2984,  1012,  3202,  1999,
          2392,  1997,  1996,  2364,  2311,  1998,  5307,  2009,  1010,  2003,
          1037,  6967,  6231,  1997,  4828,  2007,  2608,  2039, 14995,  6924,
          2007,  1996,  5722,  1000,  2310,  3490,  2618,  4748,  2033, 18168,
          5267,  1000,  1012,  2279,  2000,  1996,  2364,  2311,  2003,  1996,
         13546,  1997,  1996,  6730,  2540,  1012,  3202,  2369,  1996, 13546,
          2003,  1996, 24665, 23052,  1010,  1037, 14042,  2173,  1997,  7083,
          1998,  9185,  1012,  2009,  2003,  1037, 15059,  1997,  1996, 24665,
         23052,  2012, 10223, 26371,  1010,  2605,  2073,  1996,  6261,  2984,
         22353,  2135,  2596,  2000,  3002, 16595,  9648,  4674,  2061, 12083,
          9711,  2271,  1999,  8517,  1

In [22]:
# defining compute metrics

from sklearn.metrics import f1_score

def compute_metrics(pred):
    start_labels = pred.label_ids[0]
    start_preds = pred.predictions[0].argmax(-1)
    end_labels = pred.label_ids[1]
    end_preds = pred.predictions[1].argmax(-1)
    
    f1_start = f1_score(start_labels, start_preds, average='macro')
    f1_end = f1_score(end_labels, end_preds, average='macro')
    
    return {
        'f1_start': f1_start,
        'f1_end' : f1_end,
    }

In [23]:
# importing pre-trained DistilBERT

from transformers import AutoModelForQuestionAnswering

model_checkpoint = 'distilbert-base-uncased'
model = AutoModelForQuestionAnswering.from_pretrained(model_checkpoint)

2023-11-25 03:05:36.890424: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForQuestionAnswering: ['vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClas

In [1]:
# Finetuning the model

from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir='results',
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=2e-5,
    weight_decay=0.01,
    logging_dir=None,
    logging_steps=50
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=test_ds,
    compute_metrics=compute_metrics
)

trainer.train()

SyntaxError: unmatched ')' (3171775928.py, line 15)

In [26]:
trainer.evaluate(test_ds)

{'eval_loss': 1.2128028869628906,
 'eval_f1_start': 0.5664197104045473,
 'eval_f1_end': 0.5913603224300138}

In [27]:
trainer.model.save_pretrained('./qamodel_trained')

In [28]:
import torch

torch.cuda.is_available()

True

In [31]:
# setting up model to use GPU if available otherwise use CPU

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

model.to(device)

questions, text = 'Why did the revenue increase?','Revenue increased as the the company signed a new contract with Macro. Margin deteriorated as the contract was offered at a discount.'

input_dict = tokenizer(text, questions, return_tensors='pt')

input_ids = input_dict['input_ids'].to(device)
attention_mask = input_dict['attention_mask'].to(device)

outputs = model(input_ids, attention_mask=attention_mask)

start_logits = outputs[0]
end_logits = outputs[1]

all_tokens = tokenizer.convert_ids_to_tokens(input_dict["input_ids"].numpy()[0])
answer = ' '.join(all_tokens[torch.argmax(start_logits, 1)[0] : torch.argmax(end_logits, 1)[0]+1])

print(questions, answer.capitalize())

Why did the revenue increase? As the the company signed a new contract with macro


In [32]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

model.to(device)

questions, text = 'Why did the margin decrease?','Revenue increased as the the company signed a new contract with Macro. Margin decreased as the contract was offered at a discount.'

input_dict = tokenizer(text, questions, return_tensors='pt')

input_ids = input_dict['input_ids'].to(device)
attention_mask = input_dict['attention_mask'].to(device)

outputs = model(input_ids, attention_mask=attention_mask)

start_logits = outputs[0]
end_logits = outputs[1]

all_tokens = tokenizer.convert_ids_to_tokens(input_dict["input_ids"].numpy()[0])
answer = ' '.join(all_tokens[torch.argmax(start_logits, 1)[0] : torch.argmax(end_logits, 1)[0]+1])

print(questions, answer.capitalize())

Why did the margin decrease? As the contract was offered at a discount


In [34]:
mda_ms_2013 = """Revenue increased, primarily due to higher revenue from Server and Tools as well as revenue from new products and services, including Windows 8, Surface, and the new Office, offset in part by the impact on revenue of a decline in the x86 PC market. 
Operating income grew, primarily due to the $6.2 billion goodwill impairment charge related to our OSD business recorded during the prior year. Other key changes in cost of revenue and operating expenses were:
Cost of revenue increased $2.7 billion or 16%, reflecting increased product costs associated with Surface and Windows 8, including an approximately $900 million charge for Surface RT inventory adjustments, higher headcount-related expenses, payments made to Nokia related to joint strategic initiatives, royalties on Xbox LIVE content, and retail stores expenses, offset in part by decreased costs associated with lower sales of Xbox 360 consoles and decreased traffic acquisition costs.
Sales and marketing expenses increased $1.4 billion or 10%, reflecting advertising of Windows 8 and Surface.
Research and development expenses increased $600 million or 6%, due mainly to higher headcount-related expenses, largely related to the Entertainment and Devices Division.
General and administrative expenses increased $580 million or 13%, due to higher legal charges, primarily the EU fine of $733 million."""

In [36]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

model.to(device)

text = mda_ms_2013

questions = 'why did the revenue increase?'

input_dict = tokenizer(text, questions, return_tensors='pt')

input_ids = input_dict['input_ids'].to(device)
attention_mask = input_dict['attention_mask'].to(device)

outputs = model(input_ids, attention_mask=attention_mask)

start_logits = outputs[0]
end_logits = outputs[1]

all_tokens = tokenizer.convert_ids_to_tokens(input_dict["input_ids"].numpy()[0])
answer = ' '.join(all_tokens[torch.argmax(start_logits, 1)[0] : torch.argmax(end_logits, 1)[0]+1])

print(questions, answer.capitalize())

why did the revenue increase? Higher revenue from server and tools as well as revenue from new products and services , including windows 8 , surface , and the new office


In [37]:
questions = 'what happened to Research and development expenses?'

input_dict = tokenizer(text, questions, return_tensors='pt')

input_ids = input_dict['input_ids'].to(device)
attention_mask = input_dict['attention_mask'].to(device)

outputs = model(input_ids, attention_mask=attention_mask)

start_logits = outputs[0]
end_logits = outputs[1]

all_tokens = tokenizer.convert_ids_to_tokens(input_dict["input_ids"].numpy()[0])
answer = ' '.join(all_tokens[torch.argmax(start_logits, 1)[0] : torch.argmax(end_logits, 1)[0]+1])

print(questions, answer.capitalize())

what happened to Research and development expenses? Increased $ 600 million or 6 %


In [38]:
questions = 'why did Research and development expenses increase?'

input_dict = tokenizer(text, questions, return_tensors='pt')

input_ids = input_dict['input_ids'].to(device)
attention_mask = input_dict['attention_mask'].to(device)

outputs = model(input_ids, attention_mask=attention_mask)

start_logits = outputs[0]
end_logits = outputs[1]

all_tokens = tokenizer.convert_ids_to_tokens(input_dict["input_ids"].numpy()[0])
answer = ' '.join(all_tokens[torch.argmax(start_logits, 1)[0] : torch.argmax(end_logits, 1)[0]+1])

print(questions, answer.capitalize())

why did Research and development expenses increase? Higher head ##co ##unt - related expenses


In [39]:
questions = 'How much was the charge for surface RT inventory adjustment?'

input_dict = tokenizer(text, questions, return_tensors='pt')

input_ids = input_dict['input_ids'].to(device)
attention_mask = input_dict['attention_mask'].to(device)

outputs = model(input_ids, attention_mask=attention_mask)

start_logits = outputs[0]
end_logits = outputs[1]

all_tokens = tokenizer.convert_ids_to_tokens(input_dict["input_ids"].numpy()[0])
answer = ' '.join(all_tokens[torch.argmax(start_logits, 1)[0] : torch.argmax(end_logits, 1)[0]+1])

print(questions, answer.capitalize())

How much was the charge for surface RT inventory adjustment? $ 900 million


In [42]:
questions = 'How much was the EU fine?'

input_dict = tokenizer(text, questions, return_tensors='pt')

input_ids = input_dict['input_ids'].to(device)
attention_mask = input_dict['attention_mask'].to(device)

outputs = model(input_ids, attention_mask=attention_mask)

start_logits = outputs[0]
end_logits = outputs[1]

all_tokens = tokenizer.convert_ids_to_tokens(input_dict["input_ids"].numpy()[0])
answer = ' '.join(all_tokens[torch.argmax(start_logits, 1)[0] : torch.argmax(end_logits, 1)[0]+1])

print(questions, answer.capitalize())

How much was the EU fine? $ 73 ##3 million


In [2]:
from transformers import DistilBertForQuestionAnswering

model = DistilBertForQuestionAnswering.from_pretrained("qamodel_trained")

2023-11-25 20:21:53.065537: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [7]:
from transformers import DistilBertTokenizerFast
tokenizer = DistilBertTokenizerFast.from_pretrained('tokenizer/')
import torch

In [3]:
mda_apple_2023 = """iPhone net sales decreased 2% or $4.9 billion during 2023 compared to 2022 due to lower net sales of non-Pro iPhone models,
partially offset by higher net sales of Pro iPhone models.
Mac net sales decreased 27% or $10.8 billion during 2023 compared to 2022 due primarily to lower net sales of laptops.
iPad net sales decreased 3% or $1.0 billion during 2023 compared to 2022 due primarily to lower net sales of iPad mini and iPad
Air, partially offset by the combined net sales of iPad 9th and 10th generation.
Services net sales increased 9% or $7.1 billion during 2023 compared to 2022 due to higher net sales across all lines of
business.
Products gross margin percentage increased during 2023 compared to 2022 due to cost savings and a different Products mix,
partially offset by the weakness in foreign currencies relative to the U.S. dollar and decreased leverage.
Services gross margin percentage decreased during 2023 compared to 2022 due to higher Services costs and the weakness in
foreign currencies relative to the U.S. dollar, partially offset by a different Services mix."""

In [9]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

model.to(device)

text = mda_apple_2023

questions = 'why happened to iPhone net sales?'

input_dict = tokenizer(text, questions, return_tensors='pt')

input_ids = input_dict['input_ids'].to(device)
attention_mask = input_dict['attention_mask'].to(device)

outputs = model(input_ids, attention_mask=attention_mask)

start_logits = outputs[0]
end_logits = outputs[1]

all_tokens = tokenizer.convert_ids_to_tokens(input_dict["input_ids"].numpy()[0])
answer = ' '.join(all_tokens[torch.argmax(start_logits, 1)[0] : torch.argmax(end_logits, 1)[0]+1])

print(questions, answer.capitalize())

why happened to iPhone net sales? Lower net sales of non - pro iphone models


In [11]:
questions = 'How much did Mac net sales decrease?'

input_dict = tokenizer(text, questions, return_tensors='pt')

input_ids = input_dict['input_ids'].to(device)
attention_mask = input_dict['attention_mask'].to(device)

outputs = model(input_ids, attention_mask=attention_mask)

start_logits = outputs[0]
end_logits = outputs[1]

all_tokens = tokenizer.convert_ids_to_tokens(input_dict["input_ids"].numpy()[0])
answer = ' '.join(all_tokens[torch.argmax(start_logits, 1)[0] : torch.argmax(end_logits, 1)[0]+1])

print(questions, answer.capitalize())

How much did Mac net sales decrease? 27 %


In [18]:
questions = 'What happened to iPad net sales?'

input_dict = tokenizer(text, questions, return_tensors='pt')

input_ids = input_dict['input_ids'].to(device)
attention_mask = input_dict['attention_mask'].to(device)

outputs = model(input_ids, attention_mask=attention_mask)

start_logits = outputs[0]
end_logits = outputs[1]

all_tokens = tokenizer.convert_ids_to_tokens(input_dict["input_ids"].numpy()[0])
answer = ' '.join(all_tokens[torch.argmax(start_logits, 1)[0] : torch.argmax(end_logits, 1)[0]+1])

print(questions, answer.capitalize())

What happened to iPad net sales? Decreased 3 % or $ 1 . 0 billion


In [20]:
questions = 'Why did iPad net sales decrease?'

input_dict = tokenizer(text, questions, return_tensors='pt')

input_ids = input_dict['input_ids'].to(device)
attention_mask = input_dict['attention_mask'].to(device)

outputs = model(input_ids, attention_mask=attention_mask)

start_logits = outputs[0]
end_logits = outputs[1]

all_tokens = tokenizer.convert_ids_to_tokens(input_dict["input_ids"].numpy()[0])
answer = ' '.join(all_tokens[torch.argmax(start_logits, 1)[0] : torch.argmax(end_logits, 1)[0]+1])

print(questions, answer.capitalize())

Why did iPad net sales decrease? Lower net sales of ipad mini and ipad air , partially offset by the combined net sales of ipad 9th and 10th generation


In [34]:
questions = 'What was the impact of foreign currencies?'

input_dict = tokenizer(text, questions, return_tensors='pt')

input_ids = input_dict['input_ids'].to(device)
attention_mask = input_dict['attention_mask'].to(device)

outputs = model(input_ids, attention_mask=attention_mask)

start_logits = outputs[0]
end_logits = outputs[1]

all_tokens = tokenizer.convert_ids_to_tokens(input_dict["input_ids"].numpy()[0])
answer = ' '.join(all_tokens[torch.argmax(start_logits, 1)[0] : torch.argmax(end_logits, 1)[0]+1])

print(questions, answer.capitalize())

What was the impact of foreign currencies? Weakness
