In [None]:
#pip install adapter-transformers

In [None]:
#pip install -U adapters

In [None]:
#pip install --upgrade transformers

In [1]:
from datasets import load_dataset
import pandas as pd

# Pandas display configuration for inspecting long text columns
pd.options.display.max_colwidth = None  # show each cell's full text instead of truncating it
pd.options.display.width = None         # no fixed character width; pandas tries to auto-detect it

In [2]:
# Fetch the BoolQ configuration (yes/no question answering) of the SuperGLUE benchmark.
# trust_remote_code=True allows the dataset's loading script to run locally.
dataset = load_dataset(path='super_glue', name='boolq', trust_remote_code=True)

In [3]:
# Display the available splits with their columns and row counts
dataset

DatasetDict({
    train: Dataset({
        features: ['question', 'passage', 'idx', 'label'],
        num_rows: 9427
    })
    validation: Dataset({
        features: ['question', 'passage', 'idx', 'label'],
        num_rows: 3270
    })
    test: Dataset({
        features: ['question', 'passage', 'idx', 'label'],
        num_rows: 3245
    })
})

In [4]:
# Convert the train split to a pandas DataFrame.
# Dataset.to_pandas() converts the underlying Arrow table directly, instead of
# having pandas iterate over the split one Python dict per row.
train_df = dataset['train'].to_pandas()

In [5]:
# (rows, columns) of the training frame
train_df.shape

(9427, 4)

In [6]:
# Preview 5 random training rows (no random_state is passed, so the rows differ between runs)
train_df.sample(5)

Unnamed: 0,question,passage,idx,label
109,can i use dexron iii for dexron ii,"DEXRON -- In 1993, GM released new Dexron-III fluid (GM Spec GM6417M and later GMN10055). It is generally backward-compatible with transmissions using earlier Dexron fluids or Type-A/Suffix-A fluid. However this specification failed to address a number of issues concerning long term durability such as shear stability and fluid oxidation. Dexron-III underwent a number of iterations in an attempt to address various shortcomings but was eventually replaced by new thinking i.e. DEXRON-VI.",109,1
2636,is the garden of gethsemane the same as the mount of olives,"Gethsemane -- Gethsemane (Greek: Γεθσημανή, Gethsemane; Hebrew: גת שמנים‎‎, Gat Shmanim; Syriac: ܓܕܣܡܢ‎, Gaḏ Šmānê, lit. ``oil press'') is an urban garden at the foot of the Mount of Olives in Jerusalem, most famous as the place where Jesus prayed and his disciples slept the night before his crucifixion; i.e. the site recorded as where the agony in the garden took place.",2636,0
600,is there a season 2 of hunted on cinemax,"Hunted (2012 TV series) -- In early 2015, Frank Spotnitz stated that the series--and spinoff--had been officially cancelled by Cinemax, though he and George were open to continuing the project if it were to be picked up by another network.",600,0
8583,is harmony of the seas the biggest cruise ship in the world,"MS Harmony of the Seas -- MS Harmony of the Seas is an Oasis-class cruise ship built by STX France at the Chantiers de l'Atlantique shipyard in Saint-Nazaire, France for Royal Caribbean International. With a gross tonnage of 226,963 GT, she is the second largest passenger ship in the world, surpassing her older sisters Oasis of the Seas and Allure of the Seas, but surpassed by her newer sister Symphony of the Seas.",8583,0
1624,can you use wood bats in college baseball,"College baseball -- Though a wood bat is legal in NCAA competition, players overwhelmingly prefer and use a metal bat. The metal bat was implemented in college baseball in 1975. Use of a metal bat is somewhat controversial. Supporters of an aluminum or composite bat note that it can increase offensive performance, as the speed of a ball off a metal bat is generally faster than off a wood bat. Those against metal, and for wood, argue that a metal bat is not safe to use, and that a metal bat doesn't prepare players for the next level, as professional baseball uses a wood bat exclusively. In the 2011 season the NCAA changed the requirements for a metal bat, reducing the maximum allowed exit speed in a way that is said to produce a feeling more like a wood bat. As a result, in 2011 there was a drop-off in overall ``long'' drives or home runs relative to past years.",1624,1


- Labels: 0 for False and 1 for True.
- Every example has exactly two possible labels, so this is a binary classification problem.
- The label indicates whether, according to the passage, the answer to the yes/no question is "yes" (1) or "no" (0).

#### Using a pre-trained model (not yet fine-tuned) on BoolQ

In [17]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

In [12]:
# Checkpoint to load for both the tokenizer and the model.
# "bert-base-uncased" is the generic pre-trained BERT checkpoint, not one
# fine-tuned on BoolQ: its classification head is newly initialised on load,
# so predictions are not meaningful until the model is fine-tuned.
model_name = "bert-base-uncased"  # Swap in a checkpoint fine-tuned on BoolQ for usable predictions


In [13]:
# Load the tokenizer and the sequence-classification model from the SAME
# checkpoint, so changing `model_name` can never pair a tokenizer with a
# mismatched model.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# For a base (non-fine-tuned) checkpoint the classification head is randomly
# initialised; transformers emits a warning about this when loading.
model = AutoModelForSequenceClassification.from_pretrained(model_name)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [14]:
# Hand-written (question, passage) pair in BoolQ format, used as a quick smoke test
question, passage = (
    "Does the President of the United States live in the White House?",
    "The White House is the official residence and workplace of the President of the United States.",
)


In [15]:
# Encode the question/passage pair as a single model input
inputs = tokenizer(question, passage, return_tensors="pt", padding=True, truncation=True)

# Forward pass with gradient tracking disabled (inference only)
with torch.no_grad():
    outputs = model(**inputs)

# Index of the highest-scoring class
logits = outputs.logits
prediction = logits.argmax(dim=-1).item()

# Class index 1 is read as "Yes", anything else as "No".
# NOTE: if the classification head was newly initialised when the model was
# loaded, this answer is effectively arbitrary until the model is fine-tuned.
if prediction == 1:
    answer = "Yes"
else:
    answer = "No"
print(f"Prediction: {answer}")

Prediction: No


In [18]:
# Load the validation split of the "boolq" dataset (no model is loaded here).
# NOTE: this rebinds `dataset`, which previously held the SuperGLUE DatasetDict.
# In this version the gold answer is the boolean `answer` column rather than
# the integer `label` column.
dataset = load_dataset("boolq", split="validation")

README.md:   0%|          | 0.00/6.57k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/3.69M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/1.26M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/9427 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/3270 [00:00<?, ? examples/s]

In [19]:
# Display the columns and row count of the validation split
dataset

Dataset({
    features: ['question', 'answer', 'passage'],
    num_rows: 3270
})

In [22]:
# Preview 5 random validation rows; to_pandas() converts the Arrow table
# directly instead of building the frame one Python dict per row.
dataset.to_pandas().sample(5)

Unnamed: 0,question,answer,passage
1248,do c++ strings need to be null terminated,False,"Most modern libraries replace C strings with a structure containing a 32-bit or larger length value (far more than were ever considered for length-prefixed strings), and often add another pointer, a reference count, and even a NUL to speed up conversion back to a C string! Memory is far larger now, such that if the addition of 3 (or 16, or more) bytes to each string is a real problem the software will have to be dealing with so many small strings that some other storage method will save even more memory (for instance there may be so many duplicates that a hash table will use less memory). Examples include the C++ Standard Template Library std::string , the Qt QString , the MFC CString , and the C-based implementation CFString from Core Foundation as well as its Objective-C sibling NSString from Foundation, both by Apple. More complex structures may also be used to store strings such as the rope."
498,can a puerto rican resident vote for president,False,"Voting rights of United States citizens in Puerto Rico, like the voting rights of residents of other United States territories, differ from those of United States citizens in each of the fifty states and the District of Columbia. Residents of Puerto Rico and other U.S. territories do not have voting representation in the United States Congress, and are not entitled to electoral votes for President. The United States Constitution grants congressional voting representation to U.S. states, which Puerto Rico and other U.S. territories are not, specifying that members of Congress shall be elected by direct popular vote and that the President and the Vice President shall be elected by electors chosen by the States."
1412,has brazil ever won the world cup in europe,True,"Brazil is the most successful national team in the history of the World Cup, having won five titles, earning second-place, third-place and fourth-place finishes twice each. Brazil is one of the countries besides Argentina, Spain and Germany to win a FIFA World Cup away from its continent (Sweden 1958, Mexico 1970, USA 1994 and South Korea/Japan 2002). Brazil is the only national team to have played in all FIFA World Cup editions without any absence or need for playoffs. Brazil also has the best overall performance in World Cup history in both proportional and absolute terms with a record of 73 victories in 109 matches played, 124 goal difference, 237 points and only 18 losses."
1542,has an nhl team ever come back from 3-0,True,"The following is the list of teams to overcome 3--1 series deficits by winning three straight games to win a best-of-seven playoff series. In the history of major North American pro sports, teams that were down 3--1 in the series came back and won the series 52 times, more than half of them were accomplished by National Hockey League (NHL) teams. Teams overcame 3--1 deficit in the final championship round eight times, six were accomplished by Major League Baseball (MLB) teams in the World Series. Teams overcoming 3--0 deficit by winning four straight games were accomplished five times, four times in the NHL and once in MLB."
1155,can beet juice show up in your urine,True,"Beeturia is the passing of red or pink urine after eating beetroots or foods colored with beetroot extract or beetroot pigments. The color is caused by the excretion of betalain (betacyanin) pigments such as betanin. The coloring is highly variable between individuals and between different occasions, and can vary in intensity from invisible to strong. The pigment is sensitive to oxidative degradation under strongly acidic conditions. Therefore, the urine coloring depends on stomach acidity and dwell time as well as the presence of protecting substances such as oxalic acid. Beeturia is often associated with red or pink feces."


In [23]:
# Function to get predictions and scores
def get_prediction(question: str, passage: str):
    """Classify one (question, passage) pair with the notebook-level model.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        question: The yes/no question text.
        passage: The passage the question should be answered from.

    Returns:
        A ``(prediction, score)`` tuple. ``prediction`` is the index of the
        highest-scoring class (Python int). ``score`` is the largest softmax
        probability, i.e. the probability assigned to that class (Python float).
    """
    # Encode the pair as a single input, then move the tensors to the model's
    # device so the call also works when the model has been placed on a GPU
    # (a no-op when everything is on the CPU).
    inputs = tokenizer(
        question, passage, return_tensors="pt", padding=True, truncation=True
    ).to(model.device)

    # Inference only: no gradient tracking needed
    with torch.no_grad():
        logits = model(**inputs).logits

    prediction = torch.argmax(logits, dim=-1).item()
    score = torch.softmax(logits, dim=-1).max().item()  # Confidence score (softmax probability)

    return prediction, score

In [25]:
import random

In [31]:
# Fixed seed so the same validation examples are shown on every run
SEED = 42
NUM_SAMPLES = 5  # how many shuffled examples to display

# Shuffle the validation set reproducibly
shuffled_dataset = dataset.shuffle(seed=SEED)

# Print gold label vs. model prediction for the first NUM_SAMPLES shuffled examples
for i in range(min(NUM_SAMPLES, len(shuffled_dataset))):
    example = shuffled_dataset[i]
    question = example['question']
    passage  = example['passage']
    label    = example['answer']  # boolean gold answer (True = yes)

    # Get model prediction and score
    prediction, score = get_prediction(question, passage)

    # Display the results
    print(f"Passage:  {passage}")
    print(f"Question: {question}")
    print(f"Label: {'Yes' if label else 'No'}")
    print(f"Prediction: {'Yes' if prediction == 1 else 'No'}")
    print(f"Score: {score:.4f}")
    print("-" * 80)

Passage:  Calvin Edwin Ripken Jr. (born August 24, 1960), nicknamed ``The Iron Man'', is an American former baseball shortstop and third baseman who played 21 seasons in Major League Baseball (MLB) for the Baltimore Orioles (1981--2001). One of his position's most offensively productive players, Ripken compiled 3,184 hits, 431 home runs, and 1,695 runs batted in during his career, and he won two Gold Glove Awards for his defense. He was a 19-time All-Star and was twice named American League (AL) Most Valuable Player (MVP). Ripken holds the record for consecutive games played, 2,632, surpassing Lou Gehrig's streak of 2,130 that had stood for 56 years and that many deemed unbreakable. In 2007, he was elected into the National Baseball Hall of Fame in his first year of eligibility, and currently has the fourth highest voting percentage of all time (98.53%).
Question: is cal ripken jr in the hall of fame
Label: Yes
Prediction: No
Score: 0.5327
----------------------------------------------