# Document Relevance Discriminator (DRD)

In [None]:
import os
import pandas as pd
import json
import numpy as np
import pickle
import tensorflow as tf
import transformers
from transformers import AutoTokenizer, TFT5ForConditionalGeneration
from tensorflow.keras.layers import Input, Dense, Activation, Dropout, LSTM, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import LearningRateScheduler, ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
import datetime

### Loading SQUAD Dataset

In [None]:
# Download the SQuAD 2.0 train and dev splits
!wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json
!wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json

--2023-12-20 06:04:02--  https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json
Resolving rajpurkar.github.io (rajpurkar.github.io)... 185.199.108.153, 185.199.109.153, 185.199.110.153, ...
Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.108.153|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 42123633 (40M) [application/json]
Saving to: ‘train-v2.0.json’


2023-12-20 06:04:03 (194 MB/s) - ‘train-v2.0.json’ saved [42123633/42123633]

--2023-12-20 06:04:03--  https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json
Resolving rajpurkar.github.io (rajpurkar.github.io)... 185.199.108.153, 185.199.109.153, 185.199.110.153, ...
Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.108.153|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4370528 (4.2M) [application/json]
Saving to: ‘dev-v2.0.json’


2023-12-20 06:04:03 (58.6 MB/s) - ‘dev-v2.0.json’ saved [4370528/4370528]



In [None]:

def load_squad(location):
    """Read a SQuAD-format JSON file into a flat question/context/label table.

    Args:
        location: Path of the JSON file; it is opened as UTF-8 text.

    Returns:
        A pandas DataFrame with one row per question and the columns
        ``question``, ``context`` and ``label``. ``label`` is 0 when the
        entry's ``is_impossible`` flag is truthy and 1 otherwise (a missing
        flag counts as answerable).
    """
    with open(location, 'r', encoding='utf-8') as handle:
        payload = json.load(handle)

    # Flatten article -> paragraph -> question, keeping document order. The
    # paragraph text is repeated for every question asked about it.
    rows = []
    for article in payload["data"]:
        for paragraph in article["paragraphs"]:
            passage = paragraph["context"]
            rows.extend(
                (qa["question"], passage, int(not qa.get("is_impossible", False)))
                for qa in paragraph["qas"]
            )

    return pd.DataFrame(rows, columns=["question", "context", "label"])

In [None]:
# Parse both downloaded SQuAD 2.0 splits into question/context/label DataFrames.
# The absolute /content/ paths assume the files sit in that directory
# (presumably a Colab runtime — verify when running elsewhere).
squad_train = load_squad("/content/train-v2.0.json" )
squad_dev = load_squad("/content/dev-v2.0.json")

In [None]:
import re

def remove_special_characters(input_string):
    """Strip every character that is not an ASCII letter, digit or whitespace.

    Despite its name, ``input_string`` is iterated: each element is cleaned
    on its own and the cleaned strings are returned as a new list in the
    same order. Accented and other non-ASCII characters are removed too.
    """
    disallowed = re.compile(r'[^a-zA-Z0-9\s]')
    return [disallowed.sub('', text) for text in input_string]

In [None]:
# Clean only the context column of each split; the question column is left
# exactly as loaded.
squad_train['context'] = remove_special_characters(squad_train['context'])
squad_dev['context'] = remove_special_characters(squad_dev['context'])

In [None]:
# Preview the first three training rows to eyeball the cleaned contexts.
squad_train.head(3)

Unnamed: 0,question,context,label
0,When did Beyonce start becoming popular?,Beyonc Giselle KnowlesCarter bijnse beeYONsay born September 4 1981 is an American singer songwriter record producer and actress Born and raised in Houston Texas she performed in various singing and dancing competitions as a child and rose to fame in the late 1990s as lead singer of RB girlgroup Destinys Child Managed by her father Mathew Knowles the group became one of the worlds bestselling girl groups of all time Their hiatus saw the release of Beyoncs debut album Dangerously in Love 2003 which established her as a solo artist worldwide earned five Grammy Awards and featured the Billboard Hot 100 numberone singles Crazy in Love and Baby Boy,1
1,What areas did Beyonce compete in when she was growing up?,Beyonc Giselle KnowlesCarter bijnse beeYONsay born September 4 1981 is an American singer songwriter record producer and actress Born and raised in Houston Texas she performed in various singing and dancing competitions as a child and rose to fame in the late 1990s as lead singer of RB girlgroup Destinys Child Managed by her father Mathew Knowles the group became one of the worlds bestselling girl groups of all time Their hiatus saw the release of Beyoncs debut album Dangerously in Love 2003 which established her as a solo artist worldwide earned five Grammy Awards and featured the Billboard Hot 100 numberone singles Crazy in Love and Baby Boy,1
2,When did Beyonce leave Destiny's Child and become a solo singer?,Beyonc Giselle KnowlesCarter bijnse beeYONsay born September 4 1981 is an American singer songwriter record producer and actress Born and raised in Houston Texas she performed in various singing and dancing competitions as a child and rose to fame in the late 1990s as lead singer of RB girlgroup Destinys Child Managed by her father Mathew Knowles the group became one of the worlds bestselling girl groups of all time Their hiatus saw the release of Beyoncs debut album Dangerously in Love 2003 which established her as a solo artist worldwide earned five Grammy Awards and featured the Billboard Hot 100 numberone singles Crazy in Love and Baby Boy,1


### Training the model

In [None]:
!pip install transformers



In [None]:
import tensorflow as tf
import transformers
from transformers import AutoTokenizer, TFT5ForConditionalGeneration
from tensorflow.keras.layers import Input, Dense, Activation, Dropout, LSTM, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import LearningRateScheduler, ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
import datetime
import os

In [None]:
# Checkpoint name shared by the tokenizer here and the model built later.
model_name = "google/flan-t5-large"
# Module-level tokenizer; tokenizing_df, drd_score and the dev-accuracy cell
# all read this global.
tokenizer = AutoTokenizer.from_pretrained(model_name)

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

In [None]:
# 90th..100th percentiles of context length, measured in whitespace-separated
# words (not tokenizer tokens), to judge how long the inputs are.
percentiles = np.percentile([len(i.split())for i in squad_train['context']] , range(90, 101))

# Print one line per percentile level
for p, percentile_value in zip(range(90, 101), percentiles):
    print(f"{p}th percentile: {percentile_value}")

90th percentile: 182.0
91th percentile: 187.0
92th percentile: 192.0
93th percentile: 197.0
94th percentile: 203.0
95th percentile: 212.0
96th percentile: 221.0
97th percentile: 233.0
98th percentile: 252.0
99th percentile: 285.0
100th percentile: 650.0


In [None]:
# Same 90th..100th percentile report for question length, again counted in
# whitespace-separated words. Rebinds the global `percentiles`.
percentiles = np.percentile([len(i.split())for i in squad_train['question']] , range(90, 101))

# Print one line per percentile level
for p, percentile_value in zip(range(90, 101), percentiles):
    print(f"{p}th percentile: {percentile_value}")

90th percentile: 14.0
91th percentile: 15.0
92th percentile: 15.0
93th percentile: 15.0
94th percentile: 16.0
95th percentile: 16.0
96th percentile: 17.0
97th percentile: 17.0
98th percentile: 18.0
99th percentile: 20.0
100th percentile: 40.0


In [None]:
from tqdm import tqdm

def tokenizing_df(dataset, max_seq_length=512):
    """Tokenize question/context pairs and build the 'A'/'B' decoder targets.

    Each row's ``question`` and ``context`` are joined with a literal
    ``</s>`` separator and encoded with the module-level ``tokenizer``.
    Rows are written by position, so the DataFrame index does not need to
    be a default ``RangeIndex``.

    Args:
        dataset: DataFrame with ``question``, ``context`` and ``label``
            columns.
        max_seq_length: Fixed encoder length. Longer encodings are cut off
            at this length (which also drops whatever token ended the
            sequence); shorter ones are right-padded with id 0 and a 0
            attention mask.

    Returns:
        ``(tokenized_data, target_data)``, both int16 arrays.
        ``tokenized_data`` has shape ``(2, len(dataset), max_seq_length)``
        and ``target_data`` has shape ``(2, len(dataset), 2)``. Index 0 on
        the first axis holds token ids, index 1 the attention mask. The
        target is the encoding of ``'A'`` for label 0 and of ``'B'`` for
        any other label.
    """
    dataset_len = len(dataset)

    # Allocate the final int16 buffers directly instead of filling float64
    # arrays and casting them at the end.
    # NOTE(review): int16 only holds ids below 32768 — confirm the vocabulary
    # size before reusing this with a different tokenizer.
    tokenized_data = np.zeros((2, dataset_len, max_seq_length), dtype='int16')
    target_data = np.zeros((2, dataset_len, 2), dtype='int16')

    # The label encodings do not depend on the row, so encode them once.
    # Assumes each encodes to two ids (presumably label piece + end token);
    # the assignment below raises for longer encodings.
    irrelevant_ids = np.array(tokenizer.encode('A'))
    relevant_ids = np.array(tokenizer.encode('B'))

    columns = zip(dataset['question'], dataset['context'], dataset['label'])
    for pos, (question, context, label) in enumerate(tqdm(columns, total=dataset_len)):
        pair = question + '</s>' + context
        tokens = tokenizer.encode(pair)[:max_seq_length]

        # Positions past len(tokens) keep their zero id / zero mask padding.
        tokenized_data[0, pos, :len(tokens)] = tokens
        tokenized_data[1, pos, :len(tokens)] = 1

        target_data[0, pos, :] = irrelevant_ids if label == 0 else relevant_ids
        target_data[1, pos, :] = 1

    return tokenized_data, target_data

In [None]:
# Tokenize both splits. Each call returns (encoder arrays, decoder-target
# arrays); index 0 of either holds token ids and index 1 the attention mask.
squad_x_train, squad_y_train = tokenizing_df(squad_train)
squad_x_dev, squad_y_dev = tokenizing_df(squad_dev)

130319it [02:38, 824.68it/s]
11873it [00:14, 827.11it/s]


In [None]:
# Cache the tokenized arrays so the slow tokenization pass can be skipped on
# later runs. The context manager closes (and therefore flushes) the file
# even if pickling fails part-way.
with open('/content/drive/MyDrive/my assignments/33. A12 Reasoning Challenge- Self case study 2/squad_tokenized_t5_xl_512.pkl','wb') as cache_file:
    pickle.dump((squad_x_train, squad_y_train, squad_x_dev, squad_y_dev),
                cache_file)

In [None]:
# Reload the cached tokenized arrays. pickle.load executes whatever the file
# encodes, so only point this at a cache this notebook wrote itself.
with open('/content/drive/MyDrive/my assignments/33. A12 Reasoning Challenge- Self case study 2/squad_tokenized_t5_xl_512.pkl', 'rb') as cache_file:
    squad_x_train, squad_y_train, squad_x_dev, squad_y_dev = pickle.load(cache_file)

In [None]:
# Locate the TPU (an empty `tpu` string leaves address discovery to the
# resolver) and attach this runtime to it.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')
tf.config.experimental_connect_to_cluster(resolver)
# TPU system initialization; this must run before any other TPU work.
tf.tpu.experimental.initialize_tpu_system(resolver)
# Sanity check: list the TPU logical devices that are now visible.
print("All devices: ", tf.config.list_logical_devices('TPU'))

In [None]:
from transformers import TFT5ForConditionalGeneration
strategy = tf.distribute.TPUStrategy(resolver)

# Everything that creates variables is built inside the strategy scope so it
# is placed on the TPU replicas: that covers the optimizer's own state as
# well as the model weights.
with strategy.scope():
    optimizer = tf.keras.optimizers.AdamW(3e-5, clipnorm= 1.0, epsilon= 1e-6)
    model = TFT5ForConditionalGeneration.from_pretrained(model_name)
    # No `loss` is passed to compile(); the training features carry a
    # `labels` entry, so the loss presumably comes from the model itself
    # (verify against the Transformers TF model documentation).
    model.compile(optimizer= optimizer,
                  metrics = ['accuracy'])

config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

All PyTorch model weights were used when initializing TFT5ForConditionalGeneration.

All the weights of TFT5ForConditionalGeneration were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.


In [None]:
# Checkpoint callback: writes the full model (save_weights_only=False) each
# time validation accuracy reaches a new maximum. The epoch number and the
# validation accuracy are formatted into the file name.
filepath="/content/drive/MyDrive/my assignments/33. A12 Reasoning Challenge- Self case study 2/DRD model on Squad/weights-{epoch:02d}-{val_accuracy:.4f}.tf"
checkpoint=  ModelCheckpoint(filepath=filepath,
                             monitor='val_accuracy',
                             save_best_only=True,
                             verbose=1,
                             mode='max',
                             save_weights_only=False,
                             options= tf.saved_model.SaveOptions(experimental_io_device="/job:localhost"))

# Learning-rate callback: multiplies the rate by 0.1 (never below 1e-6) after
# one epoch without an improvement in validation accuracy.
rlr = ReduceLROnPlateau(monitor="val_accuracy", factor= 0.1, min_lr= 0.000001,
           patience=1, verbose=1, mode = 'max')

# Early stopping on validation accuracy is left disabled:
# earlystop = EarlyStopping(monitor="val_accuracy", patience=2, verbose=1)

In [None]:
# Keyword-style feature dicts for the model. Index 0 of each tokenized array
# holds token ids and index 1 the matching attention mask; the decoder target
# ids are supplied as `labels`.
train = {'input_ids':squad_x_train[0], 'attention_mask': squad_x_train[1],
               'labels':squad_y_train[0], 'decoder_attention_mask':squad_y_train[1]}

dev = {'input_ids':squad_x_dev[0], 'attention_mask': squad_x_dev[1],
               'labels':squad_y_dev[0], 'decoder_attention_mask':squad_y_dev[1]}

In [None]:
# Training hyper-parameters.
epochs= 3
batch = 32

# Fine-tune on the training features and validate on the dev features after
# every epoch. The labels travel inside the feature dicts, so no separate `y`
# is passed. The callbacks argument is commented out below, so no
# checkpointing or learning-rate reduction happens during this run.
model.fit(x= train,
          validation_data= dev,
          verbose= 1,
          epochs= epochs,
          batch_size= batch,
          shuffle =True,)
        #   callbacks=[checkpoint, rlr])

Epoch 1/3


2023-12-15 04:03:39.759688: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.
2023-12-15 04:03:39.777042: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.
2023-12-15 04:03:39.794161: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.
2023-12-15 04:03:39.811362: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.
2023-12-15 04:03:39.828337: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.
2023-12-15 04:03:39.845319: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.
2023-12-15 04:03:39.862792: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority un

   1/4073 [..............................] - ETA: 372:53:39 - loss: 5.8449 - accuracy: 0.1094

2023-12-15 04:09:09.590150: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.
2023-12-15 04:09:09.590341: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.
2023-12-15 04:09:09.590547: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.
2023-12-15 04:09:09.590656: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.
2023-12-15 04:09:09.590761: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.
2023-12-15 04:09:09.590949: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.
2023-12-15 04:09:09.591360: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority un

   2/4073 [..............................] - ETA: 35:24 - loss: 5.7929 - accuracy: 0.1328    

2023-12-15 04:09:10.108220: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.
2023-12-15 04:09:10.108459: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.
2023-12-15 04:09:10.108573: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.
2023-12-15 04:09:10.108703: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.
2023-12-15 04:09:10.108864: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.


   3/4073 [..............................] - ETA: 35:18 - loss: 5.5502 - accuracy: 0.1615

2023-12-15 04:09:10.628942: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.


  19/4073 [..............................] - ETA: 35:37 - loss: 3.5736 - accuracy: 0.4062

2023-12-15 04:09:19.077711: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.


  49/4073 [..............................] - ETA: 35:08 - loss: 1.8958 - accuracy: 0.5928

2023-12-15 04:09:34.734061: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.




2023-12-15 04:32:15.019680: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.




2023-12-15 04:48:03.682705: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
2023-12-15 04:48:35.070019: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.


Epoch 2/3

2023-12-15 05:25:15.782516: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.


Epoch 3/3


<keras.src.callbacks.History at 0x7d74bc3e0d00>

In [None]:
# Persist the fine-tuned weights under a relative file name, i.e. in the
# current working directory.
# NOTE(review): the inference section loads a file of the same name from a
# Google Drive folder — presumably the file is copied there by hand; confirm.
model.save_weights('squad_saved_weights_t5_large.h5',
           options=  tf.saved_model.SaveOptions(experimental_io_device="/job:localhost"))

## Inference

In [None]:
# Rebuild the same architecture for inference, this time outside any
# distribution strategy scope. It starts from the pretrained checkpoint; the
# fine-tuned weights are loaded separately.
from transformers import TFT5ForConditionalGeneration
optimizer = tf.keras.optimizers.AdamW(3e-5, clipnorm= 1.0, epsilon= 1e-6)
#loss= tf.keras.losses.BinaryCrossentropy()

model = TFT5ForConditionalGeneration.from_pretrained('google/flan-t5-large')
# Compiled with the same optimizer settings and metric as the training model.
model.compile(optimizer= optimizer,
              metrics = ['accuracy'])

config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

All PyTorch model weights were used when initializing TFT5ForConditionalGeneration.

All the weights of TFT5ForConditionalGeneration were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.


In [None]:
# Overwrite the pretrained weights with the fine-tuned ones stored on Drive.
path= '/content/drive/MyDrive/my assignments/33. A12 Reasoning Challenge- Self case study 2/squad_saved_weights_t5_large.h5'
model.load_weights(path)
# Unused alternative that passes explicit checkpoint options:
# ,  options = tf.train.CheckpointOptions(experimental_io_device="/job:localhost"))

In [None]:
# Accuracy of the fine-tuned model on the SQuAD dev split.
from sklearn.metrics import accuracy_score
from tqdm import tqdm

# Generate in slices of 50 rows of dev token ids; each list element is the
# generation output for one slice. No attention mask is passed here.
squad_predicted= []
for i in tqdm(range(0, len(squad_x_dev[0]), 50)):
    squad_predicted.append(model.generate(squad_x_dev[0][i:i+50], output_scores=True, return_dict_in_generate=True))


# Map each generated sequence back to a label: the token at position 1
# decodes to 'A' -> 0, anything else -> 1. Position 0 is skipped (presumably
# the decoder start token — confirm).
squad_predicted_labels= [0 if tokenizer.decode(sequence[1]) == 'A' else 1 for i in squad_predicted for sequence in i['sequences']]

# Compare the predictions with the gold labels, in row order
accuracy = accuracy_score(squad_dev['label'], squad_predicted_labels)

# Report the result
print(f"Accuracy: {accuracy}")

Accuracy: 0.9107218057778153


### generating DRD scores

In [None]:
from tqdm import tqdm

def avd_score(model, tokenizer, data):
    """Score every retrieved context of every answer option with ``model``.

    Per the original note, this produces AVD scores from a downstream model
    trained on the RACE data set.

    Args:
        model: PyTorch model whose output exposes ``.logits``. It is moved
            to the selected device and switched to eval mode.
        tokenizer: Tokenizer callable on a pair of text lists that returns
            PyTorch tensors.
        data: Mapping with ``only_questions`` (one question per item),
            ``only_answers`` (the answer options per question) and
            ``context`` (a list of contexts per question and option).

    Returns:
        Nested list ``scores[question][option]`` holding the softmax over
        the logits of that option's contexts.
    """
    # torch is not imported at file level in the visible notebook.
    import torch

    # device = xm.xla_device()
    # Prefer the GPU but fall back to CPU instead of failing without CUDA.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    model.eval()

    question = data['only_questions']
    answer = data['only_answers']
    context = data['context']
    scores = []

    # One pass per question ...
    for i, que in enumerate(tqdm(question)):
        list_of_scores = []
        # ... and per answer option (four per question, per the original notes).
        for j, ans in enumerate(answer[i]):
            # Contexts retrieved for this question/option (50, per the
            # original notes).
            option_contexts = context[i][j]

            # Repeat the question+answer text once per context so both
            # lists line up for pairwise tokenization.
            pair = [que + '</s>' + ans] * len(option_contexts)
            tokens = tokenizer(pair, option_contexts, padding=True, truncation=True, return_tensors="pt").to(device)

            # Add a leading axis of size 1 so all contexts of this option go
            # through the model as a single example.
            input_ids = torch.unsqueeze(tokens['input_ids'], dim=0)
            attention_mask = torch.unsqueeze(tokens['attention_mask'], dim=0)

            # Inference only: no gradients are tracked.
            with torch.no_grad():
                logits = model(input_ids, attention_mask).logits

            # Normalize the logits of that single example along its last axis.
            softmax = torch.nn.functional.softmax(logits[0], dim=-1)
            list_of_scores.append(softmax.tolist())

        scores.append(list_of_scores)

    return scores

In [None]:
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm
from itertools import chain
from tensorflow.python.ops.numpy_ops import np_config
np_config.enable_numpy_behavior()

def drd_score(data):
    """Compute a DRD score for every retrieved context of every answer option.

    Uses the module-level ``model`` and ``tokenizer`` (per the original
    note, the downstream model trained on the SQuAD dataset).

    Args:
        data: Mapping with ``only_questions`` (one question per item),
            ``only_answers`` (the answer options per question) and
            ``context`` (a list of contexts per question and option).

    Returns:
        Nested list ``scores[question][option][context]``. Each value is the
        highest softmax probability of the first generation step for that
        question/context pair.
    """
    question = data['only_questions']
    answer = data['only_answers']
    context = data['context']

    scores = []
    # One pass per question ...
    for i, que in enumerate(tqdm(question)):
        list_of_scores = []
        # ... and per answer option. Only the option's position is needed:
        # the answer text itself is not part of the model input.
        for j, _ in enumerate(answer[i]):
            # Contexts retrieved for this question/option (50, per the
            # original notes).
            option_contexts = context[i][j]

            # Repeat the question once per context for pairwise tokenization.
            pair = [que] * len(option_contexts)
            tokens = tokenizer(pair, option_contexts, padding=True, truncation=True, return_tensors="tf")

            # The batch is padded, so pass the tokenizer's attention mask
            # along with the ids rather than leaving it to be inferred.
            predict = model.generate(tokens['input_ids'],
                                     attention_mask=tokens['attention_mask'],
                                     output_scores=True,
                                     return_dict_in_generate=True)

            # Probabilities of the first generated step, one row per context.
            softmax = tf.nn.softmax(predict['scores'][0], axis=-1)

            # NOTE(review): this keeps the largest probability whichever token
            # it belongs to, so a confident 'A' and a confident 'B' both score
            # high — confirm that is the intended definition of the DRD score.
            max_values = np.max(softmax, axis=-1)

            list_of_scores.append(max_values.tolist())

        scores.append(list_of_scores)

    return scores

In [None]:
def joining_scores(data, new_scores):
    """Pair every existing score with the new score at the same position.

    Per the original note, this joins the DRD scores with the existing BM25
    scores.

    Args:
        data: Mapping whose ``score`` entry is nested as
            ``[question][option][context]``.
        new_scores: Scores with the same nesting as ``data['score']``.

    Returns:
        A new nested list of the same layout in which each leaf is the
        two-element list ``[existing_score, new_score]``. ``data`` is not
        modified.

    Raises:
        IndexError: If ``new_scores`` is shorter than ``data['score']`` at
            any nesting level.
    """
    joined = []
    for q_idx, question_scores in enumerate(data['score']):
        joined.append([
            [[old, new_scores[q_idx][o_idx][c_idx]] for c_idx, old in enumerate(option_scores)]
            for o_idx, option_scores in enumerate(question_scores)
        ])

    return joined

In [None]:
# Replace each challenge split's `score` entry with [existing, DRD] pairs.
# NOTE(review): the *_challenge variables are not defined before this cell in
# the visible notebook — presumably they were loaded in an earlier session;
# confirm the cell order.
train_challenge['score']= joining_scores(train_challenge, drd_score(train_challenge))
dev_challenge['score']= joining_scores(dev_challenge, drd_score(dev_challenge))
test_challenge['score']= joining_scores(test_challenge, drd_score(test_challenge))

100%|██████████| 1119/1119 [1:00:08<00:00,  3.23s/it]
100%|██████████| 299/299 [16:10<00:00,  3.25s/it]
100%|██████████| 1172/1172 [1:04:03<00:00,  3.28s/it]


In [None]:
# Save the challenge splits with their joined scores. The context manager
# closes (and therefore flushes) the file even if pickling fails part-way.
with open('/content/drive/MyDrive/my assignments/33. A12 Reasoning Challenge- Self case study 2/arc_datasets_scores_challenge.pkl','wb') as challenge_file:
    pickle.dump((train_challenge, dev_challenge, test_challenge),
                challenge_file)

In [None]:
# Replace each easy split's `score` entry with [existing, DRD] pairs.
# NOTE(review): the *_easy variables are not defined before this cell in the
# visible notebook — presumably they were loaded in an earlier session;
# confirm the cell order.
train_easy['score']= joining_scores(train_easy, drd_score(train_easy))
dev_easy['score']= joining_scores(dev_easy, drd_score(dev_easy))
test_easy['score']= joining_scores(test_easy, drd_score(test_easy))

100%|██████████| 2251/2251 [1:58:42<00:00,  3.16s/it]
100%|██████████| 570/570 [29:21<00:00,  3.09s/it]
100%|██████████| 2376/2376 [2:03:09<00:00,  3.11s/it]


In [None]:
# Save the easy splits with their joined scores. The context manager closes
# (and therefore flushes) the file even if pickling fails part-way.
with open('/content/drive/MyDrive/my assignments/33. A12 Reasoning Challenge- Self case study 2/arc_datasets_scores_easy.pkl','wb') as easy_file:
    pickle.dump((train_easy, dev_easy, test_easy),
                easy_file)

In [None]:
# Reload the scored ARC splits. pickle.load executes whatever the file
# encodes, so only point these at files this notebook wrote itself. The
# context managers make sure both handles are closed after reading.
with open('/content/drive/MyDrive/my assignments/33. A12 Reasoning Challenge- Self case study 2/arc_datasets_scores_challenge.pkl', 'rb') as challenge_file:
    train_challenge, dev_challenge, test_challenge = pickle.load(challenge_file)

with open('/content/drive/MyDrive/my assignments/33. A12 Reasoning Challenge- Self case study 2/arc_datasets_scores_easy.pkl', 'rb') as easy_file:
    train_easy, dev_easy, test_easy = pickle.load(easy_file)