In [1]:
!pip install pytorch-ignite
!pip install transformers
!pip install transformers[sentencepiece]
!pip install  pytorch-lightning==1.4.9
!pip install  tokenizers

Collecting pytorch-ignite
  Downloading pytorch_ignite-0.4.7-py3-none-any.whl (240 kB)
[?25l[K     |█▍                              | 10 kB 26.1 MB/s eta 0:00:01[K     |██▊                             | 20 kB 33.5 MB/s eta 0:00:01[K     |████                            | 30 kB 39.4 MB/s eta 0:00:01[K     |█████▌                          | 40 kB 34.9 MB/s eta 0:00:01[K     |██████▉                         | 51 kB 34.8 MB/s eta 0:00:01[K     |████████▏                       | 61 kB 34.6 MB/s eta 0:00:01[K     |█████████▌                      | 71 kB 33.3 MB/s eta 0:00:01[K     |███████████                     | 81 kB 33.3 MB/s eta 0:00:01[K     |████████████▎                   | 92 kB 33.7 MB/s eta 0:00:01[K     |█████████████▋                  | 102 kB 32.8 MB/s eta 0:00:01[K     |███████████████                 | 112 kB 32.8 MB/s eta 0:00:01[K     |████████████████▍               | 122 kB 32.8 MB/s eta 0:00:01[K     |█████████████████▊              | 133 kB 3

In [2]:
import json
import pandas as pd
import numpy as np
import torch
import pytorch_lightning as pl
from sklearn.model_selection import train_test_split
from termcolor import colored
import textwrap
from torch.utils.data import Dataset, DataLoader


from transformers import (
    AdamW,
    T5ForConditionalGeneration,
    T5TokenizerFast as T5Tokenizer,
    get_linear_schedule_with_warmup
)

In [3]:
MODEL_NAME = 't5-base'

tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)

Downloading:   0%|          | 0.00/773k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.32M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.17k [00:00<?, ?B/s]

In [4]:
from pytorch_lightning.core.decorators import auto_move_data
MODEL_NAME = 't5-base'

class QNLIModel(pl.LightningModule):
    
    def __init__(self):
        super().__init__()
        self.model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME,return_dict=True)

    def forward(self, input_ids, attention_mask, labels=None):
        output = self.model(
            input_ids = input_ids,
            attention_mask = attention_mask,
            labels = labels
        )

        return output.loss , output.logits

    def training_step(self, batch, batch_idx):
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        labels = batch['labels']
        loss , outputs = self(input_ids,attention_mask,labels)

        self.log('train_loss', loss, prog_bar=True,logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        labels = batch['labels']
        loss , outputs = self(input_ids,attention_mask,labels)

        self.log('val_loss', loss, prog_bar=True,logger=True)
        return loss

    def test_step(self, batch, batch_idx):
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        labels = batch['labels']
        loss , outputs = self(input_ids,attention_mask,labels)

        self.log('test_loss', loss, prog_bar=True,logger=True)
        return loss


    def configure_optimizers(self):
        return AdamW(self.parameters(),lr=0.0001)

    # QNLIModel.forward = auto_move_data(QNLIModel.forward)


In [5]:
model = QNLIModel()
model.load_state_dict(torch.load('/content/drive/MyDrive/finetuned_models/best_checkpoint_t5_qnli.ckpt'))
# model.eval()

Downloading:   0%|          | 0.00/850M [00:00<?, ?B/s]

<All keys matched successfully>

In [6]:
model.freeze()

In [11]:
def generate_answer(question,sentence):#(question)
    source_encoding = tokenizer(
        question,#question['question]
        sentence,#question['sentence']
        max_length = 396,
        padding = 'max_length',
        truncation = 'only_second',
        return_attention_mask = True,
        add_special_tokens = True,
        return_tensors = 'pt'
    )

    generated_ids = model.model.generate(
        input_ids = source_encoding['input_ids'],
        attention_mask = source_encoding['attention_mask'],
        num_beams = 1,
        max_length = 80,
        repetition_penalty = 2.5,
        length_penalty = 1.0,
        output_scores = True,
        early_stopping = True,
        use_cache = True
    )
    print(generated_ids)

    preds = [
             tokenizer.decode(generated_id,skip_special_tokens=True, clean_up_tokenization_spaces=True)
             for generated_id in  generated_ids
    ]

    return ''.join(preds)

In [9]:
q1_data = pd.read_excel('/content/Output_new_Q1.xlsx',index_col=0)
# q2_data = pd.read_excel('/content/Output_new_Q2.xlsx',index_col=0)
# q3_data = pd.read_excel('/content/Output_new_Q3.xlsx',index_col=0)
# q4_data = pd.read_excel('/content/Output_new_Q4.xlsx',index_col=0)

In [None]:
text  = 

In [10]:
frames = [q1_data,q2_data,q3_data,q4_data]
dataset = pd.concat(frames)

In [11]:
# dataset

Unnamed: 0,Business Name,Address,Question,Expected Response,Snippets,Url,Machine result,Machine Snippet,Machine url,Validation,Metrics,UW Review,UW Response,UW Comments,Unnamed: 14,webtext,New Context,Avg Sim,label,Score,Avg,Output,Unnamed: 12,Avg_Sim,Positive,Negative
0,"AMERICAN PORTWELL TECHNOLOGY, INC.","44200 Christy Street, Fremont, CA 94538 (USA)",Does the applicant provide products or work re...,1,Aircraft communications and navigation systems...,https://portwell.com/solutions/military.php,YES,Aircraft communications and navigation systems...,https://portwell.com/solutions/military.php,Correct,TP,Yes,Agree,,,Military Embedded Computing Solutions \n Home ...,"navigation, command and control, and...",0.617131,We provide products or work related to the ope...,0.967396,0.634645,1,,,,
1,BAE Systems Inc,"2941 Fairview Park Dr, Falls Church, VA 22042",Does the applicant provide products or work re...,1,We’ve been improving flight controls for 40+ y...,https://www.baesystems.com/en-us/product/fligh...,YES,"Mission-critical flight, pilot, and engine con...",https://www.baesystems.com/en-us/our-company/i...,Correct,TP,Yes,Agree,,,Flight Control Systems: Ground Collision Avoid...,Our products include fly-by-wire flight contr...,0.551032,We provide products or work related to the ope...,0.934713,0.570216,1,,,,
2,PCB POWER INC,"18153 Napa Street, Northridge Estate, Californ...",Does the applicant provide products or work re...,1,"Automotive\tNavigation systems, music systems,...",https://usa.pcbpower.com/flexible-and-rigid-fl...,YES,"Automotive Navigation systems, music systems, ...",https://usa.pcbpower.com/flexible-and-rigid-fl...,Correct,TP,Yes,Agree,,,"RF PCB Manufacturer USA, RF PCB Suppliers USA,...","equipment, radio commu...",0.525229,We provide products or work related to the ope...,0.949932,0.546465,1,,,,
3,EMSG,"951 Monocacy Road, York, PA 17404",Does the applicant provide products or work re...,1,Marine navigation,https://emsginc.com/electronics-manufacturing-...,YES,With our ongoing commitment to quality control...,https://emsginc.com/industries-we-serve/traffi...,Correct,TP,Yes,Agree,Marine navigation,,Electronics Manufacturing and PCB Assembly in ...,"Once strictly mechanical devices, vehicles now...",0.500635,We provide products or work related to the ope...,0.968354,0.524021,1,,,,
4,"GENERAL DYNAMICS MISSION SYSTEMS, INC","8220 East Roosevelt Street Scottsdale, AZ 85257",Does the applicant provide products or work re...,1,SPACE OPERATIONS & ENGINEERING SERVICES,https://gdmissionsystems.com/space,YES,Space Operations & Engineering Services,https://gdmissionsystems.com/products/communic...,Correct,TP,Yes,Agree,"Better snippet: We design, build and manage gr...",,Space - General Dynamics Mission Systems \n La...,We build products and deliver technology for ...,0.498263,We provide products or work related to the ope...,0.955822,0.521141,1,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
66,PROVIDENT NUTRACEUTICAL,"3017 Business Park Dr, Stevens Point, WI 54482...",Does the applicant a) use or provide exotic me...,0,We are a premier contract manufacturer of nutr...,https://providentnutraceuticals.com/Default,NO,,https://providentnutraceuticals.com/services,Correct,TN,Yes,Agree,,,Home \n 1.800.332.2351 \n M-F 7am-7pm CST \n A...,with this combo of key nutrients and botanica...,0.233134,"We use or provide exotic metals, materials, na...",0.569005,0.266722,0,,0.233134,"We use or provide exotic metals, materials, na...","We do not use or provide exotic metals, materi..."
67,"ACCELERATED RX ANALYTICS, LLC","345 US Highway 9, Suite 184, Manalapan, NJ 07726",Does the applicant a) use or provide exotic me...,0,We provide commercial research and analysis wi...,https://acceleratedrxanalytics.com/,NO,,http://acceleratedrxanalytics.com/,Correct,TN,Yes,Agree,,,"Pharmaceutical Research Company, Manalapan, NJ...","Cardiology (Heart failure, Blood thinners, IT...",0.222100,"We use or provide exotic metals, materials, na...",0.543124,0.254202,0,,0.222100,"We use or provide exotic metals, materials, na...","We do not use or provide exotic metals, materi..."
68,ITHERAPEUTICS,"3701 FAU Blvd. Suite 210 - 112 Boca Raton, F...",Does the applicant a) use or provide exotic me...,0,iTherapeutics is a biotechnology company speci...,http://www.itherapeutics.com/2.html,NO,,http://itherapeutics.com/3.html,Correct,TN,Yes,Agree,,,iTherapeutics Corp. - About Us \n Home \n Abou...,"Prior to co-founding iTherapeutics, Mr. Willi...",0.205394,"We use or provide exotic metals, materials, na...",0.506095,0.235464,0,,0.205394,"We use or provide exotic metals, materials, na...","We do not use or provide exotic metals, materi..."
69,"ANGEX PHARMACEUTICAL, INC.","675 US Highway One, North Brunswick, NJ 08902",Does the applicant a) use or provide exotic me...,0,Angex Pharmaceutical is developing a wide-rang...,https://www.angexpharmaceutical.com/r-d,NO,,https://www.angexpharmaceutical.com/about-us,Correct,TN,Yes,Agree,,,R&D | Angex Pharmaceutical \n Home \n About Us...,New Jersey Bioscience Center Pipeline | Angex...,0.172398,"We use or provide exotic metals, materials, na...",0.573702,0.212529,0,,0.172398,"We use or provide exotic metals, materials, na...","We do not use or provide exotic metals, materi..."


In [12]:
pred_answers = []

for index,row in q1_data[:5].iterrows():
    question = row['label']
    sentence = row['New Context']
    true_ans = row['Metrics']
    #true_ans = 'Yes' if row['label'] == 'entailment' else 'No'
    pred_ans = generate_answer(question,sentence)
    #pred_ans_yn = 'Yes' if pred_answers == 'entailment' else 'No'
    print('Question: ' , question , ' Context :',sentence)
    print(true_ans,pred_ans,'\n')
    pred_answers.append(pred_ans)

tensor([[   0,   59,  834,   35, 5756,  297,    1]])
Question:  We provide products or work related to the operation, safety, navigation, or control of any of the following: aircraft, spacecraft, watercraft, rail transport, automobiles, or motorcycles?  Context :           navigation, command and control, and communications systems.           aircraft, artificial satellites, and spacecraft.           aircrafts, marine and handheld.
TP not_entailment 

tensor([[   0,   59,  834,   35, 5756,  297,    1]])
Question:  We provide products or work related to the operation, safety, navigation, or control of any of the following: aircraft, spacecraft, watercraft, rail transport, automobiles, or motorcycles?  Context :  Our products include fly-by-wire flight controls, pilot controls (inceptor systems), full authority digital engine controls (FADEC), power management systems, flight deck systems, cabin management systems, weapon controls, autonomous flight systems, and mission systems. Integrat

In [13]:
dataset["MNLI_labels"] = pred_answers

In [14]:
dataset.to_excel('baseline_MNLI_Output.xlsx')