In [4]:
import requests
from pathlib import Path
import jsonlines
import json

# Locate the NLP dataset two levels above the working directory, under novice/.
current_directory = Path.cwd()
file_path = current_directory.joinpath('..', '..', 'novice', 'nlp.jsonl')
nlp_file = file_path.resolve()  # collapse the '..' segments into an absolute path
print(f"nlp_file: {nlp_file}\ncurrent_directory: {current_directory}")

# URL of the FastAPI health endpoint.
# 0.0.0.0 is a wildcard *bind* address rather than a portable destination, so the
# client targets the loopback interface, which a server bound to 0.0.0.0 also serves.
url = 'http://127.0.0.1:5002/health'

# Bound the wait so an unresponsive service cannot hang the cell indefinitely.
response = requests.get(url, timeout=10)
# Surface HTTP errors (4xx/5xx) here instead of failing later while decoding the body.
response.raise_for_status()

# Print the response
print(response.json())

nlp_file: /home/jupyter/novice/nlp.jsonl
current_directory: /home/jupyter/til-24-base/nlp
{'message': 'health ok'}


In [1]:
import transformers
# Show the version string of the transformers package imported in this kernel.
print(transformers.__version__)

4.37.0


In [9]:
from transformers import pipeline
# Build a question-answering pipeline from the named BERT checkpoint.
qa = pipeline(
    'question-answering',
    model='bert-large-uncased-whole-word-masking-finetuned-squad',
)

# One natural-language question per command field to extract from the text.
questions = dict(
    heading="What is the heading value?",
    target="What is the full target description?",
    tool="What is the tool to deploy?",
)

context = "Heading is one seven five, target is black, blue, and grey fighter jet, tool to deploy is machine gun."

# Ask every question against the same context and show the raw pipeline result.
for field in questions:
    output = qa(question=questions[field], context=context)
    print(output)

Some weights of the model checkpoint at bert-large-uncased-whole-word-masking-finetuned-squad were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


{'score': 0.9911791682243347, 'start': 11, 'end': 25, 'answer': 'one seven five'}
{'score': 0.6000996828079224, 'start': 37, 'end': 70, 'answer': 'black, blue, and grey fighter jet'}
{'score': 0.956532895565033, 'start': 90, 'end': 101, 'answer': 'machine gun'}


In [None]:
# TODO: also record how long the checking run takes.

In [1]:
#TESTING

from transformers import pipeline
import torch

# Use CUDA device 0 when torch reports a GPU; otherwise pass -1
# (presumably the pipeline's CPU setting — confirm against the transformers docs).
if torch.cuda.is_available():
    device = 0
else:
    device = -1

# Question-answering pipeline built from the named ELECTRA checkpoint.
qa = pipeline(
    'question-answering',
    model="valhalla/electra-base-discriminator-finetuned_squadv1",
    device=device,
)

from pathlib import Path
import jsonlines
import json
import math
import re
import time # TESTING

# Resolve the dataset path relative to the working directory (two levels up, under novice/).
current_directory = Path.cwd()
file_path = current_directory / '..' / '..' / 'novice' / 'nlp.jsonl'
nlp_file = file_path.resolve()
print(f"nlp_file: {nlp_file}\ncurrent_directory: {current_directory}")

transcripts = []         # "transcript" text of each record, in file order
extracted_commands = []  # expected {"heading", "target", "tool"} per record, same order

# Read one JSON object per line. The encoding is pinned to UTF-8 so decoding does
# not depend on the platform's locale default.
with open(nlp_file, 'r', encoding='utf-8') as file:
    for line in file:
        if not line.strip():
            # Tolerate blank lines (e.g. a stray empty line at the end of the file)
            # instead of failing inside json.loads.
            continue
        data = json.loads(line)
        transcripts.append(data["transcript"])
        extracted_commands.append({"heading": data["heading"], "target": data["target"], "tool": data["tool"]})
        
# Question posed to the QA model for each command field.
questions = dict(
    heading="What is the heading value?",
    target="What is the full target description?",
    tool="What is the tool to deploy?",
)

# Spoken digit word -> digit character. The word's position in the list is its
# value; note that 9 is spelled "niner" here.
number_map = {
    word: str(value)
    for value, word in enumerate(
        ["zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "niner"]
    )
}
        
# Evaluate the QA pipeline on every transcript: extract heading/target/tool,
# normalise the answers, and compare them with the expected commands.
accuracy_count = 0
length = len(transcripts)

# perf_counter is the clock intended for measuring elapsed intervals.
start_time = time.perf_counter() #TESTING

for i, (context, expected) in enumerate(zip(transcripts, extracted_commands)):
    # Ask one question per field against this transcript and keep the answer text.
    json_output = {}
    for key, question in questions.items():
        output = qa(question=question, context=context)
        json_output[key] = output['answer']

    # post-processing
    # Heading: turn spoken digit words into a digit string. If any word is not in
    # number_map, keep the raw answer unchanged (best effort). Only KeyError is
    # caught so real failures and kernel interrupts are not silently swallowed.
    try:
        json_output['heading'] = "".join(number_map[word] for word in json_output['heading'].split())
    except KeyError:
        pass
    # Tool: drop any whitespace around hyphens.
    json_output['tool'] = re.sub(r'\s*-\s*', '-', json_output['tool'])
    # Target: drop any whitespace that precedes a comma.
    json_output['target'] = re.sub(r'\s+,', ',', json_output['target'])

    if json_output == expected:
        accuracy_count += 1
    else:
        # Report each mismatch so failures can be inspected.
        print(f"Iteration: {i}/{length}")
        print(f"json_output: {json_output}")
        print(f"extracted_commands[i]: {expected}")

end_time = time.perf_counter() #TESTING

# Guard against an empty dataset, which would otherwise raise ZeroDivisionError.
accuracy = accuracy_count / length * 100 if length else 0.0
print(f"Accuracy: {accuracy:.02f}%")

time_taken = end_time - start_time
print(f"Time taken: {time_taken}")



nlp_file: /home/jupyter/novice/nlp.jsonl
current_directory: /home/jupyter/til-24-base/nlp




Accuracy: 100.00%
Time taken: 103.91580772399902
