In [115]:
import numpy as np
import torch
import os 

In [116]:
tokenizer = None 
    
import numpy as np
import torch
import os 

config_switch=os.getenv('DOCKER', 'local')
if config_switch=='local':
    startup_nodes = [{"host": "127.0.0.1", "port": "30001"}, {"host": "127.0.0.1", "port":"30002"}, {"host":"127.0.0.1", "port":"30003"}]
else:
    startup_nodes = [{"host": "rgcluster", "port": "30001"}, {"host": "rgcluster", "port":"30002"}, {"host":"rgcluster", "port":"30003"}]

try: 
    from redisai import ClusterClient
    redisai_cluster_client = ClusterClient(startup_nodes=startup_nodes)
except:
    print("Redis Cluster is not available")

def loadTokeniser():
    global tokenizer
    from transformers import BertTokenizerFast
    tokenizer = BertTokenizerFast.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
    return tokenizer


def qa(question, sentence_key,hash_tag):
    ### question is encoded
    ### use pre-computed context/answer text tensor

    global tokenizer

    if not tokenizer:
        tokenizer=loadTokeniser()

     

    token_key = f"tokenized:bert:qa:{sentence_key}"

    input_ids_question = tokenizer.encode(question, add_special_tokens=True, truncation=True, return_tensors="np")


    
    input_ids_context=redisai_cluster_client.tensorget(token_key)
    input_ids = np.append(input_ids_question,input_ids_context)
    
    print(input_ids.shape)
    print(input_ids)
    attention_mask = np.array([[1]*len(input_ids)])
    input_idss=np.array([input_ids])
    print(input_idss.shape)
    print("Attention mask shape ",attention_mask.shape)
    
    num_seg_a=input_ids_question.shape[1]
    print(num_seg_a)
    num_seg_b=input_ids_context.shape[0]
    print(num_seg_b)
    token_type_ids = np.array([0]*num_seg_a + [1]*num_seg_b)
    print("Segments id",token_type_ids.shape)
    
    redisai_cluster_client.tensorset(f'input_ids{hash_tag}', input_idss)
    redisai_cluster_client.tensorset(f'attention_mask{hash_tag}', attention_mask)
    redisai_cluster_client.tensorset(f'token_type_ids{hash_tag}', token_type_ids)

    redisai_cluster_client.modelrun(f'bert-qa{hash_tag}', [f'input_ids{hash_tag}', f'attention_mask{hash_tag}', f'token_type_ids{hash_tag}'],
                        [f'answer_start_scores{hash_tag}', f'answer_end_scores{hash_tag}'])
    print(f"Model run on {hash_tag}")
    answer_start_scores = redisai_cluster_client.tensorget(f'answer_start_scores{hash_tag}')
    answer_end_scores = redisai_cluster_client.tensorget(f'answer_end_scores{hash_tag}')

    answer_start = np.argmax(answer_start_scores)
    answer_end = np.argmax(answer_end_scores) + 1
    
    answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end]))
    print(answer)
    return answer


In [117]:
input_ids

array([  101, 12353,  1997,  2451,  3967,  7312,   102,  1045,  1050,
        1037,  1052,  1037,  1054,  1037,  1048,  1048,  1041,  1048,
        1037,  1052,  1052,  1054,  1051,  1037,  1039,  1044,  1059,
        1041,  1040,  1045,  1054,  1041,  1039,  1056,  1048,  1061,
        1054,  1041,  1039,  1051,  1058,  1041,  1054,  1041,  1040,
        1044,  1061,  1038,  1054,  1045,  1040,  1045,  1062,  1041,
        1040,  1058,  1045,  1054,  1037,  1048,  1055,  1041,  1053,
        1057,  1041,  1050,  1039,  1041,  1055,  1042,  1054,  1051,
        1049,  1056,  1044,  1041,  1055,  1057,  1054,  1042,  1037,
        1039,  1041,  1051,  1042,  1056,  1044,  1041,  1049,  1045,
        1039,  1054,  1051,  1037,  1054,  1054,  1037,  1061,   102])

In [118]:
question="Effectiveness of community contact reduction"
content_text="This would need tight coordination among pharmaceutical companies, governments, regulatory agencies, and the World Health Organization (WHO), as well as novel and out-of-the-box approaches to cGMP production, release processes, regulatory science, and clinical trial design."
print(qa(question,"PMC261870.xml:{06S}:26",'{06S}'))

(99,)
[  101 12353  1997  2451  3967  7312   102  1045  1050  1037  1052  1037
  1054  1037  1048  1048  1041  1048  1037  1052  1052  1054  1051  1037
  1039  1044  1059  1041  1040  1045  1054  1041  1039  1056  1048  1061
  1054  1041  1039  1051  1058  1041  1054  1041  1040  1044  1061  1038
  1054  1045  1040  1045  1062  1041  1040  1058  1045  1054  1037  1048
  1055  1041  1053  1057  1041  1050  1039  1041  1055  1042  1054  1051
  1049  1056  1044  1041  1055  1057  1054  1042  1037  1039  1041  1051
  1042  1056  1044  1041  1049  1045  1039  1054  1051  1037  1054  1054
  1037  1061   102]
(1, 99)
Attention mask shape  (1, 99)
7
92
Segments id (99,)
Model run on {06S}
effectiveness of community contact reduction [SEP]
effectiveness of community contact reduction [SEP]


In [37]:
sentence_key="PMC261870.xml:{06S}:26"
token_key = f"tokenized:bert:qa:{sentence_key}"

In [123]:
redisai_cluster_client.connection_pool

ClusterConnectionPool<ClusterConnection<host=localhost,port=6379,db=0>, ClusterConnection<host=localhost,port=6379,db=0>, ClusterConnection<host=localhost,port=6379,db=0>>

In [124]:
slot = redisai_cluster_client.connection_pool.nodes.keyslot(sentence_key)
node = redisai_cluster_client.connection_pool.get_master_node_by_slot(slot)
connection = redisai_cluster_client.connection_pool.get_connection_by_node(node)
connection.send_command('RG.TRIGGER',"RunQABERT",sentence_key,question)

In [125]:
connection.__dict__

{'readonly': False,
 'pid': 3838927,
 'host': '172.19.0.4',
 'port': 30001,
 'db': 0,
 'username': None,
 'client_name': None,
 'password': None,
 'socket_timeout': None,
 'socket_connect_timeout': None,
 'socket_keepalive': False,
 'socket_keepalive_options': {},
 'socket_type': 0,
 'retry_on_timeout': False,
 'health_check_interval': 0,
 'next_health_check': 0,
 'encoder': <redis.connection.Encoder at 0x7f7016f57f40>,
 '_sock': <socket.socket fd=67, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=6, laddr=('172.19.0.1', 46152), raddr=('172.19.0.4', 30001)>,
 '_parser': <rediscluster.connection.ClusterParser at 0x7f7016f57ac0>,
 '_connect_callbacks': [],
 '_buffer_cutoff': 6000,
 'node': {'host': '172.19.0.4',
  'port': 30001,
  'name': '172.19.0.4:30001',
  'server_type': 'master'}}

In [126]:
redisai_cluster_client.parse_response(connection,"RG.TRIGGER")

[b'[CLS]']

In [97]:
np.int64

numpy.int64