# Bash Commands

In [20]:
!pip install nltk

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [2]:
!pip install -U sentence_transformers



# Prerequisites

In [23]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Fetch the NLTK data used by the imports above: the stop-word corpus for
# `stopwords` and the 'punkt' tokenizer data for `word_tokenize`.
# NOTE(review): newer NLTK releases may also need 'punkt_tab' for
# word_tokenize — confirm against the installed version.
nltk.download('stopwords')
nltk.download('punkt')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/hiteshchoudhary2109/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/hiteshchoudhary2109/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [111]:
import json
import numpy as np
import pandas as pd

In [53]:
from sentence_transformers import SentenceTransformer, util
# Sentence-embedding model instance; handed to embedder(...) in the Practice section.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Class

In [133]:
class embedder:
    """Embed a tool catalogue and score free-text queries against its arguments.

    The catalogue is a JSON object keyed by tool name. Each value is read for a
    "Description" string and an "ArgumentName" mapping of
    argument name -> {"Description": ...}.

    Call order: construct, then extract(), then encode(), then check() or
    check_without_stopwords().
    """

    # Lazily built English stop-word set; filled on first remove_stopwords() call.
    _stop_words = None

    def __init__(self, address, model):
        """Load the catalogue and remember the embedding model.

        Args:
            address: Path of the JSON catalogue file.
            model: Object exposing ``encode(text)``; used for every embedding
                this instance computes.
        """
        # Context manager guarantees the file handle is closed after parsing.
        with open(address) as file:
            self.data = json.load(file)
        self.model = model

    def embed_sentences(self, strings):
        """Encode each string with ``self.model`` and stack the results.

        Args:
            strings: Iterable of strings to embed.

        Returns:
            A NumPy array with one float32 embedding per input string.
        """
        # Use the model given to this instance, not a notebook-level global.
        vectors = [
            np.array(self.model.encode(text), dtype=np.float32)
            for text in strings
        ]
        return np.array(vectors)

    def extract(self):
        """Flatten the catalogue into parallel name/description lists.

        Populates ``tools``, ``tool_descriptions``, ``arg_names``,
        ``arg_descriptions`` and ``arg_to_tool_map``. An argument whose
        description is "_" is recorded under the tool's own name and
        description instead of its own.
        """
        self.tools = []
        self.tool_descriptions = []
        self.arg_names = []
        self.arg_descriptions = []
        self.arg_to_tool_map = {}
        for tool_name, tool_spec in self.data.items():
            tool_description = tool_spec.get("Description")
            self.tools.append(tool_name)
            self.tool_descriptions.append(tool_description)

            # A tool without an "ArgumentName" entry contributes no arguments
            # (previously this raised AttributeError on None.items()).
            arguments = tool_spec.get("ArgumentName") or {}
            for arg_name, arg_spec in arguments.items():
                arg_description = arg_spec.get("Description")
                if arg_description == "_":
                    # Placeholder argument: stand in the tool itself.
                    self.arg_names.append(tool_name)
                    self.arg_descriptions.append(tool_description)
                    self.arg_to_tool_map[tool_name] = tool_name
                else:
                    self.arg_names.append(arg_name)
                    self.arg_descriptions.append(arg_description)
                    # Same-named arguments of different tools overwrite each
                    # other here; the last tool seen wins.
                    self.arg_to_tool_map[arg_name] = tool_name

    def encode(self):
        """Embed the four lists built by extract(); call extract() first."""
        self.tool_enc = self.embed_sentences(self.tools)
        self.tool_des_enc = self.embed_sentences(self.tool_descriptions)
        self.arg_enc = self.embed_sentences(self.arg_names)
        self.arg_des_enc = self.embed_sentences(self.arg_descriptions)

    def remove_stopwords(self, query):
        """Return ``query`` tokenized, with English stop words dropped.

        Tokens are compared case-insensitively against the NLTK English
        stop-word list and the survivors are re-joined with single spaces.
        """
        if self._stop_words is None:
            # Build the set once per instance instead of on every call.
            self._stop_words = set(stopwords.words('english'))
        kept = [
            token for token in word_tokenize(query)
            if token.lower() not in self._stop_words
        ]
        return ' '.join(kept)

    def _score_queries(self, querries, preprocess=None):
        """Shared scoring loop behind check() and check_without_stopwords().

        Args:
            querries: Iterable of query strings.
            preprocess: Optional callable applied to each query before it is
                encoded. Results are still keyed by the original query.

        Returns:
            ``self.tables``: {query: {arg_name: util.cos_sim(query, arg)}}.
        """
        self.tables = {}
        for query in querries:
            text = query if preprocess is None else preprocess(query)
            query_enc = self.model.encode(text)
            self.tables[query] = {
                arg: util.cos_sim(query_enc, arg_enc)
                for arg, arg_enc in zip(self.arg_names, self.arg_enc)
            }
        return self.tables

    def check(self, querries):
        """Score each query against every argument-name embedding.

        Requires extract() and encode() to have run. Returns (and stores in
        ``self.tables``) {query: {arg_name: value returned by util.cos_sim}}.
        """
        return self._score_queries(querries)

    def check_without_stopwords(self, querries):
        """Like check(), but stop words are removed from each query first.

        The returned mapping is keyed by the original, unfiltered query text.
        """
        return self._score_queries(querries, preprocess=self.remove_stopwords)
        

# Practice

In [134]:
# Build the embedder from the tool catalogue, then fill its name/description
# lists (extract) and their embeddings (encode); check() reads both.
example = embedder("tools_dictionary.json",model)
example.extract()
example.encode()
# Spot check: show the third extracted argument name.
print(example.arg_names[2])

get_sprint_id


In [135]:
# Sample queries to score against every extracted argument name.
querries = [
    'Get all work items similar to TKT-123, summarize them, create issues from that summary, and prioritize them',
    'Given a customer meeting transcript T, create action items and add them to my current sprint',
    'List all high severity tickets coming in from slack from customer Cust123 and generate a summary of them.',
    'What are my all issues in the triage stage under part FEAT-123? Summarize them.',
    'Summarize high severity tickets from the customer UltimateCustomer',
    'Prioritize my P0 issues and add them to the current sprint',
    'What is the meaning of life?',
    'Summarize issues similar to don:core:dvrv-us-1:devo/0:issue/1'
]
# check() returns {query: {arg_name: cos_sim result}}; as a DataFrame the
# queries become columns and the argument names become the row index.
ans = example.check(querries)
fd = pd.DataFrame(ans)
fd

Unnamed: 0,"Get all work items similar to TKT-123, summarize them, create issues from that summary, and prioritize them","Given a customer meeting transcript T, create action items and add them to my current sprint",List all high severity tickets coming in from slack from customer Cust123 and generate a summary of them.,What are my all issues in the triage stage under part FEAT-123? Summarize them.,Summarize high severity tickets from the customer UltimateCustomer,Prioritize my P0 issues and add them to the current sprint,What is the meaning of life?,Summarize issues similar to don:core:dvrv-us-1:devo/0:issue/1
text,[[tensor(0.1321)]],[[tensor(0.2260)]],[[tensor(0.1211)]],[[tensor(0.0281)]],[[tensor(0.1578)]],[[tensor(-0.0035)]],[[tensor(0.2108)]],[[tensor(0.0370)]]
work_id,[[tensor(0.2162)]],[[tensor(0.0892)]],[[tensor(0.0939)]],[[tensor(0.0239)]],[[tensor(0.0146)]],[[tensor(-0.0036)]],[[tensor(0.1199)]],[[tensor(0.0587)]]
get_sprint_id,[[tensor(0.0646)]],[[tensor(0.3608)]],[[tensor(0.1974)]],[[tensor(0.0493)]],[[tensor(0.0997)]],[[tensor(0.3114)]],[[tensor(0.1092)]],[[tensor(0.0422)]]
objects,[[tensor(0.0963)]],[[tensor(0.0050)]],[[tensor(0.0302)]],[[tensor(0.0337)]],[[tensor(0.0569)]],[[tensor(-0.0697)]],[[tensor(0.1628)]],[[tensor(-0.0465)]]
query,[[tensor(0.1127)]],[[tensor(-0.0015)]],[[tensor(0.1668)]],[[tensor(0.0041)]],[[tensor(0.1517)]],[[tensor(-0.0734)]],[[tensor(0.1090)]],[[tensor(0.0107)]]
who_am_i,[[tensor(0.0267)]],[[tensor(0.0075)]],[[tensor(0.0165)]],[[tensor(0.0844)]],[[tensor(0.0144)]],[[tensor(0.0024)]],[[tensor(0.1846)]],[[tensor(0.0415)]]
applies_to_part,[[tensor(0.1085)]],[[tensor(0.1346)]],[[tensor(0.0468)]],[[tensor(0.2585)]],[[tensor(0.0965)]],[[tensor(0.0633)]],[[tensor(0.0915)]],[[tensor(0.0961)]]
created_by,[[tensor(-0.0087)]],[[tensor(0.0772)]],[[tensor(0.0678)]],[[tensor(-0.0288)]],[[tensor(0.0012)]],[[tensor(-0.0325)]],[[tensor(0.1103)]],[[tensor(0.0660)]]
issue.priority,[[tensor(0.2028)]],[[tensor(0.0365)]],[[tensor(0.0930)]],[[tensor(0.2464)]],[[tensor(0.1531)]],[[tensor(0.3181)]],[[tensor(0.0731)]],[[tensor(0.1771)]]
issue.rev_orgs,[[tensor(0.0307)]],[[tensor(0.0105)]],[[tensor(-0.0444)]],[[tensor(0.1556)]],[[tensor(-0.1015)]],[[tensor(-0.0005)]],[[tensor(0.0038)]],[[tensor(0.1639)]]


In [136]:
# Same query list as the previous cell, re-scored after stop-word removal so
# the two tables can be compared side by side.
querries = [
    'Get all work items similar to TKT-123, summarize them, create issues from that summary, and prioritize them',
    'Given a customer meeting transcript T, create action items and add them to my current sprint',
    'List all high severity tickets coming in from slack from customer Cust123 and generate a summary of them.',
    'What are my all issues in the triage stage under part FEAT-123? Summarize them.',
    'Summarize high severity tickets from the customer UltimateCustomer',
    'Prioritize my P0 issues and add them to the current sprint',
    'What is the meaning of life?',
    'Summarize issues similar to don:core:dvrv-us-1:devo/0:issue/1'
]
# Results are keyed by the original query text, so the column labels match
# the table produced by check().
ans = example.check_without_stopwords(querries)
fd = pd.DataFrame(ans)
fd

Unnamed: 0,"Get all work items similar to TKT-123, summarize them, create issues from that summary, and prioritize them","Given a customer meeting transcript T, create action items and add them to my current sprint",List all high severity tickets coming in from slack from customer Cust123 and generate a summary of them.,What are my all issues in the triage stage under part FEAT-123? Summarize them.,Summarize high severity tickets from the customer UltimateCustomer,Prioritize my P0 issues and add them to the current sprint,What is the meaning of life?,Summarize issues similar to don:core:dvrv-us-1:devo/0:issue/1
text,[[tensor(0.1476)]],[[tensor(0.2102)]],[[tensor(0.1937)]],[[tensor(0.0458)]],[[tensor(0.1664)]],[[tensor(-0.0036)]],[[tensor(0.2477)]],[[tensor(-0.0045)]]
work_id,[[tensor(0.2821)]],[[tensor(0.1130)]],[[tensor(0.1305)]],[[tensor(0.0584)]],[[tensor(0.0155)]],[[tensor(-0.0036)]],[[tensor(0.1377)]],[[tensor(0.0494)]]
get_sprint_id,[[tensor(0.1279)]],[[tensor(0.3975)]],[[tensor(0.1847)]],[[tensor(0.0804)]],[[tensor(0.0924)]],[[tensor(0.3716)]],[[tensor(0.0695)]],[[tensor(0.0442)]]
objects,[[tensor(0.1186)]],[[tensor(-0.0161)]],[[tensor(0.0547)]],[[tensor(0.0465)]],[[tensor(0.0647)]],[[tensor(-0.0845)]],[[tensor(0.1638)]],[[tensor(-0.0580)]]
query,[[tensor(0.1856)]],[[tensor(0.0138)]],[[tensor(0.1734)]],[[tensor(0.0307)]],[[tensor(0.1453)]],[[tensor(-0.0765)]],[[tensor(0.1349)]],[[tensor(-0.0144)]]
who_am_i,[[tensor(0.0081)]],[[tensor(0.0253)]],[[tensor(0.0469)]],[[tensor(0.0911)]],[[tensor(0.0190)]],[[tensor(0.0154)]],[[tensor(0.1873)]],[[tensor(0.0435)]]
applies_to_part,[[tensor(0.0678)]],[[tensor(0.1442)]],[[tensor(0.0984)]],[[tensor(0.2815)]],[[tensor(0.0895)]],[[tensor(0.0630)]],[[tensor(0.1501)]],[[tensor(0.0908)]]
created_by,[[tensor(0.0596)]],[[tensor(0.0922)]],[[tensor(0.1160)]],[[tensor(-0.0004)]],[[tensor(-0.0085)]],[[tensor(-0.0244)]],[[tensor(0.1485)]],[[tensor(0.0427)]]
issue.priority,[[tensor(0.1772)]],[[tensor(0.0308)]],[[tensor(0.0840)]],[[tensor(0.2410)]],[[tensor(0.1499)]],[[tensor(0.3414)]],[[tensor(0.1287)]],[[tensor(0.1968)]]
issue.rev_orgs,[[tensor(0.0347)]],[[tensor(0.0065)]],[[tensor(-0.0291)]],[[tensor(0.1968)]],[[tensor(-0.1003)]],[[tensor(0.0160)]],[[tensor(0.0475)]],[[tensor(0.2093)]]
