In [82]:
import json
import os
from typing import Any, Dict, Iterable, List, Optional

import pandas as pd
import re
import requests
import tqdm
from dotenv import load_dotenv
from genai import Model
from genai.model import Credentials
from genai.schemas import GenerateParams
from datasets import Dataset

import torch
from torch.nn.functional import normalize
from torch import clamp, sum
from transformers import AutoTokenizer, AutoModel

from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
from milvus import default_server
from pymilvus import connections, utility

In [83]:
def get_genai_creds():
    """Build GenAI ``Credentials`` from the ``GENAI_KEY`` / ``GENAI_API`` environment variables.

    ``load_dotenv(override=True)`` is called first. When either variable is missing
    a warning is printed and the ``Credentials`` object is still built from whatever
    values are available (possibly ``None``).

    Returns:
        The result of ``Credentials(api_key, api_url)``, where any trailing ``/``
        characters have been removed from the URL.
    """
    load_dotenv(override=True)
    key = os.getenv("GENAI_KEY")
    endpoint = os.getenv("GENAI_API")

    if None in (key, endpoint):
        print("Either api_key or api_url is None. Please make sure your credentials are correct.")

    # Only a real string can be stripped; a missing URL is passed through as None.
    endpoint = endpoint.rstrip("/") if endpoint is not None else endpoint
    return Credentials(key, endpoint)

# Build the credentials once at cell level; later cells read this module-level `creds`.
creds = get_genai_creds()
# Echo only the endpoint so the reader can confirm which API is being targeted.
if creds.api_endpoint:
    print(f"Your API endpoint is: {creds.api_endpoint}")

Your API endpoint is: https://bam-api.res.ibm.com/v1


In [84]:
# Query the GenAI API for the catalogue of supported models. `models_data` stays at
# module level because generate_prompt_from_final_chunks() reads each model's
# "token_limit" from it.
headers = {
    'Authorization': f'Bearer {os.getenv("GENAI_KEY", None)}'
}

# get the list of supported models from the API
models_response = requests.get(f"{creds.api_endpoint}/models", headers=headers)
# Fail here with the HTTP status (e.g. 401 for a bad key) instead of a confusing
# KeyError on "results" a few lines further down.
models_response.raise_for_status()

# Parse the JSON response
models_data = models_response.json()

# Collect the ids (the list used to be declared but never filled) and show them.
model_ids = [model_n["id"] for model_n in models_data["results"]]
for model_id in model_ids:
    print(model_id)

salesforce/codegen2-16b
codellama/codellama-34b-instruct
tiiuae/falcon-180b
tiiuae/falcon-40b
ibm/falcon-40b-8lang-instruct
google/flan-t5-xl
google/flan-t5-xxl
google/flan-ul2
eleutherai/gpt-neox-20b
togethercomputer/gpt-neoxt-chat-base-20b
ibm/granite-13b-chat-grounded-v01
ibm/granite-13b-chat-v1
ibm/granite-13b-instruct-v1
ibm/granite-3b-code-plus-v1
meta-llama/llama-2-13b
meta-llama/llama-2-13b-chat
meta-llama/llama-2-13b-chat-beam
meta-llama/llama-2-70b
meta-llama/llama-2-70b-chat
meta-llama/llama-2-7b
meta-llama/llama-2-7b-chat
mosaicml/mpt-30b
ibm/mpt-7b-instruct
bigscience/mt0-xxl
bigcode/starcoder
google/ul2


In [85]:
# Name of the Milvus collection searched by process_question().
COLLECTION_NAME = "AMS_test"

# NOTE(review): this cell only connects to a server on default_server.listen_port;
# no visible cell starts that server or creates/populates the collection, so both
# presumably happen outside this notebook — confirm before a Restart & Run All.
connections.connect(host='127.0.0.1', port=default_server.listen_port)

# Check if the server is ready.
print(utility.get_server_version())

# Open the existing collection and load it so that it can be searched.
collection = Collection(name=COLLECTION_NAME)
collection.load()

v2.2-testing-20230824-68-ga34a9d6-lite


In [86]:
# Load the challenge questions and lower-case the assignment group.
# The cell used to call `convert_to_lower`, which no visible cell defines (NameError
# on a fresh kernel). The inline lambda lower-cases string values and leaves anything
# else (e.g. NaN for a missing group) unchanged.
challenge_question_df = pd.read_csv('ExampleChallengeQuestions.csv')
challenge_question_df['assignment_group'] = challenge_question_df['assignment_group'].apply(
    lambda group: group.lower() if isinstance(group, str) else group
)
challenge_question_df.head()

Unnamed: 0,assignment_group,short_description,long_description
0,,6 ways to create maintenance order,6 ways to create maintenance order
1,,ADVANCE LEAVE workflow needs to be added with ...,ADVANCE LEAVE workflow needs to be added with ...
2,sap-pi,401 Unauthorized error,Getting 401 unauthorized error please help


In [87]:
def get_relevant_chunks(question_text, assignment_group = None, n_results=5):
    """Retrieve the closest ``"td"`` and ``"kb"`` chunks for a question.

    Two searches are run through ``process_question`` against the module-level
    ``collection``: one over ``"td"`` entries (with ``assignment_group`` forwarded
    as ``assignment_id``) and one over ``"kb"`` entries (no assignment filter).
    When the first ``"td"`` result set holds fewer than ``n_results`` hits, the
    question and the assignment group are printed.

    Args:
        question_text: Text of the question to search for.
        assignment_group: Optional group forwarded to the ``"td"`` search.
        n_results: Maximum number of hits requested from each search.

    Returns:
        tuple: ``(td_results, kb_results)`` exactly as returned by ``process_question``.
    """
    case_results = process_question(
        collection,
        question_text,
        assignment_id=assignment_group,
        limit=n_results,
        data_type="td",
    )

    # Only the first result set is inspected when checking for a short result.
    hits_per_query = [len(hits) for hits in case_results]
    if hits_per_query[0] < n_results:
        print(question_text)
        print(assignment_group)

    kb_results = process_question(collection, question_text, limit=n_results, data_type="kb")

    return case_results, kb_results

def process_question(collection, question, assignment_id=None, limit=5, data_type="td"):
    """Embed ``question`` and run a vector search for it on ``collection``.

    The question is prefixed with ``"query: "``, tokenized with the module-level
    ``embedding_tokenizer`` and encoded with ``embedding_model``. The token
    embeddings are mean-pooled over the attention mask and normalised along
    dim 1 before being searched against the ``chunk_embedding`` field.

    Args:
        collection: Object exposing ``search(...)`` (a Milvus ``Collection`` in this notebook).
        question: Question text to embed.
        assignment_id: Optional assignment group. It is applied as a filter only when
            ``data_type == "td"``, after being lower-cased and stripped.
        limit: Maximum number of hits to return.
        data_type: Value matched against the ``type`` field (``"td"`` or ``"kb"`` in this notebook).

    Returns:
        Whatever ``collection.search`` returns, requested with ``chunk`` and
        ``type`` as output fields.
    """
    # Tokenize and embed the question
    text = "query: " + question
    inputs = embedding_tokenizer(text, add_special_tokens=True, truncation=True, padding="max_length", return_attention_mask=True, return_tensors="pt")

    # Inference only: without no_grad() every query would build an autograd graph.
    with torch.no_grad():
        sentence_embs = embedding_model(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask']
        )[0]

        # Mean pooling: zero out padded positions, then divide by the number of real tokens.
        # `sum` and `clamp` here are the torch functions imported at the top of the file.
        input_mask_expanded = inputs['attention_mask'].unsqueeze(-1).expand(sentence_embs.size()).float()
        embeddings = sum(sentence_embs * input_mask_expanded, 1) / clamp(input_mask_expanded.sum(1), min=1e-9)

        # Normalize the embeddings
        embeddings = normalize(embeddings, dim=1)

    # The three original search calls differed only in this filter expression.
    expr = f"type=='{data_type}'"
    if assignment_id is not None and data_type == "td":
        # NOTE(review): the value is interpolated unescaped; a quote character in it
        # would break the expression.
        expr += f" and assignment_id=='{assignment_id.lower().strip()}'"

    return collection.search(
        embeddings.tolist(),
        anns_field='chunk_embedding',
        param={},
        output_fields=['chunk', 'type'],
        expr=expr,
        limit=limit)

def filter_relevant_chunks_individual(question_text, relevant_chunks_cases, relevant_chunks, model_name = "google/flan-t5-xxl"):
    """Keep only the retrieved chunks that an LLM judges relevant to the question.

    Every hit is sent to the model in its own yes/no prompt (greedy decoding, at
    most 5 new tokens). A hit is kept when the lower-cased reply contains ``"yes"``.
    Case hits are processed first, then knowledge-base hits.

    Args:
        question_text: The user's current case/query, embedded in each prompt.
        relevant_chunks_cases: Iterable of hit lists for previous cases; each hit
            exposes ``entity.get('chunk')`` and ``distance``.
        relevant_chunks: Iterable of hit lists for knowledge-base articles, same shape.
        model_name: Model id used for the relevance check.

    Returns:
        tuple: ``(cases, kb, total_input_tokens, total_output_tokens)`` where
        ``cases`` and ``kb`` are dicts with parallel ``"documents"`` and
        ``"distances"`` lists, and the totals sum the token counts reported by
        every response.
    """
    # set-up inference parameters
    params = GenerateParams(
        decoding_method="greedy",
        max_new_tokens=5,
        min_new_tokens=1,
        stream=False
    )
    model = Model(model=model_name, credentials=creds, params=params)

    # Token usage accumulated across both passes.
    usage = {"input": 0, "output": 0}

    def case_prompt(chunk):
        return (
            "Answer ONLY in yes/no if the provided previous case is relevant to solving the user's current case.\n\n"
            "Provided Previous Case:\n\n"
            f"{chunk}\n\n"
            f"User Current Case: {question_text}\n\n"
        )

    def kb_prompt(chunk):
        return (
            "Answer ONLY in yes/no if the provided knowledge base article is relevant to answer the user's current query.\n\n"
            "Provided KB Article:\n\n"
            f"{chunk}\n\n"
            f"User Current Query: {question_text}\n\n"
        )

    def keep_relevant(result_sets, build_prompt):
        # One model call per hit; a hit is retained when the reply contains "yes".
        kept = {"documents": [], "distances": []}
        for hits in result_sets:
            for hit in hits:
                chunk = hit.entity.get('chunk')
                response = model.generate([build_prompt(chunk)])[0]
                usage["input"] += response.input_token_count
                usage["output"] += response.generated_token_count
                if "yes" in response.generated_text.lower():
                    kept["documents"].append(chunk)
                    kept["distances"].append(hit.distance)
        return kept

    final_relevant_chunks_cases = keep_relevant(relevant_chunks_cases, case_prompt)
    final_relevant_chunks_kb = keep_relevant(relevant_chunks, kb_prompt)

    return final_relevant_chunks_cases, final_relevant_chunks_kb, usage["input"], usage["output"]

def make_llama2_prompt(case_context, kb_context, question_text, max_input_tokens, model):
    """Build the Llama-2 prompt, provided it fits within ``max_input_tokens``.

    Args:
        case_context: Text inserted under "Previous Related Cases".
        kb_context: Text inserted under "Knowledge Base Articles".
        question_text: The current case description.
        max_input_tokens: Largest prompt size, in tokens, that may be returned.
        model: Passed to ``token_count`` to measure the prompt.

    Returns:
        The prompt string when its token count is at most ``max_input_tokens``;
        otherwise ``None``. No truncation is attempted.
    """
    prompt = llama2_prompt_template(case_context, kb_context, question_text)

    prompt_token_count = token_count(prompt, model)

    if prompt_token_count <= max_input_tokens:
        return prompt

    # This path used to fall off the end of the function silently, so the caller only
    # found out when the model call failed on a None prompt. The return value is
    # unchanged; the reason is now reported.
    print("exceeded input token limit, no prompt produced", prompt_token_count, ">", max_input_tokens)
    return None

def llama2_prompt_template(final_related_chunks_cases, final_related_chunks_kb, question):
    """Fill the Llama-2 chat template (``[INST]`` / ``<<SYS>>`` markers) with the given text.

    Args:
        final_related_chunks_cases: Inserted under "Previous Related Cases:".
        final_related_chunks_kb: Inserted under "Knowledge Base Articles:".
        question: Inserted after "Current Case:".

    Returns:
        str: The complete prompt; the three arguments are interpolated as-is.
    """
    return f'''<s>[INST] <<SYS>>
You help solve current cases using information from related past cases and knowledge base data to suggest possible solutions.

Narrate to the user what is the most probable root cause and solution approach given the following past related cases and the current user case description. Take into account what has happened in the past cases and what solution was taken.

Suggest both the probable root cause and a solution using the information from the past related cases data in the following format:

Root Cause: <most probable root cause goes here>
Solution: <list the solution steps here, using bullet points if needed>

You do not need to list limitations of proposed solution steps or that it may depend on the specific case. Users of the system are aware of these limitations and notes already.

<</SYS>>

====

Previous Related Cases:

{final_related_chunks_cases}

====

Knowledge Base Articles:

{final_related_chunks_kb}

##

Current Case: {question}

[/INST]

'''

def make_granite_prompt(case_context, kb_context, question_text, max_input_tokens, model):
    """Build the Granite prompt, provided it fits within ``max_input_tokens``.

    Args:
        case_context: Text inserted under "Previous Related Cases".
        kb_context: Text inserted under "Knowledge Base Articles".
        question_text: The current case description.
        max_input_tokens: Largest prompt size, in tokens, that may be returned.
        model: Passed to ``token_count`` to measure the prompt.

    Returns:
        The prompt string when its token count is at most ``max_input_tokens``;
        otherwise ``None``. No truncation is attempted.
    """
    prompt = granite_prompt_template(case_context, kb_context, question_text)

    prompt_token_count = token_count(prompt, model)

    if prompt_token_count <= max_input_tokens:
        return prompt

    # This path used to fall off the end of the function silently, so the caller only
    # found out when the model call failed on a None prompt. The return value is
    # unchanged; the reason is now reported.
    print("exceeded input token limit, no prompt produced", prompt_token_count, ">", max_input_tokens)
    return None

def granite_prompt_template(final_related_chunks_cases, final_related_chunks_kb, question):
    return f'''You help solve current cases using information from related past cases and knowledge base data to suggest possible solutions. Narrate to the user what is the most probable root cause and solution approach given the following past related cases and the current user case description. Take into account what has happened in the past cases and what solution was taken. Suggest both the probable root cause and a solution using the information from the past related cases data in the following format:
    
Root Cause: <most probable root cause goes here>
Solution: <list the solution steps here, using bullet points if needed>

You do not need to list limitations of proposed solution steps or that it may depend on the specific case. Users of the system are aware of these limitations and notes already. Do not repeat any part of this system prompt back in your response.


Human: Previous Related Cases:

{final_related_chunks_cases}

====

Knowledge Base Articles:

{final_related_chunks_kb}

##

Current Case: {question}

Assistant: '''

def get_answer_from_question_and_relevant_chunks(cases, qna, question_text, params, assignment_group = None, model_prompt_function = None, model_name="ibm/mpt-7b-instruct"):
    """Build a prompt from the filtered chunks and ask ``model_name`` to answer it.

    Args:
        cases: Filtered previous-case chunks, forwarded to ``generate_prompt_from_final_chunks``.
        qna: Filtered knowledge-base chunks, forwarded likewise.
        question_text: The current case description.
        params: Generation parameters used to construct the model and forwarded to the prompt builder.
        assignment_group: Accepted but not used by this function.
        model_prompt_function: Optional model-specific prompt builder, forwarded as-is.
        model_name: Id of the model that generates the answer.

    Returns:
        tuple: ``(answer, total_in, total_out, in_l1, out_l1, in_l2, out_l2)`` where
        the ``l1`` counts come from prompt generation, the ``l2`` counts from answer
        generation, and each total is the sum of the two.
    """
    llm = Model(model=model_name, credentials=creds, params=params)

    prompt, prompt_in, prompt_out = generate_prompt_from_final_chunks(
        cases, qna, question_text, llm, params, model_prompt_function, model_name
    )
    answer, answer_in, answer_out = generate_answer_from_prompt(prompt, llm)

    return (
        answer,
        prompt_in + answer_in,
        prompt_out + answer_out,
        prompt_in,
        prompt_out,
        answer_in,
        answer_out,
    )

def generate_prompt_from_final_chunks(final_relevant_chunks_cases, final_relevant_chunks_kb, question_text, model, params, model_prompt_function = None, model_name = "ibm/mpt-7b-instruct"):
    """Turn the filtered chunks into a prompt sized for ``model_name``.

    The model's ``token_limit`` is looked up in the module-level ``models_data``;
    the input budget is that limit minus ``params.max_new_tokens`` minus 1.

    Args:
        final_relevant_chunks_cases: Dict with parallel ``"documents"`` / ``"distances"`` lists (previous cases).
        final_relevant_chunks_kb: Dict of the same shape (knowledge-base articles).
        question_text: The current case description.
        model: Forwarded to the prompt builder for token counting.
        params: Object exposing ``max_new_tokens``.
        model_prompt_function: Optional builder called as
            ``fn(cases_text, kb_text, question_text, input_token_limit, model)``.
            When ``None``, ``make_prompt`` is used on the merged chunks.
        model_name: Model id, either a string or an object exposing ``.value``.

    Returns:
        tuple: ``(prompt, 0, 0)``; the two zeros are the token counts of this step,
        which makes no model generation call.

    Raises:
        ValueError: If ``model_name`` is not listed in ``models_data``.
        Exception: ``"No relevant data found"`` when both document lists are empty.
    """
    # Accept either a plain model id or an enum-like object exposing `.value`.
    model_id = model_name if isinstance(model_name, str) else model_name.value

    # Find the token limit of the matching model, if any.
    model_token_limit = next(
        (entry["token_limit"] for entry in models_data["results"] if entry["id"] == model_id),
        None,
    )
    if model_token_limit is None:
        # Previously this surfaced as an opaque TypeError from `None - int`.
        raise ValueError(f"Model '{model_id}' not found in the model list; cannot determine its token limit")

    input_token_limit = model_token_limit - params.max_new_tokens - 1

    # The emptiness checks must look at the document lists: len() of the dicts
    # themselves is always 2 (their two keys), which made the old checks dead code.
    case_documents = final_relevant_chunks_cases["documents"]
    kb_documents = final_relevant_chunks_kb["documents"]

    if not case_documents and not kb_documents:
        raise Exception("No relevant data found")

    if model_prompt_function is None:
        final_relevant_chunks = {
            "documents": case_documents + kb_documents,
            "distances": final_relevant_chunks_cases["distances"] + final_relevant_chunks_kb["distances"],
        }
        context = "\n\n\n".join(final_relevant_chunks["documents"])
        prompt = make_prompt(final_relevant_chunks, context, question_text, input_token_limit, model)
    else:
        chunks_cases = "\n\n\n".join(case_documents) if case_documents else "Not available"
        chunks_kb = "\n\n\n".join(kb_documents) if kb_documents else "Not available"
        prompt = model_prompt_function(chunks_cases, chunks_kb, question_text, input_token_limit, model)

    return prompt, 0, 0

def prompt_template(context, question_text):
    """Return the generic prompt: instructions, the supporting context, then the current case.

    Args:
        context: Text placed under "Previous Cases/Q&A:".
        question_text: Text placed after "Current Case:".

    Returns:
        str: The prompt, ending with ``"Probable Root Cause (if applicable) and Solution: "``.
    """
    instructions = (
        "You help solve current cases using information from past cases and Q&A to suggest possible solutions.\n\n"
        "Narrate to the user what is the most probable root cause and solution approach given the following past related cases and the current user case description. "
        "Take into account what has happened in the past cases and what solution was taken. "
        "Do not consider unique IDs in your answer, only consider the pattern, if any. "
        "Suggest both the probable root cause and a solution using the information from the past related cases data.\n\n"
    )
    case_section = (
        f"Previous Cases/Q&A:\n\n{context}\n\n"
        f"##\n\nCurrent Case: {question_text}\n\n"
        "Probable Root Cause (if applicable) and Solution: "
    )
    return instructions + case_section

def make_prompt(relevant_chunks, context, question_text, max_input_tokens, model):
    """Build the generic prompt, truncating the context when it exceeds the token budget.

    Args:
        relevant_chunks: Dict with parallel ``"documents"`` and ``"distances"`` lists.
        context: The documents already joined into one string (used for the first attempt).
        question_text: The current case description.
        max_input_tokens: Largest prompt size, in tokens, that should be produced.
        model: Passed to ``token_count`` to measure text.

    Returns:
        str: The full prompt when it fits. Otherwise a prompt whose context holds as
        many whole documents as fit, taken in descending ``distance`` order, followed
        by a character-truncated piece of the first document that does not fit.
    """
    prompt = prompt_template(context, question_text)

    prompt_token_count = token_count(prompt, model)

    if prompt_token_count <= max_input_tokens:
        return prompt

    print("exceeded input token limit, truncating context", prompt_token_count)

    distances = relevant_chunks["distances"]
    documents = relevant_chunks["documents"]

    # Documents with the HIGHEST distance value are included first.
    # NOTE(review): the previous comment said "lower distance scores ... first", which
    # contradicts reverse=True. Descending order suits a similarity-style score and
    # would be wrong for a true distance — confirm against the collection's metric.
    sorted_indices = sorted(range(len(distances)), key=lambda k: distances[k], reverse=True)

    # The instructions and the question consume part of the limit as well. The old
    # loop budgeted the documents against the whole limit, so the "truncated" prompt
    # could still exceed it.
    template_overhead = token_count(prompt_template("", question_text), model)
    context_budget = max_input_tokens - template_overhead

    separator = "\n\n\n"
    pieces = []
    tokens_used = 0

    for doc_index in sorted_indices:
        # Count the separator too: it is part of what gets sent to the model.
        piece = documents[doc_index] + separator
        piece_tokens = token_count(piece, model)

        if tokens_used + piece_tokens <= context_budget:
            pieces.append(piece)
            tokens_used += piece_tokens
            continue

        # Fill what is left with the start of the next document. Slicing by characters
        # is an approximation that assumes a token spans at least one character.
        remaining_tokens = context_budget - tokens_used
        if remaining_tokens > 0:
            pieces.append(documents[doc_index][:remaining_tokens])
        break

    return prompt_template("".join(pieces), question_text)

# Token counting function
def token_count(doc, model):
    """Return the ``token_count`` that ``model.tokenize`` reports for the single text ``doc``."""
    tokenized = model.tokenize([doc])
    first_result = tokenized[0]
    return first_result.token_count

def generate_answer_from_prompt(prompt, model):
    """Send one prompt to ``model`` and unpack the first response.

    Returns:
        tuple: ``(generated_text, input_token_count, generated_token_count)`` of the
        first response returned by ``model.generate``.
    """
    result = model.generate([prompt])[0]
    return (
        result.generated_text,
        result.input_token_count,
        result.generated_token_count,
    )

In [88]:
def remove_html_tags(html_text):
    """Return the text content of ``html_text``, with ``"\\n"`` between text fragments.

    The markup is parsed with BeautifulSoup's ``"html.parser"`` backend.

    NOTE(review): ``BeautifulSoup`` is not imported by any cell visible in this
    notebook; presumably ``from bs4 import BeautifulSoup`` is required — confirm.
    """
    return BeautifulSoup(html_text, "html.parser").get_text(separator="\n")

def cap_consecutive_newlines(input_str):
    """Limit every run of consecutive newlines in ``input_str`` to at most two.

    Runs of three or more newlines used to be collapsed to a single newline, while a
    run of exactly two was left alone. That contradicted the function's name and its
    own comment, and removed paragraph breaks. Such runs now become exactly two.

    Args:
        input_str: Text to clean up.

    Returns:
        str: The text in which no run of newlines is longer than two.
    """
    # Use a regular expression to replace consecutive newlines with a maximum of two
    return re.sub(r'\n{3,}', '\n\n', input_str)

def remove_extra_spaces(input_str):
    """Collapse each run of space characters to one space and strip both ends.

    Only the space character is matched by the pattern; newlines and tabs inside
    the text are kept, but the final ``strip()`` removes any leading or trailing
    whitespace.
    """
    space_runs = re.compile(r' +')
    collapsed = space_runs.sub(' ', input_str)
    return collapsed.strip()

def preprocess_text_input(txt):
    """Clean ``txt`` by applying ``remove_extra_spaces`` and then ``cap_consecutive_newlines``."""
    single_spaced = remove_extra_spaces(txt)
    return cap_consecutive_newlines(single_spaced)

In [89]:
# Checkpoint shared by the embedding model and its tokenizer; process_question()
# reads both objects from module scope.
EMBEDDING_CHECKPOINT = 'intfloat/e5-base-v2'

embedding_model = AutoModel.from_pretrained(EMBEDDING_CHECKPOINT)
embedding_tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_CHECKPOINT)

In [90]:
# Retrieve and LLM-filter the supporting chunks for every challenge question,
# keeping the results aligned with the DataFrame's row order.
cases_by_index = []
kb_by_index = []

# Token usage of the relevance-filter ("level 1") model calls, summed over all rows.
total_in_tokens_l1 = 0
total_out_tokens_l1 = 0

for index, row in challenge_question_df.iterrows():
    question_text = preprocess_text_input(f'''Subject: {row['short_description']}\n\nDescription: {row['long_description']}''')
    cases, qna = get_relevant_chunks(question_text)
    filtered_cases, filtered_qna, in_tokens_l1, out_tokens_l1 = filter_relevant_chunks_individual(question_text, cases, qna)
    cases_by_index.append(filtered_cases)
    kb_by_index.append(filtered_qna)
    total_in_tokens_l1 += in_tokens_l1
    total_out_tokens_l1 += out_tokens_l1
    # Report rows left without any supporting document. The filtered results are dicts
    # with "documents"/"distances" keys, so len() of the dict itself is always 2 and
    # the old `len(...) + len(...) == 0` test could never be true.
    if not filtered_cases["documents"] and not filtered_qna["documents"]:
        print(index)

In [91]:
# Positions of the questions that kept at least one relevant document after filtering.
# Each entry of cases_by_index / kb_by_index is a dict with "documents" and "distances"
# keys, so len() of the dict is always 2 and every row used to be counted as non-empty;
# the document lists are what must be checked.
nonzeroindices = [
    i
    for i in range(len(challenge_question_df))
    if cases_by_index[i]["documents"] or kb_by_index[i]["documents"]
]
len(nonzeroindices)

3

In [92]:
# Models to compare, in the order used for the modelN_* columns of the export.
model_names = [
    'ibm/mpt-7b-instruct',
    'ibm/granite-13b-chat-v1',
    'meta-llama/llama-2-70b-chat'
]

# Generation settings shared by every model.
params = GenerateParams(
    decoding_method="greedy",
    max_new_tokens=512,
    min_new_tokens=1,
    stream=False,
    repetition_penalty=1.2,
    stop_sequences=['<endoftext>','END_KEY','####','\n\nUser:','\n\nAssistant:','\n\n--\n\n']
)

# Models with a dedicated prompt format; any other model uses the generic prompt.
prompt_constructors_by_model = {
    'ibm/granite-13b-chat-v1': make_granite_prompt,
    'meta-llama/llama-2-70b-chat': make_llama2_prompt
}

combined_answers_by_model = {}
token_counts_by_model = {}

for model_name in model_names:

    # Every model starts from the shared level-1 (relevance filter) usage.
    usage = {
        "in_total": total_in_tokens_l1,
        "out_total": total_out_tokens_l1,
        "in_l1": total_in_tokens_l1,
        "out_l1": total_out_tokens_l1,
        "in_l2": 0,
        "out_l2": 0
    }
    token_counts_by_model[model_name] = usage

    answers = []

    # None (no dedicated builder) is also the callee's default for this argument.
    prompt_constructor = prompt_constructors_by_model.get(model_name)

    for i, row in challenge_question_df.iterrows():

        if i not in nonzeroindices:
            # Keep the answers aligned with the DataFrame rows.
            answers.append("")
            print(f'{i} skipped')
            continue

        question_text = preprocess_text_input(f'''Subject: {row['short_description']}\n\nDescription: {row['long_description']}''')
        ans, in_tokens, out_tokens, in_l1, out_l1, in_l2, out_l2 = get_answer_from_question_and_relevant_chunks(
            cases_by_index[i],
            kb_by_index[i],
            question_text,
            params,
            assignment_group=row['assignment_group'],
            model_prompt_function=prompt_constructor,
            model_name=model_name,
        )

        # update token counts to track usage
        usage["in_total"] += in_tokens
        usage["out_total"] += out_tokens
        usage["in_l1"] += in_l1
        usage["in_l2"] += in_l2
        usage["out_l1"] += out_l1
        usage["out_l2"] += out_l2

        # append the answer
        answers.append(cap_consecutive_newlines(ans))
        print(f'{i} processed with model {model_name}')

    combined_answers_by_model[model_name] = answers

0 processed with model ibm/mpt-7b-instruct
1 processed with model ibm/mpt-7b-instruct
2 processed with model ibm/mpt-7b-instruct
0 processed with model ibm/granite-13b-chat-v1
1 processed with model ibm/granite-13b-chat-v1
2 processed with model ibm/granite-13b-chat-v1
0 processed with model meta-llama/llama-2-70b-chat
1 processed with model meta-llama/llama-2-70b-chat
2 processed with model meta-llama/llama-2-70b-chat


In [93]:
# Show the per-model token usage gathered by the generation loop
# (l1 = relevance filtering, l2 = answer generation).
token_counts_by_model

{'ibm/mpt-7b-instruct': {'in_total': 10394,
  'out_total': 461,
  'in_l1': 8930,
  'out_l1': 60,
  'in_l2': 1464,
  'out_l2': 401},
 'ibm/granite-13b-chat-v1': {'in_total': 10574,
  'out_total': 149,
  'in_l1': 8930,
  'out_l1': 60,
  'in_l2': 1644,
  'out_l2': 89},
 'meta-llama/llama-2-70b-chat': {'in_total': 10767,
  'out_total': 635,
  'in_l1': 8930,
  'out_l1': 60,
  'in_l2': 1837,
  'out_l2': 575}}

In [54]:
# Input tokens consumed by the relevance-filter calls, summed over all questions.
# NOTE(review): the saved output of this cell (8827) differs from the `in_l1` value
# shown for each model (8930), so it appears to come from an earlier run — re-run to refresh.
total_in_tokens_l1

8827

In [55]:
# Output tokens generated by the relevance-filter calls, summed over all questions.
total_out_tokens_l1

60

In [59]:
# Assemble the review sheet: the original questions plus one answer column per model,
# followed by one blank score column per model, then write it to Excel.
# NOTE(review): `account` is not assigned in any cell visible in this notebook —
# confirm it is defined before this cell runs.
questions_with_answers_df = challenge_question_df.copy()

for position, name in enumerate(model_names, start=1):
    questions_with_answers_df[f'model{position}_ans'] = combined_answers_by_model[name]

for position, _ in enumerate(model_names, start=1):
    questions_with_answers_df[f'model{position}_score'] = ""

questions_with_answers_df.to_excel(f'{account}_round1_with_answers.xlsx', index=False)