In [1]:
# Part 0 - Setup environmental variables; should be moved to separate place

import os
from dotenv import load_dotenv

# Load variables from .env file
load_dotenv()

# Access the variables
email = os.getenv("EMAIL")
api_token = os.getenv("JIRA_API_TOKEN")
server_url = os.getenv("SERVER_URL")
openai_api_key = os.getenv("OPENAI_API_KEY")

# Use the variables in your code
os.environ["OPENAI_API_KEY"] = openai_api_key
import openai
openai.api_key = openai_api_key


In [2]:
# Part 1 - Customize llama_index Jira Reader as original one is not capable of reading in metadata with Dict or List

from llama_index.readers.jira import JiraReader
from llama_index.core.schema import Document
from llama_index.core.ingestion.pipeline import run_transformations

class MyJiraReader(JiraReader):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.verbose = False

    def set_verbose(self, verbose):
        self.verbose = verbose

    def load_data(self, query: str, start_at: int = 0, max_results: int = 50):
        issues = super().load_data(query, start_at, max_results)

        issues_with_clean_metadata = []
        for issue in issues:
            # Get the full Jira issue
            jira_issue = self.jira.issue(issue.doc_id)
            print(f"Jira ticket ID: {issue.doc_id} - URL: {issue.extra_info.get('url', 'N/A')}")
            
            # Fetch the comments (Move comments out of metadata)
            comments = jira_issue.fields.comment.comments
            comments_text = [comment.body for comment in comments]
            comments_str = "\n".join(comments_text)  # Join comments into a single string

            # Get all metadata dynamically from the issue's extra_info
            extra_info = issue.extra_info

            # Ensure that all metadata values conform to valid types
            extra_info = self.ensure_metadata_compliance(extra_info)

            # Move comments into the main content (not metadata)
            main_text_with_comments = issue.text + "\nComments:\n" + comments_str

            # Append the updated issue with comments and compliant metadata to the list
            issues_with_clean_metadata.append(
                Document(
                    text=main_text_with_comments,  # Include comments in the main text
                    doc_id=issue.doc_id,
                    extra_info=extra_info  # Metadata without comments
                )
            )

        # Now, apply transformations and log metadata
        nodes = run_transformations(
            issues_with_clean_metadata,
            transformations=[],  # Provide an empty list if no transformations are required
            show_progress=True  # Optionally show progress
        )

        # Log metadata sizes by type and total size after transformations, controlled by verbose flag
        for node in nodes:
            total_size, metadata_size, metadata_breakdown, main_text_size = self.calculate_metadata_size_by_type(node)
            print(f"Transformed node ID: {node.doc_id}, total size: {total_size}, metadata size: {metadata_size}, main text size: {main_text_size}")
            # <-- Only print logs if verbose is True
            if self.verbose:  
                print(f"Metadata breakdown for node {node.doc_id}:")
                for field, size in metadata_breakdown.items():
                    print(f"  - {field}: {size} characters")

            # Check if the metadata size exceeds the chunk size
            if metadata_size > 1024:
                print(f"Node with ID: {node.doc_id} exceeds chunk size after transformation. Metadata size: {metadata_size}")
                if self.verbose:
                    print(f"Full metadata for node {node.doc_id}: {node.extra_info}")

        return nodes
    
    def ensure_metadata_compliance(self, metadata):
        """Ensure that all metadata values are of valid types."""
        if isinstance(metadata, dict):
            for key, value in metadata.items():
                # If the value is a list, convert it to a comma-separated string
                if isinstance(value, list):
                    metadata[key] = ', '.join(map(str, value))
                # If the value is a dictionary, convert it to a string (or JSON string if needed)
                elif isinstance(value, dict):
                    metadata[key] = str(value)  # Or use json.dumps(value) for JSON format
                # For all other types, leave it as is if it's str, int, float, or None
                elif not isinstance(value, (str, int, float, type(None))):
                    metadata[key] = str(value)  # Convert unsupported types to string
        return metadata
    
    def calculate_metadata_size_by_type(self, node):
        """Calculate both the total size of the document and the size of each metadata field."""
        text_length = len(node.text) if node.text else 0  # Length of the main text
        metadata_size = 0
        metadata_breakdown = {}

        # Calculate the size of each metadata field
        if node.extra_info:
            for key, value in node.extra_info.items():
                field_size = len(str(value)) if value else 0
                metadata_size += field_size
                metadata_breakdown[key] = field_size

        # Calculate the total size (text + metadata)
        total_size = text_length + metadata_size

        return total_size, metadata_size, metadata_breakdown, text_length



In [3]:
# Part 2 - Connect to Jira system, and read Jira tickets 
reader = MyJiraReader(
    email=email, api_token=api_token, server_url=server_url
) 

In [4]:
# Part 3 - Configure llama_index, including setting up embedding
# This works for llama_index Version: 0.11.14
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import VectorStoreIndex, Document
from llama_index.core import StorageContext
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings

CHUNK_SIZE = 500

# Configure embedding model
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

db = chromadb.PersistentClient(path="./storage/chroma_cr_jira")
collection = db.get_or_create_collection("cr_jira_db")

# Indexing documents with chunking
def split_document_into_chunks(document, chunk_size=CHUNK_SIZE):
    """
    Splits a document into chunks with the specified size.
    """
    text = document.text
    chunks = []
    for idx, i in enumerate(range(0, len(text), chunk_size)):
        chunk_text = text[i:i + chunk_size]
        chunk_document = Document(
            text=chunk_text,
            doc_id=document.doc_id,
            extra_info={
                "chunk_idx": idx,  # Track chunk index
                **document.extra_info  # Carry over additional metadata
            }
        )
        chunks.append(chunk_document)
    return chunks

def index_documents_with_chunking(documents, chunk_size=CHUNK_SIZE):
    """
    Index documents by splitting them into smaller chunks.
    """
    chunked_documents = []
    for document in documents:
        chunks = split_document_into_chunks(document, chunk_size)
        chunked_documents.extend(chunks)
    
    vector_store = ChromaVectorStore(chroma_collection=collection)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    
    index = VectorStoreIndex.from_documents(chunked_documents, storage_context=storage_context)
    
    return index

# Now run the indexing process
reader.set_verbose(False)
documents = reader.load_data(query='Project = "Cybereason Defenders" AND status = Closed ORDER BY created DESC', start_at=0, max_results=500)
index = index_documents_with_chunking(documents, chunk_size=CHUNK_SIZE)

  from .autonotebook import tqdm as notebook_tqdm


Jira ticket ID: 328390 - URL: https://cybereason.atlassian.net/browse/DFND-70632
Jira ticket ID: 328327 - URL: https://cybereason.atlassian.net/browse/DFND-70617
Jira ticket ID: 328109 - URL: https://cybereason.atlassian.net/browse/DFND-70567
Jira ticket ID: 328104 - URL: https://cybereason.atlassian.net/browse/DFND-70566
Jira ticket ID: 328077 - URL: https://cybereason.atlassian.net/browse/DFND-70553
Jira ticket ID: 328045 - URL: https://cybereason.atlassian.net/browse/DFND-70549
Jira ticket ID: 328016 - URL: https://cybereason.atlassian.net/browse/DFND-70537
Jira ticket ID: 327802 - URL: https://cybereason.atlassian.net/browse/DFND-70506
Jira ticket ID: 327801 - URL: https://cybereason.atlassian.net/browse/DFND-70505
Jira ticket ID: 327720 - URL: https://cybereason.atlassian.net/browse/DFND-70486
Jira ticket ID: 327717 - URL: https://cybereason.atlassian.net/browse/DFND-70483
Jira ticket ID: 327709 - URL: https://cybereason.atlassian.net/browse/DFND-70475
Jira ticket ID: 326945 - URL

In [5]:
# Part 5 - search context from vector database

search_text = "Any issue related to not able to increase policy limit"
embedding = Settings.embed_model.get_text_embedding(search_text)
results = collection.query(
    query_embeddings=[embedding],
    n_results=1  # Limit to only the top result
)

# Function to filter out duplicate lines and limit context size
def filter_context(context_str, max_size=1000):
    # Remove duplicate lines by converting to a set and back to a list
    unique_lines = list(dict.fromkeys(context_str.split("\n")))
    filtered_context = "\n".join(unique_lines)

    # Limit context to max_size characters
    if len(filtered_context) > max_size:
        filtered_context = filtered_context[:max_size]

    return filtered_context

# Extract context and metadata from the first result
document = results['documents'][0]  # Extract the document text
metadatas = results['metadatas'][0]  # Extract the metadata list
context_str = "\n".join(document)

# Get metadata (url and title)
url = metadatas[0].get('url', 'No URL available')
title = metadatas[0].get('title', 'No Title available')

# Add url and title to the context string
context_str = f"Title: {title}\nURL: {url}\n\n{context_str}"

# Filter the context string to remove duplicates and limit size
context_str = filter_context(context_str, max_size=1000)

# Print the metadata and context
print("\nFinal Context Sent to ChatGPT:\n", context_str)


Final Context Sent to ChatGPT:
 Title: 00342462 | can't increase policies limit
URL: https://cybereason.atlassian.net/browse/DFND-69594

00342462 | can't increase policies limit 
 Hello,
----
I can’t increase the policies limit…
Related KB:
[https://nest.cybereason.com/knowledgebase/2584194|https://nest.cybereason.com/knowledgebase/2584194]
Consul:
[http://cr-consul.prod.cybereason.net:8500/ui/us-east-1/kv/ms-suits/sensor-management-suite/stacks/bnkbqmsr-stack/config/sm-management/service.policy.max-allowed/edit|http://cr-consul.prod.cybereason.net:8500/ui/us-east-1/kv/ms-suits/sensor-management-suite/stacks/bnkbqmsr-stack/config/sm-management/service.policy.max-allowed/edit]
Java exception seen in catalina during the process of creating test policy called “test1234”.
Sharing output of catalina during the policy creation process: “creating_policy_test1234_catalina.log”
{noformat}java.util.concurrent.ExecutionException: java.lang.RuntimeException: io.grpc.StatusRuntimeException: UNKNOW

In [6]:
# Part 6 - Query with context

from llama_index.core import PromptTemplate

# Modify the prompt template to emphasize inclusion of the URL in the response
text_qa_template_str = (
    "You have very comprehensive knowledge and deep insights into cybersecurity, network, and operating system domains.\n"
    "Always answer the query using only the provided context information, and not prior knowledge.\n"
    "Some rules to follow: \n"
    "1. Always provide the Jira ticket URL when answering with information from any Jira ticket.\n"
    "2. Using both the context information and your own knowledge.\n"
    "3. Always make sure to include the URL of the Jira ticket provided in the context.\n"
    "Context information is below. \n"
    "-----------------\n"
    "{context_str}\n"
    "-----------------\n"
    "Answer the question: {query_str}\n"
)

text_qa_template = PromptTemplate(text_qa_template_str)

# Prepare the query and context
query_str = search_text

# Render the prompt template with the context and query
rendered_prompt = text_qa_template.format(context_str=context_str, query_str=query_str)

# Calculate and print the length of the text sent to ChatGPT
print("\nLength of the prompt sent to ChatGPT:", len(rendered_prompt))

# Log the prompt that will be sent to ChatGPT
print("Prompt sent to ChatGPT:\n")
print(rendered_prompt)


Length of the prompt sent to ChatGPT: 1613
Prompt sent to ChatGPT:

You have very comprehensive knowledge and deep insights into cybersecurity, network, and operating system domains.
Always answer the query using only the provided context information, and not prior knowledge.
Some rules to follow: 
1. Always provide the Jira ticket URL when answering with information from any Jira ticket.
2. Using both the context information and your own knowledge.
3. Always make sure to include the URL of the Jira ticket provided in the context.
Context information is below. 
-----------------
Title: 00342462 | can't increase policies limit
URL: https://cybereason.atlassian.net/browse/DFND-69594

00342462 | can't increase policies limit 
 Hello,
----
I can’t increase the policies limit…
Related KB:
[https://nest.cybereason.com/knowledgebase/2584194|https://nest.cybereason.com/knowledgebase/2584194]
Consul:
[http://cr-consul.prod.cybereason.net:8500/ui/us-east-1/kv/ms-suits/sensor-management-suite/st

In [7]:
# Send the prompt to the query engine
response = index.as_query_engine(
    text_qa_template=text_qa_template
).query(query_str)

# Optionally log the response
print("\nResponse from ChatGPT:\n")
print(response)


Response from ChatGPT:

Based on the provided context information from Jira ticket 00342462, it seems that the issue reported is related to not being able to increase the policies limit. The user mentioned that they can't increase the policies limit and provided a link to a related Knowledge Base article and a Consul link for configuration.

To troubleshoot this issue further, the user also shared a Java exception seen in catalina during the process of creating a test policy called "test1234". The output of catalina during the policy creation process is shared in the file "creating_policy_test1234_catalina.log".

To address this issue, it would be necessary to review the Java exception in the catalina log file to understand the specific error message and stack trace that occurred during the policy creation process. Additionally, checking the configuration in Consul related to the policies limit could also provide insights into why the limit cannot be increased.

For a more detailed an

In [121]:
# Prepare the query and context
query_str = "Please suggest link of Jira ticket which your answer is based on"

# Render the prompt template with the context and query
rendered_prompt = text_qa_template.format(context_str=context_str, query_str=query_str)

# Calculate and print the length of the text sent to ChatGPT
print("\nLength of the prompt sent to ChatGPT:", len(rendered_prompt))

# Log the prompt that will be sent to ChatGPT
print("Prompt sent to ChatGPT:\n")
print(rendered_prompt) 


Length of the prompt sent to ChatGPT: 1549
Prompt sent to ChatGPT:

You have very comprehensive knowledge and deep insights into cybersecurity, network and operating system domain.
Always answer the query only using the provided context information, and not prior knowledge.
Some rules to follow: 
1. Using both the context information and also using your own knowledge.
2. Give link of the Jira ticket if you are answering with information from any Jira ticket.
Context information is below. 
-----------------
try, please make sure they also send the *installation logs*, as in last time

Thanks
Hello [~accountid:61f62b8dacd4cb0069befbe1], thanks for uploading the files. Per reviewing the procmon file, I still believe Morphisec may be the cause to the issue.

ProtectorService.exe is Morphisec sensor/agent, we can see it running in parallel to our installer and also doing actions related to the installer (on the installer)

The difference between the existing working machines (morphisec + c

In [3]:
import openai

# Set the API key directly
openai.api_key = "your_new_api_key"

# Make sure to use the correct ChatCompletion method
response = openai.ChatCompletion.create(
    model="gpt-4",
    messages=[{"role": "system", "content": "Test message"}]
)

# Print the response
print(response.choices[0].message.content)

APIRemovedInV1: 

You tried to access openai.ChatCompletion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742
