In [None]:
import os

# Expose only GPU 0 to CUDA-based libraries imported later in the notebook.
os.environ.update({'CUDA_VISIBLE_DEVICES': "0"})

In [None]:
import tensorflow as tf
# Build a TF1-compat session whose GPU options have allow_growth enabled.
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
# NOTE(review): `session` is not referenced again in the visible cells — presumably
# it is created only for its effect on GPU memory allocation; confirm.
session = tf.compat.v1.Session(config=config)

In [None]:
from sentence_transformers import SentenceTransformer

# Smoke-test the sentence encoder on two sample sentences.
sentences = [
    "This is an example sentence",
    "Each sentence is converted",
]

model = SentenceTransformer('sentence-transformers/all-MiniLM-L12-v2')
embeddings = model.encode(sentences)

## Load the MITRE Sigma master CSV

In [None]:
import pandas as pd
# Source table for the vector store; a later cell uses its 'Body' column as the document text.
mitre_sigma = pd.read_csv("MITRE/mitreembed_master_Chroma.csv")


In [None]:
import pandas as pd
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

# Directory that holds the locally saved copy of the sentence-transformers model.
embedding_model_path = './'

# Fetch the model, then write it to disk so it can be loaded by path.
original_model = SentenceTransformer('all-MiniLM-L12-v2')
original_model.save(embedding_model_path)

# Re-open the saved model through LangChain's HuggingFace embeddings wrapper.
embedding_model = HuggingFaceEmbeddings(model_name=embedding_model_path)

In [None]:
# Load the Hugging Face tokenizer and encoder for the MiniLM checkpoint.
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F

tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L12-v2')
# NOTE(review): this rebinds `model`, replacing the SentenceTransformer instance
# assigned to the same name in an earlier cell.
model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L12-v2')



In [None]:
# Rebuild the LangChain embedding wrapper from the saved model directory.
# NOTE(review): identical to the assignment made in the cell that saves the model; likely redundant.
embedding_model = HuggingFaceEmbeddings(model_name=embedding_model_path)

In [None]:
# Set up ChromaDB.
#import pydantic
#from pydantic_settings import BaseSettings

from langchain.vectorstores import Chroma
# Directory used as the Chroma persist location by later cells.
# NOTE(review): this is the same './' directory the embedding model is saved to,
# so model files and the vector store share one folder.
chromadb_path = './'
import chromadb
# Client created with default arguments; in the visible cells it is only used to query the version.
chroma_client = chromadb.Client()
chroma_client.get_version()

In [None]:
from sentence_transformers import SentenceTransformer
 
from langchain.document_loaders import DataFrameLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
import re
#import mlflow
 
import pandas as pd
# Turn the rows of the MITRE/Sigma frame into LangChain documents,
# using the 'Body' column as each document's page content.
documents = DataFrameLoader(mitre_sigma, page_content_column='Body').load()

In [None]:
# Directory where the Chroma collection is persisted.
chromadb_path = './'

# Embed the documents with the LangChain embedding wrapper and store them in Chroma.
# NOTE(review): re-running this cell against the same persist directory presumably
# adds the documents again — confirm before re-executing.
vectordb = Chroma.from_documents(
  documents=documents, 
  embedding=embedding_model, 
  persist_directory=chromadb_path, 
  #collection_name = 'CISA_MITRE'
  )

# Write the vector store to the persist directory.
vectordb.persist()

In [None]:
# Uncomment to drop the whole collection:
#vectordb.delete_collection()

# Number of records currently held in the collection.
vectordb._collection.count()



In [None]:
# Inspect one stored record: metadata, document text, id and embedding.
rec = vectordb._collection.peek(1)
for label, field in (
    ('Metadatas:  ', 'metadatas'),
    ('Documents:  ', 'documents'),
    ('ids:        ', 'ids'),
    ('embeddings: ', 'embeddings'),
):
    print(label, rec[field])

In [None]:
# Ad-hoc similarity query; each hit is returned together with its score.
vectordb.similarity_search_with_score("8080")

In [None]:
# Request up to 50 hits for the query, filtered by a 0.80 relevance-score threshold.
# NOTE(review): `search_type="hybrid"` does not look like an argument this call acts on —
# presumably it is silently ignored; confirm against the installed LangChain version or remove.
vectordb.similarity_search_with_relevance_scores("MoveIt Vulnerability", k=50, score_threshold=0.80, search_type="hybrid")


In [None]:
class ProductSearchWrapper:
    """Semantic search over a persisted Chroma collection.

    Wraps an embedding model and a Chroma vector store and exposes a single
    `predict` method that returns the best matches for a text query as a
    pandas DataFrame.
    """

    # Column order of the frame returned by `predict`.
    _RESULT_COLUMNS = ('Subject', 'Date', 'Body', 'filepath', 'Source', 'score')

    def __init__(self, embedding_model, chromadb, max_results=25):
        """Open the vector store and remember the search settings.

        Args:
            embedding_model: Name or local path of the sentence-transformers
                model to load through HuggingFaceEmbeddings. A non-string
                value is assumed to be a ready-made embeddings object and is
                used as-is.
            chromadb: Directory the Chroma collection was persisted to.
                (Inside this method the name shadows the `chromadb` module.)
            max_results: Maximum number of hits `predict` asks the store for.
        """
        # Use the arguments that were passed in rather than notebook globals,
        # so the wrapper works regardless of which cells have been executed.
        if isinstance(embedding_model, str):
            self.embedding_model = HuggingFaceEmbeddings(model_name=embedding_model)
        else:
            self.embedding_model = embedding_model

        # Retrieve vectordb contents
        self._vectordb = Chroma(persist_directory=chromadb, embedding_function=self.embedding_model)

        # Set number of results to return
        self._max_results = max_results

    def predict(self, query):
        """Search the vector store and return the hits as a DataFrame.

        Args:
            query: Free-text search string.

        Returns:
            DataFrame with columns Subject, Date, Body, filepath, Source and
            score, sorted by ascending score. When the store returns no hits
            the frame is empty but still has all six columns.

        Raises:
            KeyError: If a hit's metadata lacks one of the expected fields.
        """
        # Perform search on embeddings
        raw_results = self._vectordb.similarity_search_with_score(query, k=self._max_results)

        # One record per hit. Building records (instead of unpacking zip(*...))
        # keeps this working when the search returns nothing.
        records = [
            {
                'Subject': doc.metadata['Subject'],
                'Date': doc.metadata['Date'],
                'Body': doc.page_content,
                'filepath': doc.metadata['filepath'],
                'Source': doc.metadata['Source'],
                'score': score,
            }
            for doc, score in raw_results
        ]
        results_pd = pd.DataFrame(records, columns=list(self._RESULT_COLUMNS))

        # Lowest score first, as in the original ordering.
        return results_pd.sort_values(axis=0, by='score', ascending=True)


In [None]:
# Model objective, as described for the underlying sentence encoder:
"""
The model is intented to be used as a sentence and short paragraph encoder. 
Given an input text, it ouptuts a vector which captures the semantic information. 
The sentence vector may be used for information retrieval, clustering or sentence similarity tasks.
"""

# Build the search wrapper from the saved model path and the Chroma persist directory.
model = ProductSearchWrapper(
    embedding_model=embedding_model_path,
    chromadb=chromadb_path,
    max_results=25,
)

# Run a sample query and display the ranked hits.
query = "MoveIT vulnerability"
results = model.predict(query)

df_results = pd.DataFrame(results)
df_results

## LLM integration

In [None]:
import torch

# Report basic GPU diagnostics when PyTorch can see a CUDA device.
use_cuda = torch.cuda.is_available()
if use_cuda:
    gpu_index = 0  # device whose name and memory are reported
    print('__CUDNN VERSION:', torch.backends.cudnn.version())
    print('__Number CUDA Devices:', torch.cuda.device_count())
    print('__CUDA Device Name:', torch.cuda.get_device_name(gpu_index))
    print(
        '__CUDA Device Total Memory [GB]:',
        torch.cuda.get_device_properties(gpu_index).total_memory / 1e9,
    )

In [None]:
import gc
import torch

# Run Python garbage collection first so unreachable objects are dropped...
gc.collect()

# ...then ask PyTorch to release its cached, unused CUDA memory.
torch.cuda.empty_cache()

In [None]:
## initialize LLM wizardlm-13b

In [None]:
# These three names are used below but are not imported in any of the visible cells,
# so import them here to keep the cell runnable on a fresh kernel.
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import LlamaCpp

n_gpu_layers = 35  # Metal set to 1 is enough.
#n_batch = 1042  # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.

# Stream generated tokens to stdout as they are produced.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Make sure the model path is correct for your system!
# NOTE(review): max_tokens (2000) is close to n_ctx (2048), which leaves little room
# for the prompt — confirm these limits are intended.
llm = LlamaCpp(
    #model_path="Lilly/Lily-7B-Instruct-v0.2.Q5_K_M.gguf",
    model_path="./wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.gguf.bin",
    n_gpu_layers=n_gpu_layers,
 #   n_batch=n_batch,
 #   temperature=0.75,
    max_tokens=2000,
 #   top_p=1,
    n_ctx=2048,
    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
    callback_manager=callback_manager,
    verbose=False,
)

In [None]:
#%tensorflow_version 2.x
#!pip install tf-nightly
import tensorflow as tf
# Print the GPU device name reported by TensorFlow.
device_name = tf.test.gpu_device_name()
print(device_name)

## Initialize KoboldCpp

In [None]:
#https://medium.com/p/9f890e6960f3

from langchain import PromptTemplate, LLMChain
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.llms import KoboldApiLLM

# Bind `llm` to the local KoboldCpp server BEFORE building the chain.
# Previously this assignment came after `qa` was created, so the chain captured
# whatever `llm` happened to be bound to earlier and never used KoboldCpp.
llm = KoboldApiLLM(endpoint="http://localhost:5001", max_length=80)

# Re-open the persisted vector store with the LangChain embedding wrapper.
vectordb = Chroma(persist_directory=chromadb_path, embedding_function=embedding_model)

# Default retriever over the store (the chain below builds its own with k=1).
retriever = vectordb.as_retriever()

# Prompt template; currently not wired into the chain (see the commented
# chain_type_kwargs line below).
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

# Retrieval QA chain: fetch the single closest document and answer with the
# "refine" chain type, returning the source documents alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="refine",
    #chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    retriever=vectordb.as_retriever(search_kwargs={"k": 1}),
    return_source_documents=True,
    verbose=False,
)

In [None]:
# Ask the retrieval chain a question; the result is displayed as the cell output.
res = qa("Tell me about the MoveIT vulnerability.")
res