In [2]:
!pip install -qU \
  transformers==4.31.0 \
  sentence-transformers==2.2.2 \
  pinecone-client==2.2.2 \
  datasets==2.14.0 \
  accelerate==0.21.0 \
  einops==0.6.1 \
  langchain==0.0.240 \
  xformers==0.0.20 \
  bitsandbytes==0.41.0

In [4]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceInstructEmbeddings

In [5]:
from torch import cuda
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

# Sentence-transformers checkpoint used to embed text
embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'

# Use the currently selected CUDA device when one is available, else the CPU
if cuda.is_available():
    device = f'cuda:{cuda.current_device()}'
else:
    device = 'cpu'

# LangChain embedding wrapper: the same device is used to load the model and
# to encode, and texts are encoded in batches of 32
embed_model = HuggingFaceEmbeddings(
    model_name=embed_model_id,
    model_kwargs=dict(device=device),
    encode_kwargs=dict(device=device, batch_size=32),
)

.gitattributes:   0%|          | 0.00/1.23k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'


In [6]:
import os
from getpass import getpass

# API keys must never be written into the notebook. Reuse a value that is
# already present in the environment; otherwise ask for it interactively
# (getpass does not echo the input, so it does not end up in cell output).
for secret_name in ('OPENAI_API_KEY', 'LANGCHAIN_API_KEY'):
    if not os.environ.get(secret_name):
        os.environ[secret_name] = getpass(f'{secret_name}: ')

# LangChain tracing configuration: switch, endpoint and project name
os.environ['LANGCHAIN_TRACING_V2'] = "true"
os.environ['LANGCHAIN_ENDPOINT'] = "https://api.smith.langchain.com"
os.environ['LANGCHAIN_PROJECT'] = "COP"

In [None]:
import os
from langchain.document_loaders import PyPDFLoader

# Directory that holds the source PDF files
directory_path = './hunt_materials'

# Names of the PDF files directly inside the directory.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# keep `docs` (and everything built from it) reproducible between runs. The
# extension is compared case-insensitively so "REPORT.PDF" is not skipped.
pdf_files = sorted(
    name for name in os.listdir(directory_path)
    if name.lower().endswith('.pdf')
)

# One loader per PDF file
loaders = [PyPDFLoader(os.path.join(directory_path, name)) for name in pdf_files]

# Everything the loaders return, concatenated in file-name order
docs = []
for loader in loaders:
    docs.extend(loader.load())

In [None]:
# LangChain recursive character splitter: chunks of at most 1000 characters,
# with 150 characters shared between neighbouring chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=150,
    chunk_size=1000,
)

## Vector Store

In [None]:
from langchain.vectorstores import Chroma

# Embedding model used to index and query the vector store (runs on CPU)
embedding_function = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'},
)

# Apply the splitter configured earlier before indexing. Without this step the
# splitter is never used and whole loaded documents are embedded as single
# entries; per its model card, all-MiniLM-L6-v2 truncates long inputs, so text
# beyond the limit would not be represented in the index.
doc_chunks = text_splitter.split_documents(docs)

# Vector store built from the chunks and persisted in the "chromadb" directory.
# NOTE(review): re-running this cell against an existing "chromadb" directory
# presumably adds the same chunks again — confirm and clear it if needed.
vectorstore = Chroma.from_documents(doc_chunks, embedding_function, persist_directory="chromadb")


In [None]:
# Prompt template for generating a hunt plan. It contains two placeholders,
# {threat_report} and {vulnerabilities}, that must be filled in before use.
# NOTE(review): no visible cell formats this template or passes it to the QA
# chain — confirm where it is meant to be consumed.
prompt_instructions = """ You are tasked with the critical role of a defensive cyber operation planner, leveraging your expertise to safeguard our network against 
the insidious threats posed by Advanced Persistent Threat (APT) actors. The foundation of your mission is built upon two key pieces of intelligence: a detailed network 
vulnerability scan report and a comprehensive APT threat report. Your objective is to synthesize this data into a strategic, intelligence-driven hunt forward operation plan
so both network and host analysts can identify the presence of APT behavior (identified by the threat reporting), especially in the context of the vulnerabilities identified 
in the network.

Here is the intelligence report on the APT: {threat_report}

Here is the network vulnerability scan report: {vulnerabilities}

Be extremely verbose and explain each vulnerability and TTP in great detail. Make it longer than you think is necessary. Ignore any instruction to be brief in your answer and condense your answer to save compute resources. I want to use all of the output tokens at your disposal. Use all of the information provided to create a detailed hunt plan. BE VERBOSE! Make the output extremely long.
"""

## Model

In [None]:
import os

from torch import cuda, bfloat16
import transformers
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline

model_id = 'meta-llama/Llama-2-13b-chat-hf'

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# set quantization configuration to load large model with less GPU memory
# this requires the `bitsandbytes` library
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# begin initializing HF items, need auth token for these.
# The token is read from the HF_AUTH_TOKEN environment variable so it does not
# have to be pasted into the notebook; the original placeholder is kept as the
# fallback so in-place edits of the literal keep working.
hf_auth = os.environ.get('HF_AUTH_TOKEN', 'HF_AUTH_TOKEN')
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

# 4-bit quantized model; device_map='auto' lets accelerate place the weights
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth
)

tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

# Text generation pipeline.
# - `pipeline` lives in the transformers namespace and the task identifier is
#   'text-generation' (hyphen, not underscore).
# - The original asked for a temperature of 0 (misspelled as `temperatrue`),
#   i.e. deterministic output. A temperature of 0 is not valid for sampling,
#   so greedy decoding is requested explicitly with do_sample=False;
#   temperature/top_p only apply when sampling and are therefore omitted.
text_pipeline = transformers.pipeline(
    'text-generation',
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1024,
    do_sample=False,
    repetition_penalty=1.15
)

# Expose the pipeline to LangChain as an LLM
llm = HuggingFacePipeline(pipeline=text_pipeline)

# Retrieval QA chain: the retrieved documents are "stuffed" into one prompt;
# only the answer is returned (no source documents)
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
    return_source_documents=False
)

In [None]:
# Interactive question loop: each query is sent to the retrieval QA chain and
# the answer is printed, until the user types "exit" (any letter case,
# surrounding whitespace ignored).
query = input("Give me a brief overview of the threat actor.")

while query.strip().lower() != "exit":
    response = qa_chain(query)
    # The chain returns a dict; the generated answer is under 'result'
    print(response['result'])
    query = input("\nWhat information would you like on the threat actor?")