In [None]:
%pip install openai langchain pypdf chromadb tiktoken jupyterlab

In [1]:
import os
with open("openai.key", "r") as f:
    key = f.read()
    os.environ["OPENAI_API_KEY"] = key


In [2]:
from pathlib import Path

reports_dir = Path("reports/")
reports = []
for file in reports_dir.iterdir():
    reports.append(str(file))

In [3]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

documents = []

for report in reports:
    loader = PyPDFLoader(report)
    documents.extend(loader.load())

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1500,
    chunk_overlap  = 0,
    length_function = len,
)


In [4]:
from langchain.llms import OpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains.question_answering import load_qa_chain
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA

texts = text_splitter.split_documents(documents)

embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(texts, embeddings)
#retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k":1})
#qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=retriever, return_source_documents=True)

Using embedded DuckDB without persistence: data will be transient


In [5]:
from langchain.callbacks.base import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.prompts.prompt import PromptTemplate
from langchain.chains import (
    ConversationalRetrievalChain,
    LLMChain
)
from langchain.chat_models import ChatOpenAI

_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
condense_question_prompt = PromptTemplate.from_template(_template)

template = """You are an AI assistant for answering questions about threat intelligence.
You are given the following extracted parts of a long intelligence document and a question. Provide a concise and accurate answer.
If you don't know the answer, just say "I don't have the data to answer that." Don't try to make up an answer.
Question: {question}
=========
{context}
=========
Answer in Markdown:"""
qa_prompt = PromptTemplate(template=template, input_variables=["question", "context"])


# define two LLM models from OpenAI
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")
 
streaming_llm = OpenAI(
    streaming=True,
    callback_manager=CallbackManager([
        StreamingStdOutCallbackHandler()
    ]),
    verbose=True,
    max_tokens=150,
    temperature=0.2
)
 
# use the LLM Chain to create a question creation chain
question_generator = LLMChain(
    llm=llm,
    prompt=condense_question_prompt
)
 
# use the streaming LLM to create a question answering chain
doc_chain = load_qa_chain(
    llm=streaming_llm,
    chain_type="stuff",
    prompt=qa_prompt
)

In [6]:
chatbot = ConversationalRetrievalChain(
    retriever=vectorstore.as_retriever(),
    combine_docs_chain=doc_chain,
    question_generator=question_generator,
    return_source_documents=True
)

In [7]:
# create a chat history buffer
chat_history = []
# gather user input for the first question to kick off the bot
question = input()

# keep the bot running in a loop to simulate a conversation
while True:
    result = chatbot(
        {"question": question, "chat_history": chat_history}
    )
    print("\n\nReferences:")
    [print("{}".format(document.metadata['source'])) for document in result['source_documents']]
    print("========================")
    chat_history.append((result["question"], result["answer"]))
    question = input()

 List Russian APT groups and their modus operandi



APT28 (Sofacy/Fancy Bear) is associated with the military intelligence GRU and is known for its long-term cyber operations with a clear direction based on Russia’s interests and objectives. SNAKE (Turla) is tied to the Federal Security Service FSB, and APT29 (Cozy Bear/The Dukes) is associated with the FSB and the foreign intelligence service SVR. These groups are known for targeting government and military entities, private companies, and using freely available online tools to masquerade their attacks. Examples of their activities include the GRU cyber operations against the World Anti-Doping Agency (WADA) in September 2016 and against the International Olympic Committee (IOC)

References:
reports/efia2018.pdf
reports/ESET_Industry_Report_Government.pdf
reports/efia2019.pdf
reports/ESET_Industry_Report_Government.pdf


 Do the same for North Korea and China



APT groups active in North Korea and China include Turla, Gamaredon, XDSpy, and Sandworm. Turla is known for targeting government and military entities, while Gamaredon is known for targeting governmental organizations in Ukraine. XDSpy has managed to stay under the radar for nine years, and Sandworm is one of the most dangerous APT groups in operation. These groups use a variety of tactics, including fileless malware, custom open-source PowerShell loaders, and exploiting Microsoft Outlook and Microsoft Exchange servers.

References:
reports/ESET_Industry_Report_Government.pdf
reports/ESET_Industry_Report_Government.pdf
reports/sapo2022.pdf
reports/ESET_Industry_Report_Government.pdf


 Why is Russa a threat to Sweden?



Ryssland utgör det största hotet mot Sveriges säkerhet. Ryssland har avsikt och förmåga att försvaga Sveriges handlingskraft och säkerhet. Målet för Rysslands aktiviteter mot Sverige är att verka för att Sverige ska vara alliansfritt och hållas utanför försvarsalliansen Nato. Ryssland bedriver kontinuerligt underrättelseinhämtning i Sverige, inklusive från rys

References:
reports/sapo2021.pdf
reports/sapo2020.pdf
reports/sapo2022.pdf
reports/sapo2019.pdf


 Is Norway also a target?



Ryssland har avsikt och förmåga att försvaga Sveriges handlingskraft och säkerhet. Detta tar sig uttryck i kontinuerlig rysk säkerhetshotande verksamhet mot det svenska samhället. Rysslands mål är att verka för att Sverige ska vara alliansfritt och hållas utanför försvarsalliansen Nato, samt att stärka den ryska regimens ställning hos den egna befolkningen. F

References:
reports/sapo2019.pdf
reports/sapo2020.pdf
reports/sapo2021.pdf
reports/sapo2021.pdf


 Is Russia a threat to Norway?



It appears that Russia has a critical view of Norway's NATO membership and support to the EU's sanctions regime against Russia. Russia has also expressed concern about Norway's increased military activity and closer integration with the United States, which they believe is fuelling anti-Russian sentiment in the Norwegian population. Russia has also demonstrated its own military capability through strategic sorties over the Norwegian Sea and live fire off the coast of Finnmark. Additionally, Russia has expressed concern about NATO activity in the Arctic, which they believe undermines stability and trust between Norway and Russia.

References:
reports/nis2020.pdf
reports/nis2019.pdf
reports/nis2017.pdf
reports/nis2020.pdf


KeyboardInterrupt: Interrupted by user

In [None]:
'''
result = qa_chain({"question": "What is China's modus operandi?", "chat_history": None})
print(result)
'''

In [None]:
'''
query = "What is China's modus operandi?"
r = qa({"query": query})
print(f"Svar:{r.get('result')}")
print(f"\nReferenser:")
for document in r.get('source_documents'):
    print(f"Rapport: {document.metadata['source']}, sida {document.metadata['page']}")
'''