In [31]:
# %pip install --upgrade --quiet  arxiv
# %pip install --upgrade --quiet  pymupdf

In [32]:
### Import Packages
import pandas as pd

In [33]:
import os
from dotenv import load_dotenv

load_dotenv()

### OpenAI
# load_dotenv() is what puts the .env entries into the process environment, so
# re-assigning os.environ["OPENAI_API_KEY"] from os.getenv() adds nothing when the
# key exists -- and when it does not, writing None into os.environ fails with an
# opaque "TypeError: str expected, not NoneType". Fail fast with a readable
# message instead (an empty value is treated as missing too).
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

from langchain_community.document_loaders import ArxivLoader

### Load Documents

In [34]:
# Query arXiv for penetration-testing papers: at most 200 documents, each with
# the full set of available metadata fields attached.
docs = ArxivLoader(
    query="security penetration testing",
    load_all_available_meta=True,
    load_max_docs=200,
).load()

In [36]:
from datetime import datetime

# Keep only the papers whose 'Published' metadata date is strictly later than
# the cutoff, printing index, date, title and authors for every paper kept.
cutoff_date = datetime.strptime("2024-04-18", '%Y-%m-%d').date()

security_dataset = []
for position, paper in enumerate(docs):
    # 'Published' is an ISO date string such as '2019-08-01'.
    published_on = datetime.strptime(paper.metadata['Published'], '%Y-%m-%d').date()
    if published_on <= cutoff_date:
        continue

    print(position ,", ",published_on," : ", paper.metadata["Title"], " by: ", paper.metadata["Authors"])
    security_dataset.append(paper)

7 ,  2024-06-02  :  PentestGPT: An LLM-empowered Automatic Penetration Testing Tool  by:  Gelei Deng, Yi Liu, Víctor Mayoral-Vilches, Peng Liu, Yuekang Li, Yuan Xu, Tianwei Zhang, Yang Liu, Martin Pinzger, Stefan Rass
35 ,  2024-06-12  :  PTHelper: An open source tool to support the Penetration Testing process  by:  Jacobo Casado de Gracia, Alfonso Sánchez-Macián
62 ,  2024-06-08  :  Interactive Greybox Penetration Testing for Cloud Access Control using IAM Modeling and Deep Reinforcement Learning  by:  Yang Hu, Wenxi Wang, Sarfraz Khurshid, Mohit Tiwari
75 ,  2024-05-24  :  Knowledge-Informed Auto-Penetration Testing Based on Reinforcement Learning with Reward Machine  by:  Yuanliang Li, Hanzheng Dai, Jun Yan
81 ,  2024-05-26  :  Make Safe Decisions in Power System: Safe Reinforcement Learning Based Pre-decision Making for Voltage Stability Emergency Control  by:  Congbo Bi, Lipeng Zhu, Di Liu, Chao Lu
85 ,  2024-05-09  :  Artificial Intelligence as the New Hacker: Developing Agents f

In [37]:
# Number of papers that passed the publication-date filter.
len(security_dataset)

11

### Save Documents

In [39]:
from  langchain.schema import Document
import json
from typing import Iterable

def save_docs_to_jsonl(array:"Iterable[Document]", file_path:str)->None:
    """Write documents to ``file_path`` in JSON Lines form (one JSON object per line).

    Args:
        array: Documents to serialize. Each item must expose a ``json()`` method
            that returns its JSON text.
        file_path: Destination path; an existing file is overwritten.
    """
    # Pin UTF-8 rather than relying on the platform default encoding, which on
    # some systems cannot represent the non-ASCII characters found in paper
    # text and author names.
    with open(file_path, 'w', encoding='utf-8') as jsonl_file:
        for doc in array:
            jsonl_file.write(doc.json() + '\n')

def load_docs_from_jsonl(file_path)->"Iterable[Document]":
    """Read documents back from a JSON Lines file.

    Args:
        file_path: Path of a UTF-8 file holding one JSON object per line, where
            each object's keys are valid ``Document`` keyword arguments.

    Returns:
        A list of ``Document`` objects in file order. Blank or whitespace-only
        lines are skipped.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    array = []
    with open(file_path, 'r', encoding='utf-8') as jsonl_file:
        for line in jsonl_file:
            # Tolerate a trailing newline or hand-edited spacing instead of
            # crashing in json.loads on an empty string.
            if not line.strip():
                continue
            array.append(Document(**json.loads(line)))
    return array
    
# Persist the filtered papers, read them straight back, and print how many
# documents survived the round trip.
jsonl_path = './security_dataset2.jsonl'
save_docs_to_jsonl(security_dataset, jsonl_path)

docs2 = load_docs_from_jsonl(jsonl_path)

print(len(docs2))

11


### Generate Q&A

In [40]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# OpenAI-backed components: the question generator and its critic both use
# gpt-4o; embeddings use the OpenAIEmbeddings defaults.
generator_llm, critic_llm = ChatOpenAI(model="gpt-4o"), ChatOpenAI(model="gpt-4o")
embeddings = OpenAIEmbeddings()
generator = TestsetGenerator.from_langchain(generator_llm, critic_llm, embeddings)

# Mix of question types requested from the generator (the weights add up to 1.0).
distributions = {simple: 0.4, multi_context: 0.4, reasoning: 0.2}

# Generate 60 question/answer rows from the reloaded documents.
# (generator.generate_with_llamaindex_docs is the counterpart when llama-index
# is the document loader.)
testset = generator.generate_with_langchain_docs(docs2, 60, distributions)
security_qna_dataset = testset.to_pandas()
security_qna_dataset

embedding nodes:   0%|          | 0/462 [00:00<?, ?it/s]

Filename and doc_id are the same for all nodes.


Generating:   0%|          | 0/60 [00:00<?, ?it/s]

Failed to parse output. Returning None.
Failed to parse output. Returning None.
Failed to parse output. Returning None.
Failed to parse output. Returning None.
IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,What metrics are used to assess the syntactic ...,[ PowerShell code. Both the generated commands...,The metrics used to assess the syntactic quali...,simple,"[{'Published': '2024-04-19', 'Title': 'The Pow...",True
1,How did GPT-3.5 perform in end-to-end penetrat...,[ to assess the performances of various LLMs i...,GPT-3.5 successfully completed 1 end-to-end pe...,simple,"[{'Published': '2024-06-02', 'Title': 'Pentest...",True
2,What is the performance of PENTESTGPT in the p...,"[\ndesign, which retains the full testing cont...",PENTESTGPT's performance in the picoMini CTF c...,simple,"[{'Published': '2024-06-02', 'Title': 'Pentest...",True
3,How does AI-driven command generation enhance ...,[Figure 4.2: Example Task Tree\n4.4.2\nDynamic...,AI-driven command generation enhances the effi...,simple,"[{'Published': '2024-05-09', 'Title': 'Artific...",True
4,How was PThelper functionality validated in a ...,[ only chatgpt NLPAgent is available.\n• Type ...,PThelper functionality was validated in a simu...,simple,"[{'Published': '2024-06-12', 'Title': 'PTHelpe...",True
5,How does the availability of source code refle...,[\n100.00%\n(b) Source Code Available\nTable 4...,The availability of source code for more than ...,simple,"[{'Published': '2024-07-19', 'Title': 'Bridgin...",True
6,What is the process for generating a Type-II p...,[\nmission; for each entity with one of these ...,For each entity with one of the specified type...,simple,"[{'Published': '2024-06-08', 'Title': 'Interac...",True
7,How does tree reconstruction help in understan...,[ a set of directories utilized\nin a brute-fo...,Tree reconstruction helps in understanding the...,simple,"[{'Published': '2024-04-22', 'Title': 'Offensi...",True
8,How are large language models (LLMs) being uti...,"[ www.makotemplates.org/. Accessed: April 16, ...",Large language models (LLMs) are being utilize...,simple,"[{'Published': '2024-05-09', 'Title': 'Artific...",True
9,How does GPT-4 perform in completing attack ta...,[ avoid potential out-of-\nthe-box attacks by ...,GPT-4 achieves a perfect success rate in compl...,simple,"[{'Published': '2024-03-02', 'Title': 'AutoAtt...",True


In [41]:
# Drop rows that have no usable ground truth before sampling. Comparing only
# against the string 'nan' lets genuinely missing values (float NaN / None)
# through, because NaN != 'nan' evaluates to True; notna() removes those, and
# the string comparison still removes a literal 'nan' placeholder.
# NOTE(review): which of the two forms the generator emits for failed answers
# may depend on the ragas version -- both are handled here.
security_qna_dataset = security_qna_dataset[
    security_qna_dataset['ground_truth'].notna()
    & (security_qna_dataset['ground_truth'].astype(str) != 'nan')
]
# Fixed seed keeps the 50-row benchmark sample reproducible. sample() raises
# ValueError if fewer than 50 rows remain after filtering.
security_qna_dataset = security_qna_dataset.sample(n=50, random_state=1)

In [42]:
# Row count after filtering and sampling; sample(n=50) fixes this at 50.
len(security_qna_dataset)

50

In [43]:
# Write the sampled Q&A benchmark to CSV; index=False keeps the DataFrame index out of the file.
security_qna_dataset.to_csv("./benchmark-security2.csv", index=False)