# Creación de conversaciones DNS

Con este código se va a intentar crear todos los pasos de una conversación DNS, mediante IA generativa. Se pretende pasar una descripción de esta conversación al modelo y que este sea capaz de crear todos los pasos/paquetes necesarios para replicar una conversación con esas mismas especificacioness

In [1]:
import os
import torch
from transformers import (
  AutoConfig,
  AutoTokenizer, 
  AutoModelForCausalLM, 
  BitsAndBytesConfig,
  GenerationConfig,
  pipeline
)

from langchain.embeddings.huggingface import HuggingFaceEmbeddings

from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain

In [2]:
import os
#os.environ["CUDA_VISIBLE_DEVICES"]="1,2"

In [3]:
#################################################################
# bitsandbytes parameters
#################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

In [4]:
#################################################################
# Set up quantization config
#################################################################
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)
        

Your GPU supports bfloat16: accelerate training with bf16=True


In [5]:
model_name = 'mistralai/Codestral-22B-v0.1' #'mistralai/Mixtral-8x7B-Instruct-v0.1' #'mistralai/Codestral-22B-v0.1'

In [6]:
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, legacy=True)
    
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=compute_dtype,
    trust_remote_code=True,
    device_map="auto",
    quantization_config=bnb_config,
)

generation_config = GenerationConfig.from_pretrained(model_name)
generation_config.max_new_tokens = 1024
generation_config.temperature = 0.1
generation_config.top_k = 10
generation_config.top_p = 0.1
generation_config.do_sample = True
generation_config.repetition_penalty = 1.15

model.generation_config.pad_token_ids = tokenizer.pad_token_id
    
# Crear LLM Chain
text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    return_full_text=False,
    generation_config=generation_config,
)

Loading checkpoint shards:   0%|          | 0/9 [00:00<?, ?it/s]

In [7]:
prompt = PromptTemplate(
    input_variables=["system", "user"],
    output_parser=None,
    partial_variables={},
    template = """
    [INST]
    {system}
    
    {user}
    [/INST]
    """
)

system_message = "You are a computer network programmer. Your goal is to generate Python code to create packages with the scapy framework based on a provided conversation summary.\n"
system_message = "For example, given the following summary of the DNS conversation:\n"
system_message += 'Source: IP="192.168.1.10" // Destination: IP="192.168.1.20"// Others: id=0x0045 , resource= "www.github.com" , response = "140.82.112.4"\n'
system_message += "The code generated to create a random port number, and the request and reply packets of the provided conversation, must include the following piece of code:\n"
system_message += "import... \n"
system_message += "... \n"
system_message += "RANDOMPORT = random.randint(4097, 65530)\n"
system_message += "# Create request packet\n"
system_message += 'requestpkt = IP(src="192.168.1.10", dst="192.168.1.20", proto=17)/UDP(sport=RANDOMPORT, dport=53)/DNS(id=0x0045, qr=0, rd=1, opcode=0, qdcount=1, ancount=0, nscount=0, arcount=0, qd=DNSQR(qname="www.github.com", qtype="A", qclass="IN"))\n'
system_message += 'time.sleep(abs(random.gauss(0, 0.03)))'
system_message += "# Create reply packet"
system_message += 'replypkt = IP(src="192.168.1.20", dst="192.168.1.10", proto=17)/UDP(sport=53, dport=RANDOMPORT)/DNS(id=0x0aba, qr=1, opcode=0, ra=1, rcode=0, qdcount=1, ancount=1, nscount=0, arcount=0, qd=DNSQR(qname="www.github.com", qtype="A", qclass="IN"), an=DNSRR(rrname="www.github.com", type="A", rclass="IN", ttl=255, rdata="140.82.112.4"))\n'
system_message += "# Add packets to list\n"
system_message += "pktlist = [requestpkt, replypkt]"   
system_message += "\n\n"

In [8]:
with open("./data/Conversations/DNS/Conv_summaries.txt","r",encoding="utf8") as f:
    Conv_summaries = f.read().splitlines()

print("Número de resúmenes: " +str(len(Conv_summaries)))

Número de resúmenes: 60


In [9]:
class code_response():
    "Stores name and place pairs"
    def __init__(self, name, place):
        self.prompt_summary = name
        self.completion = place

In [10]:
import pickle

try:
    responses = pickle.load(open("./data/Conversations/DNS/pickle/DNS_pairs_Aday.pkl", "rb" ))
except:
    responses = []
    
print("Number of previous responses: " + str(len(responses)))

Number of previous responses: 0


In [11]:
mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)
rag_chain = prompt | mistral_llm

  warn_deprecated(


In [12]:
from scapy.all import *
from scapy.utils import RawPcapReader, wrpcap
import scapy.all as scapy

from scapy.layers.inet import IP
from scapy.all import DNS

from tqdm.auto import tqdm

progress_bar = tqdm(range(len(Conv_summaries)))

counter = 0

for i in range(len(Conv_summaries)):

    text_sum = ""
    
    summary = Conv_summaries[i]
    
    #print("\n...........................................")
    #print("GENERATING SCAPY COMMANDS FOR THE FOLLOWING CONVERSATIONS (iter " +str(i+1) +")\n")
    
    query_content = "Given the following summary of a conversation:\n"
    query_content += summary + "\n"
    text_sum += summary + "\n"
    query_content += "# Instructions:\n"
    query_content += "## Generate a complete python code for creating packets with scapy framework. Use the provided example structure to simulate real-world conditions, adding the sleep function call between packets.\n"
    query_content += "## Take your time (a few seconds) to validate if the last code line of your code is 'pktlist = [requestpkt, replypkt]', and if the addresses used in the packets correspond to the same ones as in the DNS conversation provided; If not, make the appropriate corrections to the code.\n"
    query_content += "## Don't explain the code, just generate the code block itself. PLEASE DONT start the responses with ```python. This is a flagrant error and will make the code unexecutable. You have done this last step wrong plenty of times, take your time to generate the output without ```python please.\n"

    # Query es el mensaje que le envias al modelo
    #print(query_content)

    completion = ""
    completion = rag_chain.invoke({"system": system_message, "user": query_content})

    progress_bar.update(1)

    #print("\n...Generated!")

    # Filtramos ```python
    #pos1 = completion.find("```python\n")
    #pos2 = completion.find("\n```")
    #print(pos1, pos2)
    #if pos1 != -1 and pos2 != 1:
    #    completion = completion[pos1+len("```python\n"):pos2]

    # Parece que hay problemas de indentado en el código resultante ¿?
    completion_copy = completion
    completion = ""
    for line in completion_copy.splitlines():
        completion += line.strip()+"\n"
        
    # Guardamos tanto la petición como la respuesta en un archivo
    #pickle.dump(responses, open( "./data/Conversations/DNS/pickle/DNS_pairs_Aday.pkl", "wb" ) )
    try:
        #print("Ejecutando python...")
        exec(completion)
        #print(pktlist)
        counter += 1
    except:
        print("Error en la ejecución\n")
        #print(completion)
        #print(":".join("{:02x}".format(ord(c)) for c in completion))
        continue

    # Guardamos tanto el resumen de los paquetes que solicitamos y lo que devuelve el modelo para estos paquetes
    try:
        responses.append(code_response(text_sum, completion))
    except:
        print("Error almacenando resultado del modelo\n")
    
    # Guardamos los paquetes generados en un pcap
    with open("./data/Conversations/DNS/pcap/DNS_generated_codestral_nocuantizado.pcap", "ba+") as f:
        wrpcap(f, pktlist, append=True)

print("Number of completitions done: " + str(counter))


  0%|          | 0/60 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error en la ejecución

Error en la ejecución

Number of completitions done: 21


In [13]:
# Este es el resumen de lo que está en el struct de responses para la última petición
print(responses[-1].prompt_summary)
print(responses[-1].completion)

Source: IP= "198.51.100.1" // Destination: IP= "198.51.100.2" // Others: id=0x1455 , resource= www.hbo.com , response = "34.192.0.174"


RANDOMPORT = random.randint(4097, 65530)
# Create request packet
requestpkt = IP(src="198.51.100.1", dst="198.51.100.2", proto=17)/UDP(sport=RANDOMPORT, dport=53)/DNS(id=0x1455, qr=0, rd=1, opcode=0, qdcount=1, ancount=0, nscount=0, arcount=0, qd=DNSQR(qname="www.hbo.com", qtype="A", qclass="IN"))
time.sleep(abs(random.gauss(0, 0.03)))
# Create reply packet
replypkt = IP(src="198.51.100.2", dst="198.51.100.1", proto=17)/UDP(sport=53, dport=RANDOMPORT)/DNS(id=0x1455, qr=1, opcode=0, ra=1, rcode=0, qdcount=1, ancount=1, nscount=0, arcount=0, qd=DNSQR(qname="www.hbo.com", qtype="A", qclass="IN"), an=DNSRR(rrname="www.hbo.com", type="A", rclass="IN", ttl=255, rdata="34.192.0.174"))
# Add packets to list
pktlist = [requestpkt, replypkt]

