# MOE architecture
----

This notebook will have the basics of a AI MOE system where each expert is specilized in a single protocol and is able to create single packages of that protocol.

<center><img src="https://www.datocms-assets.com/96965/1695407447-image1.png">


#### Imports

In [None]:
import os
os.environ['OPENAI_API_KEY'] = 'FILL'

import openai
from openai import OpenAI
client = OpenAI()

import random
import re
import pickle
import scapy
import toml
import ipaddress
import json

from scapy.all import *
from scapy.utils import RawPcapReader, wrpcap
import scapy.all as scapy
from scapy.layers.l2 import Ether, ARP
from scapy.layers.inet import IP, ICMP
from scapy.layers.http import *

#### Definitions

In [4]:
ip_file = "../../scripts/sample_input/ip_file.toml"
mac_file = "../../scripts/sample_input/mac_file.txt"
resource_file = "../../scripts/sample_input/resource_file.txt"

functioning ="Conversations"

number_of_creations = 5
protocols = ["ICMP", "ARP", "DNS", "HTTP"]
HTTPCodes = [200, 307, 403, 404]
HTTPPhrases = ["OK", "Temporary Redirect", "Forbidden", "Not Found"]


Models_file = 'Models.jsonl'
pkl_saved = "../../data/MOE/pickle/MOE_v3_Conversations_Fine_Tuned_v3.pkl"
pcap_saved = "../../data/MOE/pcap/MOE_v3_Conversations_generated_3_5_turbo-instruct_conv_v3.pcap"

system_message = 'You are a new generation traffic generator. \
You are specilized in the {} protocol and {} generation using python and scapy. \
You are especially attentive to variables and different types of traffic.'

## Step 1: Important functions definition

#### load_toml:
Lee y procesa un paquete toml definido por el usuario. Este paquete Toml es donde están las IPs de diferentes redes, un equipo victima y otro atacante.

In [5]:
def load_toml(ip_file):
    network_ips = []
    victim_ip = ""
    attacker_ip = ""
    with open(ip_file, "r") as f:
        data = toml.load(f)
        for ip_desc in data['network']['ip']:
            # Convierte cada elemento en un objeto de red IP usando la función ipaddress.ip_network
            # Elimina los espacios en blanco al principio y al final del elemento con el método strip
            # Usa el método hosts para obtener una lista de los hosts de la red
            # Añade los hosts a la lista network_ips con el método extend
            network_ips.extend(ipaddress.ip_network(ip_desc.strip()).hosts())
        victim_ip = ipaddress.ip_network(data['victim']['ip'].strip()).hosts()
        attacker_ip = ipaddress.ip_network(
            data['attacker']['ip'].strip()).hosts()

    return network_ips, victim_ip, attacker_ip

In [6]:
def load_txt(file):
    with open(file,"r") as f:
        return f.read().splitlines()

#### generate_packet_summaries:

Esto es un generador automático de resumenes de paquetes basado en la aleatoriedad de la función random.choice()
Solo es capaz de generar resuemenes de tráfico ICMP y DNS, en teoría los elige de los protocolos en la lista protocols pero solo se pueden generar esos 2
Para generar resumenes selecciona aleatoriamente 2 ips de una lista de IPs que se le pasa
Tambien elige aleatoriamente el resto de variables de los paquetes como son la id, sqe, longitud
Por cada paquete generado se aumenta en 2 el ctr
EL generador se para cuando se cumple la condición del ctr, esta condición depende de la variable n que se le pase

In [7]:
def generate_packet_summaries(ip_list, n, protocols, mac_list=None, resource_list=None):
    summaries = []
    ctr = 0
    while ctr < math.ceil(n/5)*5:
        proto = random.choice(protocols)
        if proto == "ICMP":
            ip1, ip2 = random.choices(ip_list, k=2)
            random_id, random_seq = random.randint(
                0, 65535), random.randint(0, 65535)
            random_length = random.choice([76, 100])
            summaries.append(
                "{} → {} ICMP {} (ping) request id={:#06x}, seq={}".format(
                    ip1, ip2, random_length, random_id, random_seq))
            summaries.append(
                "{} → {} ICMP {} (ping) reply id={:#06x}, seq={}".format(
                    ip2, ip1, random_length, random_id, random_seq))
            ctr += 2
        elif proto == "ARP":
            mac1, mac2 = random.choices(mac_list, k=2)
            if mac2 == "FF:FF:FF:FF:FF:FF":
                mac3 = random.choices(mac_list, k=2)
            else:
                if  random.choice([0, 10]) < 5:
                    mac3 = mac2
                else:
                    mac3 =  random.choices(mac_list, k=1)
                    while mac3 == "FF:FF:FF:FF:FF:FF" or mac3 == mac1:
                        mac3 =  random.choices(ordest_list, k=1)
            
            ip1, ip2 = random.choices(ip_list, k=2)
            
            random_length = random.choice([76, 100])
            summaries.append(
                "{}     {}     ARP      {}     Who has {}? Tell {}".format(
                    mac1, mac2, random_length, ip1, ip2))
            summaries.append(
                "{}     {}     ARP      {}     {} is at {}".format(
                    mac3, mac1, random_length, ip1, mac3))
            ctr += 2
    return summaries

#### generate_conversation_summaries:

Esto es un generador automático de resumenes de conversaciones basado en la aleatoriedad de la función random.choice()
Solo es capaz de generar resuemenes de tráfico ICMP, ARP y DNS, en teoría los elige de los protocolos en la lista protocols pero solo se pueden generar esos 3.
Para generar resumenes seleccionan aleatoriamente las ips de una lista de IPs que se le pasa.
Tambien elige aleatoriamente el resto de variables de los paquetes como son la id, sqe, longitud.
Por cada paquete generado se aumenta en 2 el ctr.
EL generador se para cuando se cumple la condición del ctr, esta condición depende de la variable n que se le pase.

In [8]:
def generate_conversation_summaries(ip_list, n, protocols, mac_list=None, resource_list=None):
    summaries = []
    ctr = 0
    while ctr < n:
        proto = random.choice(protocols)
        
        if proto == "ICMP":
            ip1, ip2 = random.choices(ip_list, k=2)
            random_id, random_seq = random.randint(
                0, 65535), random.randint(0, 65535)
            random_type = random.choice(["Echo", "Timestamp"])
            summaries.append(
                "Source: IP={} // Destination: IP={} // Others: id={:#06x} seq= {} type={}".format(
                    ip1, ip2, random_id, random_seq, random_type))
            ctr += 1
        elif proto == "ARP":
            mac1, mac2 = random.choices(mac_list, k=2)
            
            while mac1 == "FF:FF:FF:FF:FF:FF":
                mac1 = random.choices(mac_list, k=1)
                
            ip1, ip2 = random.choices(ip_list, k=2)
            
            summaries.append(
                "Source: MAC={}, IP={} // Destination: MAC={} // Wanted: IP= {}".format(
                    mac1, ip1, mac2, ip2))
            ctr += 1
            
        elif proto == "DNS":
            
            ip1, ip2, ip3 = random.choices(ip_list, k=3)
            random_id = random.randint(0, 65535)
            random_resource = random.choices(resource_list, k=1)
            
            summaries.append(
            "Source: IP= {} // Destination: IP= {} // Others: {:#06x} , resource= {} , response = {}".format(
                    ip1, ip2, random_id, random_resource, ip3))
            
            ctr += 1
            
        elif proto == "HTTP":
            
            ip1, ip2= random.choices(ip_list, k=2)
            port1 = random.randint(1025, 65535)
            window1, window2 = random.randint(200, 6535), random.randint(200, 6535)
            random_resource = random.choices(resource_list, k=1)
            index = random.randint(0, 3)
            code = HTTPCodes[index]
            ReasonPhrase = HTTPPhrases[index]
            
            summaries.append(
            "Source: IP={}, port={}, Window: {} // Destination: IP={}, port=80, Window: {} // Others: Host={}, Path="", Code={}, Reason_Phrase= {}".format(
                    ip1, port1, window1, ip2, window2, random_resource, code, ReasonPhrase))
            
            
            ctr += 1
            
    return summaries

#### generate_ping_flood_summaries
Al contrario que el generador anterior, este sirve para generar un ataque de ping flood a una victima en concreto
También basado en probabilidad, si es True, se envía trafico maligno a la victima desde el atacante, si no, tráfico normal de la red con 2 equipos aleatorios
Si es tráfico normal, se genera tanto la petición como la respuesta

In [9]:
def generate_ping_flood_summaries(ip_list, n, victim_ip, attacker_ip):
    summaries = []
    ctr = 0
    while ctr < math.ceil(n/5)*5:
        malicious = random.choices([True, False], weights=[0.7, 0.3], k=1)
        if malicious[0]: #Si es True
            random_id, random_seq = random.randint(
                0, 65535), random.randint(0, 65535)
            random_length = random.choice([76, 100])
            summaries.append(
                "{} → {} ICMP {} (ping) request id={:#06x}, seq={}".format(
                    attacker_ip, victim_ip, random_length, random_id, random_seq))
            ctr += 1
        else:
            ip1, ip2 = random.choices(ip_list, k=2)
            random_id, random_seq = random.randint(
                0, 65535), random.randint(0, 65535)
            random_length = random.choice([76, 100])
            summaries.append(
                "{} → {} ICMP {} (ping) request id={:#06x}, seq={}".format(
                    ip1, ip2, random_length, random_id, random_seq))
            summaries.append(
                "{} → {} ICMP {} (ping) reply id={:#06x}, seq={}".format(
                    ip2, ip1, random_length, random_id, random_seq))
            ctr += 2
    return summaries

#### generate_packets

Se envian resumenes de paquetes de 5 en 5 al modelo, para ello se usa la variable ctr
Se obtiene la lista de comandos de Scapy y antes de ejecutarla se añade una instrucción para anexar los paquetes creados a una lista grande npackets
luego se ejecutan todas las instrcciones y se devuelven los paquetes

In [10]:
def generate_packets(summaries):
    ctr = 1
    packets = []
    prompt = ""
    for summary in summaries:
        prompt += summary + "\n"
        protocol = obtain_Packet_protocol(summary)
    
        
        if protocol == "ICMP":
            messages=[{"role": "system", "content": system_message.format("ICMP", functioning)},
            {"role": "user", "content": summary}]
            print("\t" + str(ctr)+". Using->  " + Models_list["Packets"]["Specific"]["ICMP"])
            completion = client.chat.completions.create(
            model=Models_list[functioning]["Specific"]["ICMP"], 
            messages=messages,
            max_tokens=2600,
            temperature=0.1)
        
        elif protocol == "ARP":
            messages=[{"role": "system", "content": system_message.format("ARP", functioning)},
            {"role": "user", "content": summary}]
            print("\t" + str(ctr)+". Using->  " + Models_list["Packets"]["Specific"]["ARP"])
            completion = client.chat.completions.create(
            model=Models_list[functioning]["Specific"]["ARP"], 
            messages=messages,
            max_tokens=2600,
            temperature=0.1)
        
        elif protocol == "DNS":
            messages=[{"role": "system", "content": system_message.format("DNS", functioning)},
            {"role": "user", "content": summary}]
            print("\t" + str(ctr)+". Using->  " + Models_list["Packets"]["Specific"]["DNS"])
            completion = client.chat.completions.create(
            model=Models_list[functioning]["Specific"]["DNS"], 
            messages=messages,
            max_tokens=2600,
            temperature=0.1)
        
        elif protocol == "HTTP":
            messages=[{"role": "system", "content": system_message.format("HTTP", functioning)},
            {"role": "user", "content": summary}]
            print("\t" + str(ctr)+". Using->  " + Models_list["Packets"]["Specific"]["HTTP"])
            completion = client.chat.completions.create(
            model=Models_list[functioning]["Specific"]["HTTP"], 
            messages=messages,
            max_tokens=2600,
            temperature=0.1)
        
        
        responses.append(response(messages, completion))
        pickle.dump(responses, open(pkl_saved, "wb" ) )
        
        print("\t\t" + "Comand->  " + str(responses[-1].completion.choices[0].message.content))
        exec("packets.append(" + str(responses[-1].completion.choices[0].message.content + ")"))
        ctr += 1
    return packets

In [11]:
def obtain_Packet_protocol(summary):
    if "ICMP" in summary:
        return "ICMP"
    elif "ARP" in summary:
        return "ARP"
    elif "DNS" in summary:
        return "DNS"
    elif "HTTP" in summary:
        return "HTTP"
    else:
        raise TypeError("Unsupported protocol.")

In [12]:
paquetes = []
def exec_commands(commands):
    print(commands)
    exec(commands)
        
    if protocol != "HTTP":
        pkt_1.show()
        paquetes.append([pkt_1, pkt_2])
        del pkt_1, pkt_2
    else:
        pkt_1.show()
        paquetes.append([pkt_1, pkt_2, pkt_3, pkt_4, pkt_5, pkt_6, pkt_7, pkt_8, pkt_9, pkt_10, pkt_11])
        del pkt_1, pkt_2, pkt_3, pkt_4, pkt_5, pkt_6, pkt_7, pkt_8, pkt_9, pkt_10, pkt_11
    
    return paquetes

In [13]:
def generate_Conversations(summaries):
    
    ctr= 1
    packets = []
    prompt = ""
    for summary in summaries:
        prompt += summary + "\n"
        protocol = obtain_Conversation_protocol(summary)
        
        if protocol == "ICMP":
            messages=[{"role": "system", "content": system_message.format("ICMP", functioning)},
            {"role": "user", "content": summary}]
            print("\t" + str(ctr)+". Using->  " + Models_list["Conversations"]["Specific"]["ICMP"])
            completion = client.chat.completions.create(
            model=Models_list["Conversations"]["Specific"]["ICMP"], 
            messages=messages,
            max_tokens=2600,
            temperature=0.1)
            
        elif protocol == "ARP":
            print("++++++++++++++++++++++++++++++++++++++++")
            messages=[{"role": "system", "content": system_message.format("ARP", functioning)},
            {"role": "user", "content": summary}]
            print("\t" + str(ctr)+". Using->  " + Models_list["Conversations"]["Specific"]["ARP"])
            completion = client.chat.completions.create(
            model=Models_list["Conversations"]["Specific"]["ARP"], 
            messages=messages,
            max_tokens=2600,
            temperature=0.1)

        elif protocol == "DNS":
            messages=[{"role": "system", "content": system_message.format("DNS", functioning)},
            {"role": "user", "content": summary}]
            print("\t" + str(ctr)+". Using->  " + Models_list["Conversations"]["Specific"]["DNS"])
            completion = client.chat.completions.create(
            model=Models_list["Conversations"]["Specific"]["DNS"], 
            messages=messages,
            max_tokens=2600,
            temperature=0.1)
        
        elif protocol == "HTTP":
            messages=[{"role": "system", "content": system_message.format("HTTP", functioning)},
            {"role": "user", "content": summary}]
            print("\t" + str(ctr)+". Using->  " + Models_list["Conversations"]["Specific"]["HTTP"])
            completion = client.chat.completions.create(
            model=Models_list["Conversations"]["Specific"]["HTTP"], 
            messages=messages,
            max_tokens=2600,
            temperature=0.1)
            
        
        responses.append(response(messages, completion))
        pickle.dump(responses, open(pkl_saved, "wb" ) )
        commands = responses[-1].completion.choices[0].message.content[1:]
        
        ldict = {}
        exec(commands,globals(),ldict)
        
        if protocol != "HTTP":
            packets.append([ldict['pkt_1'], ldict['pkt_2']])
            del ldict
        else:
            packets.append([ldict['pkt_1'], ldict['pkt_2'], ldict['pkt_3'], ldict['pkt_4'], ldict['pkt_5'], ldict['pkt_6'], ldict['pkt_7'], ldict['pkt_8'], ldict['pkt_9'], ldict['pkt_10'], ldict['pkt_11']])
            del ldict
        
        print("\t\t" + "Comand->  " + commands)
        
        ctr += 1
    return packets

In [14]:
def obtain_Conversation_protocol(summary):
    if "seq=" in summary:
        return "ICMP"
    elif "MAC=" in summary:
        return "ARP"
    elif "response" in summary:
        return "DNS"
    elif "Window:" in summary:
        return "HTTP"
    else:
        raise TypeError("Unsupported protocol.")

In [15]:
def write_pcap(packets, pcap_saved):
    for packet in packets:
        with open(pcap_saved, "ba+") as f:
            wrpcap(f, packet, append=True)

In [16]:
class response():
    "Stores name and place pairs"
    def __init__(self, name, place):
        self.prompt_summary = name
        self.completion = place

try:
    responses = pickle.load( open(pkl_saved, "rb" ))
except:
    responses = []

print("Number of previous responses: " + str(len(responses)))

Number of previous responses: 0


In [74]:
with open(Models_file, 'r') as json_file:
    Models_list = json.load(json_file)

print("\n---------------------------------------------------------")
print("                 Availiable PACKET models                  ")
print("---------------------------------------------------------\n")
for key, value in Models_list["Packets"]["Specific"].items():
    print( "\t" + key + " -> " + value)

print("\n---------------------------------------------------------")
print("             Availiable CONVERSATIONS models               ")
print("---------------------------------------------------------\n")
for key, value in Models_list["Conversations"]["Specific"].items():
    print( "\t" + key + " -> " + value)

print("\n---------------------------------------------------------")
print("                    Packet generation                    ")
print("---------------------------------------------------------\n")

network_ips, victim_ip, attacker_ip = load_toml(ip_file)
mac_list = load_txt(mac_file)
resource_list = load_txt(resource_file)

if functioning =="Packets":
    summaries = generate_packet_summaries(
            network_ips, n=100, protocols=protocols, mac_list=mac_list, resource_list=resource_list)
elif functioning =="Conversations":
    summaries = generate_conversation_summaries(
            network_ips, n=100, protocols=protocols, mac_list=mac_list, resource_list=resource_list)

print("Generated {} summaries:\n".format(len(summaries)))
for index in range(len(summaries)):
    print("\t" + str(index+1)+". " + summaries[index])

print("\n---------------------------------------------------------")
print("        Using OpenAI API to generate packets...          ")
print("---------------------------------------------------------\n")

packets =[]

if functioning =="Packets":
    packets = generate_packets(summaries)

elif functioning =="Conversations":
    packets = generate_Conversations(summaries)

print(packets)

print("\nGenerated {} packets.\nWriting them to {} ...".format(
        len(packets),
        pcap_saved))

write_pcap(packets, pcap_saved)
print("Done!")



---------------------------------------------------------
                 Availiable PACKET models                  
---------------------------------------------------------

	ICMPv1 -> ft:gpt-3.5-turbo-0125:personal:icmpv1:9ELvoktd
	ICMPv2 -> ft:gpt-3.5-turbo-1106:personal:icmpv2:9F2iiDZa
	DNS -> 
	ARP -> ft:gpt-3.5-turbo-1106:personal:arpv1:9FLIvsUk
	ARPv2 -> ft:gpt-3.5-turbo-1106:personal:arpv2:9FPXrE3X
	HTTP -> 

---------------------------------------------------------
             Availiable CONVERSATIONS models               
---------------------------------------------------------

	ICMP -> ft:gpt-3.5-turbo-1106:personal:convicmpv2:9OTQhQhP
	DNS -> ft:gpt-3.5-turbo-1106:personal:convdnsv2:9OT57bdn
	ARP -> ft:gpt-3.5-turbo-1106:personal:convarpv2:9OUMMFtw
	HTTP -> ft:gpt-3.5-turbo-1106:personal:convhttpv4:9Tq7Cjjl

---------------------------------------------------------
                    Packet generation                    
----------------------------------------------

		Comand->  pkt_1=scapy.IP(src="172.16.161.46", dst="172.16.113.138")/scapy.ICMP(type=13, id=0xb92b, seq=21181)
time.sleep(abs(random.gauss(0, 0.03)))
pkt_2=scapy.IP(src="172.16.113.138", dst="172.16.161.46")/scapy.ICMP(type=14, id=0xb92b, seq=21181)
	2. Using->  ft:gpt-3.5-turbo-1106:personal:convicmpv2:9OTQhQhP
		Comand->  pkt_1=scapy.IP(src="10.0.147.228", dst="172.16.10.168")/scapy.ICMP(type=13, id=0x3b79, seq=48057)
time.sleep(abs(random.gauss(0, 0.03)))
pkt_2=scapy.IP(src="172.16.10.168", dst="10.0.147.228")/scapy.ICMP(type=14, id=0x3b79, seq=48057)
	3. Using->  ft:gpt-3.5-turbo-1106:personal:convdnsv2:9OT57bdn
		Comand->  RANDOM_PORT = random.randint(4097, 65530)
pkt_1 = IP(src="10.0.154.41", dst="172.16.62.246")/UDP(sport=RANDOM_PORT, dport=53)/DNS(id=0x2c36, qr=0, rd=1, opcode=0, qdcount=1, ancount=0, nscount=0, arcount=0, qd=DNSQR(qname="www.microsoft.com", qtype="A", qclass="IN"))
time.sleep(abs(random.gauss(0, 0.03)))
pkt_2 = IP(src="172.16.62.246", dst="10.0.154.41")/UDP(s

		Comand->  pkt_1=scapy.Ether(src="DE:F1:23:45:67:89", dst="67:89:AB:CD:EF:12")/scapy.ARP(op=1, pdst="172.16.14.169", psrc="172.16.12.139", hwdst="67:89:AB:CD:EF:12", hwsrc="DE:F1:23:45:67:89")
time.sleep(abs(random.gauss(0, 0.03)))
pkt_2=scapy.Ether(src="67:89:AB:CD:EF:12", dst="DE:F1:23:45:67:89")/scapy.ARP(op=2, psrc="172.16.14.169", hwsrc="67:89:AB:CD:EF:12", hwdst="DE:F1:23:45:67:89", pdst="172.16.12.139")
	14. Using->  ft:gpt-3.5-turbo-1106:personal:convicmpv2:9OTQhQhP
		Comand->  pkt_1=scapy.IP(src="10.0.8.120", dst="172.16.34.51")/scapy.ICMP(type=8, id=0xdf86, seq=56322)
time.sleep(abs(random.gauss(0, 0.03)))
pkt_2=scapy.IP(src="172.16.34.51", dst="10.0.8.120")/scapy.ICMP(type=0, id=0xdf86, seq=56322)
++++++++++++++++++++++++++++++++++++++++
	15. Using->  ft:gpt-3.5-turbo-1106:personal:convarpv2:9OUMMFtw
		Comand->  pkt_1=scapy.Ether(src="08:15:23:42:56:69", dst="FF:FF:FF:FF:FF:FF")/scapy.ARP(op=1, pdst="10.0.17.94", psrc="10.0.102.22", hwdst="00:00:00:00:00:00", hwsrc="08:15:2

		Comand->  RANDOM_PORT = random.randint(4097, 65530)
pkt_1 = IP(src="10.0.58.30", dst="10.0.159.208")/UDP(sport=RANDOM_PORT, dport=53)/DNS(id=0xfb1d, qr=0, rd=1, opcode=0, qdcount=1, ancount=0, nscount=0, arcount=0, qd=DNSQR(qname="www.twitter.com", qtype="A", qclass="IN"))
time.sleep(abs(random.gauss(0, 0.03)))
pkt_2 = IP(src="10.0.159.208", dst="10.0.153.176")/UDP(sport=53, dport=RANDOM_PORT)/DNS(id=0x0aba, qr=1, opcode=0, ra=1, rcode=0, qdcount=1, ancount=1, nscount=0, arcount=0, qd=DNSQR(qname="www.twitter.com", qtype="A", qclass="IN"), an=DNSRR(rrname="www.twitter.com", type="A", rclass="IN", ttl=255, rdata="10.0.201.243"))
++++++++++++++++++++++++++++++++++++++++
	24. Using->  ft:gpt-3.5-turbo-1106:personal:convarpv2:9OUMMFtw
		Comand->  pkt_1=scapy.Ether(src="BC:DE:F1:23:45:67", dst="AB:CD:EF:12:34:56")/scapy.ARP(op=1, pdst="10.0.26.120", psrc="10.0.88.106", hwdst="AB:CD:EF:12:34:56", hwsrc="BC:DE:F1:23:45:67")
time.sleep(abs(random.gauss(0, 0.03)))
pkt_2=scapy.Ether(src="AB:CD

TypeError: int() argument must be a string, a bytes-like object or a real number, not 'tuple'

In [75]:
write_pcap(packets, pcap_saved)