In [1]:
from owlready2 import get_ontology, default_world
import pandas as pd

csv_path = 'KDDTrain_20%.csv'
onto_path = 'ids_ontology.rdf'

In [2]:
dataset = pd.read_csv(csv_path)

# Identify the columns to be removed
columns_to_remove = dataset.loc[:, 'serror_rate':'dst_host_srv_rerror_rate'].columns

# Drop the specified columns
dataset = dataset.drop(columns=columns_to_remove)
dataset = dataset.drop(columns=['level'])

dataset.head()

Unnamed: 0,duration,protocol_type,service,flag,src_bytes,dst_bytes,land,wrong_fragment,urgent,hot,...,num_root,num_file_creations,num_shells,num_access_files,num_outbound_cmds,is_host_login,is_guest_login,count,srv_count,attack
0,0,tcp,ftp_data,SF,491,0,0,0,0,0,...,0,0,0,0,0,0,0,2,2,normal
1,0,udp,other,SF,146,0,0,0,0,0,...,0,0,0,0,0,0,0,13,1,normal
2,0,tcp,private,S0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,123,6,neptune
3,0,tcp,http,SF,232,8153,0,0,0,0,...,0,0,0,0,0,0,0,5,5,normal
4,0,tcp,http,SF,199,420,0,0,0,0,...,0,0,0,0,0,0,0,30,32,normal


In [3]:
# Identify distinct attack types in the CSV file
distinct_attacks = dataset['attack'].unique()
distinct_attacks.tolist()

dos = ["apache2","back","land","neptune","pod","processtable","smurf","teardrop","udpstorm","worm"]
probe = ["ipsweep", "mscan", "nmap", "portsweep", "saint", "satan"]
r2l = ["ftp_write", "guess_password", "http_tunnel", "imap", "multihop",
    "named", "phf", "sendmail", "snmpgetattack", "snmpguess",
    "spy", "warezclient", "warezmaster", "xlock", "xsnoop"]
u2r = ["buffer_overflow", "loadmodule", "perl", "ps",
    "rootkit", "sqlattack", "xterm"]
protocol = ["icmp","tcp","udp"]
service = ["auth", "bgp", "courier", "csnet_ns", "ctf", "daytime", "discard", "domain", "domain_u", "echo", "eco_i", "ecr_i", "efs", "exec", "finger", "ftp", "ftp_data", "gopher", "hostnames", "http", "http_443", "http_8001", "imap4", "IRC", "iso_tsap", "klogin", "kshell", "ldap", "link", "login", "mtp", "name", "netbios_dgm", "netbios_ns", "netbios_ssn", "netstat", "nnsp", "nntp", "ntp_u", "other", "pm_dump", "pop_2", "pop_3", "printer", "private", "red_i", "remote_job", "rje", "shell", "smtp", "sql_net", "ssh", "sunrpc", "supdup", "systat", "telnet", "tim_i", "time", "urh_i", "urp_i", "uucp", "uucp_path", "vmnet", "whois", "X11", "Z39_50"]

In [4]:
# Load your ontology
ontology = get_ontology(onto_path).load()

In [5]:
def creat_individuals(ontology, list_name, class_prefix, name_prefix):
    """
    Create individuals for each attack in the list. Each individual will be of type Dos_<Attack>
    and have a name attack_<Attack>.

    :param ontology: The loaded ontology.
    :param list_name: List of individuals names.
    :param class_prefix: prefix of the class
    :param name_prefix: prefix of the name
    """
    for item in list_name:
        # Format class and individual names
        class_name = f"{class_prefix}_{item}"
        individual_name = f"{name_prefix}_{item}"

        # Check if the class already exists, if not create it
        attack_class = ontology.search_one(iri=f"*{class_name}")
        if not attack_class:
            attack_class = types.new_class(class_name, (ontology.Dos,))

        # Create an individual of this class
        new_individual = attack_class(individual_name)

In [6]:
creat_individuals(ontology, dos, "Dos", "attack")
creat_individuals(ontology, probe, "Probe", "attack")
creat_individuals(ontology, r2l, "R2l", "attack")
creat_individuals(ontology, u2r, "U2r", "attack")
creat_individuals(ontology, protocol, "Protocol", "protocol")
creat_individuals(ontology, service, "Service", "service")

In [7]:
# Save the updated ontology
ontology.save(file='ids_onto2.rdf', format="rdfxml")

In [9]:
# Select the first row for demonstration
row = dataset.iloc[0]

# Create a new individual of NetworkTraffic
new_individual = ontology.NetworkTraffic()

# Assign data properties
for column in dataset.columns:
    if column not in ['attack', 'protocol_type', 'service']:
        data_property_name = f"NT_{column}"
        # Convert numpy data types to native Python types
        value = row[column].item() if hasattr(row[column], 'item') else row[column]
        getattr(new_individual, data_property_name).append(value)

# Assign object properties
if row['attack'] != 'normal':
    for individual in ontology.individuals():
        if row['attack'] in individual.name:
            new_individual.hasAttack.append(individual)
            break

for individual in ontology.individuals():
    if row['protocol_type'] in individual.name:
        new_individual.hasProtocolType.append(individual)
        break

for individual in ontology.individuals():
    if row['service'] in individual.name:
        new_individual.hasServiceType.append(individual)
        break

# Save the updated ontology
ontology.save(file="updated_ontology.owl", format="rdfxml")

In [12]:
# Iterate over each row in the dataset
for index, row in dataset.head(5).iterrows():
    # Create a new individual of NetworkTraffic with a unique name
    new_individual = ontology.NetworkTraffic("nt_" + str(index))

    # Assign data properties
    for column in dataset.columns:
        if column not in ['attack', 'protocol_type', 'service']:
            data_property_name = f"NT_{column}"
            property_instance = getattr(ontology, data_property_name)
            # Convert numpy data types to native Python types
            value = row[column].item() if hasattr(row[column], 'item') else row[column]
            getattr(new_individual, data_property_name).append(value)

    # Assign object properties
    if row['attack'] != 'normal':
        for individual in ontology.individuals():
            if row['attack'] in individual.name:
                new_individual.hasAttack.append(individual)
                break

    for individual in ontology.individuals():
        if row['protocol_type'] in individual.name:
            new_individual.hasProtocolType.append(individual)
            break

    for individual in ontology.individuals():
        if row['service'] in individual.name:
            new_individual.hasServiceType.append(individual)
            break

# Save the updated ontology
ontology.save(file="updated_ontology.rdf", format="rdfxml")

In [15]:
# Define the SPARQL query
query = """
SELECT ?serviceType (AVG(?duration) as ?avgDuration) WHERE {
    ?traffic a :NetworkTraffic .
    ?traffic :NT_duration ?duration .
    ?traffic :hasServiceType ?service .
    ?service rdfs:label ?serviceType
}
GROUP BY ?serviceType
"""

# Execute the query
results = list(ontology.sparql(query))

# Print the results
for item in results:
    print(item)

TypeError: 'NoneType' object is not callable