In [12]:
import pandas as pd

In [13]:
# Load the packet-capture export into a DataFrame. The path is relative, so it
# is resolved against the notebook's current working directory.
df = pd.read_csv('MaliciousWS_GAI.csv')
# Display the first five rows as a quick check of the columns that were read.
df.head()

Unnamed: 0,No.,Time,Source,Destination,Protocol,Length,Info
0,1,0.0,87.248.119.251,10.122.1.223,TCP,66,443 > 35627 [ACK] Seq=1 Ack=1 Win=5 Len=0 SL...
1,2,0.006503,2a04:4e42:400::300,2402:ad80:1f7:8b48:61bd:6b7b:da86:5eb5,TCP,86,443 > 35862 [ACK] Seq=1 Ack=1 Win=278 Len=0 ...
2,3,0.102498,2a04:4e42:400::300,2402:ad80:1f7:8b48:61bd:6b7b:da86:5eb5,TCP,86,443 > 35857 [ACK] Seq=1 Ack=1 Win=297 Len=0 ...
3,4,0.105345,10.122.1.223,18.64.141.12,TCP,55,35895 > 443 [ACK] Seq=1 Ack=1 Win=255 Len=1
4,5,0.142809,2402:ad80:1f7:8b48:61bd:6b7b:da86:5eb5,2404:6800:4003:22::6,TCP,1414,36195 > 443 [ACK] Seq=1 Ack=1 Win=2070 Len=1...


In [14]:
# Tally packets per protocol first, then report the number of distinct
# protocols followed by the per-protocol breakdown.
protocol_counts = df['Protocol'].value_counts()
print(f"Total unique protocols: {df['Protocol'].nunique()}\n")
print(protocol_counts)

Total unique protocols: 12

Protocol
TCP        6200
TLSv1.3    3184
QUIC       1399
TLSv1.2     392
DNS         180
ICMP        138
UDP          72
HTTP         18
SSLv2        11
ICMPv6        8
ARP           4
SSL           2
Name: count, dtype: int64


In [10]:


# Clean up column names: strip leading/trailing whitespace from every header
# (interior spaces are kept) so lookups such as row['Protocol'] match exactly.
df.columns = df.columns.str.strip()

# 2. Define the Super Guard Function
def super_security_guard(row):
    """Label one captured packet as malicious/suspicious (1) or normal (0).

    Parameters
    ----------
    row : mapping
        Anything indexable by the keys 'Protocol', 'Length', 'Info',
        'Source' and 'Destination' (e.g. a DataFrame row passed by
        ``df.apply(..., axis=1)``).

    Returns
    -------
    int
        1 when one of the rules below fires, otherwise 0.

    Rules, in evaluation order
    --------------------------
    1. ICMP: flagged only when Length > 1000; every other ICMP packet is 0.
    2. HTTP: flagged when Info contains 'POST', '%20OR%20', '1=1', 'admin',
       '404', '500' or 'Error'.
    3. TCP: flagged when Info contains '[RST]' or '[RST, ACK]' and the
       destination is not a 192.168.x.x address.
    4. Any remaining packet whose source or destination is exactly one of
       the two watch-listed hosts is flagged.
    5. Everything else is 0.
    """
    nmap_target = '45.33.32.156'     # ScanMe host used for the Nmap scan
    vulnweb_target = '44.228.249.3'  # Vulnweb host used for the SQL injection

    protocol = row['Protocol']
    length = row['Length']
    info = str(row['Info'])  # a missing Info becomes the text 'nan', which matches no rule

    # Addresses are compared as whole values, never as substrings, so that
    # look-alikes such as '145.33.32.156' are not mistaken for a watch-listed
    # host. Missing cells (NaN/None are not str) are treated as "no address"
    # instead of raising TypeError on a membership test.
    source = row['Source'].strip() if isinstance(row['Source'], str) else ''
    dest = row['Destination'].strip() if isinstance(row['Destination'], str) else ''

    # --- RULE 1: oversized ping (DoS) ---
    # NOTE(review): ICMP packets of 1000 bytes or less return 0 here, before
    # the watch list in rule 4 is consulted - confirm that is intended.
    if protocol == 'ICMP':
        return 1 if length > 1000 else 0

    # --- RULE 2: web attack indicators ---
    if protocol == 'HTTP':
        http_markers = ('POST', '%20OR%20', '1=1', 'admin', '404', '500', 'Error')
        if any(marker in info for marker in http_markers):
            return 1

    # --- RULE 3: scanner resets ---
    elif protocol == 'TCP':
        is_reset = '[RST]' in info or '[RST, ACK]' in info
        # A prefix test keeps addresses that merely contain "192.168"
        # (e.g. 10.192.168.5) from being treated as the local network.
        if is_reset and not dest.startswith('192.168.'):
            return 1

    # --- RULE 4: watch-listed hosts, any protocol that reaches this point ---
    if {source, dest} & {nmap_target, vulnweb_target}:
        return 1

    # --- RULE 5: nothing matched ---
    return 0

# Run the rule-based guard over every row and store its 0/1 verdict.
df['Label'] = df.apply(super_security_guard, axis=1)

# Summarise how many packets received each label.
print("Traffic Report:")
print(df['Label'].value_counts())

# Show the first ten flagged packets for a manual sanity check.
print("\nPreview of Malicious Packets:")
print(df.loc[df['Label'] == 1, ['Protocol', 'Length', 'Info']].head(10))

Traffic Report:
Label
0    11364
1      244
Name: count, dtype: int64

Preview of Malicious Packets:
    Protocol  Length                                               Info
102     ICMP    1042  Echo (ping) request  id=0x0001, seq=230/58880,...
104     ICMP    1042  Echo (ping) reply    id=0x0001, seq=230/58880,...
355     ICMP    1042  Echo (ping) request  id=0x0001, seq=231/59136,...
356     ICMP    1042  Echo (ping) reply    id=0x0001, seq=231/59136,...
375     ICMP    1042  Echo (ping) request  id=0x0001, seq=232/59392,...
376     ICMP    1042  Echo (ping) reply    id=0x0001, seq=232/59392,...
391     ICMP    1042  Echo (ping) request  id=0x0001, seq=233/59648,...
414     ICMP    1042  Echo (ping) reply    id=0x0001, seq=233/59648,...
435      TCP      54   35871  >  443 [RST, ACK] Seq=2 Ack=1 Win=0 Len=0
437      TCP      74   35933  >  443 [RST, ACK] Seq=2 Ack=1 Win=0 Len=0


In [16]:

# Same header clean-up as the earlier cell; stripping already-stripped names
# changes nothing, so repeating it here is harmless.
df.columns = df.columns.str.strip() # Trim leading/trailing whitespace from column names

def advanced_protocol_guard(row):
    """Label one captured packet as malicious/suspicious (1) or normal (0), per protocol.

    Parameters
    ----------
    row : mapping
        Anything indexable by the keys 'Protocol', 'Length', 'Info',
        'Source' and 'Destination' (e.g. a DataFrame row passed by
        ``df.apply(..., axis=1)``).

    Returns
    -------
    int
        1 when a rule for the packet's protocol fires, otherwise 0.

    Rules
    -----
    * ICMP: 1 when Length > 1000, else 0.
    * TCP: 1 when Length < 70 and Info contains 'RST' or 'SYN', or when an
      endpoint is exactly the Nmap target.
    * HTTP: 1 when Info contains 'POST', '%20OR%20', '1=1', '404', '500'
      or 'Error'.
    * UDP: 1 when Length < 35.
    * TLSv1.2 / TLSv1.3 / QUIC / SSL / SSLv2: 1 when an endpoint is exactly
      one of the two watch-listed hosts.
    * DNS / ARP / MDNS / ICMPv6: 1 when Info contains 'vulnweb' or 'nmap'.
    * Any other protocol: 0.
    """
    nmap_target = '45.33.32.156'     # host targeted by the Nmap scan
    vulnweb_target = '44.228.249.3'  # host targeted by the SQL injection

    proto = row['Protocol']
    length = row['Length']
    info = str(row['Info'])  # a missing Info becomes the text 'nan', which matches no rule

    # Collect the endpoints as whole, whitespace-trimmed addresses. Exact
    # membership avoids substring false positives (e.g. '145.33.32.156'),
    # and skipping non-string cells (NaN/None) avoids a TypeError.
    endpoints = {
        address.strip()
        for address in (row['Source'], row['Destination'])
        if isinstance(address, str)
    }

    # --- 1. ICMP (oversized ping) ---
    if proto == 'ICMP':
        return 1 if length > 1000 else 0

    # --- 2. TCP (port scan) ---
    if proto == 'TCP':
        # Small packets carrying a reset or sync flag are treated as scan probes.
        if length < 70 and ('RST' in info or 'SYN' in info):
            return 1
        return 1 if nmap_target in endpoints else 0

    # --- 3. HTTP (SQL injection indicators) ---
    if proto == 'HTTP':
        http_markers = ('POST', '%20OR%20', '1=1', '404', '500', 'Error')
        return 1 if any(marker in info for marker in http_markers) else 0

    # --- 4. UDP (tiny probes) ---
    if proto == 'UDP':
        # NOTE(review): the recorded run labels all 72 UDP rows 0, so this
        # threshold never fired on that capture - confirm it is low enough
        # to be below any real frame length.
        return 1 if length < 35 else 0

    # --- 5. Encrypted traffic: payload is unreadable, so only endpoints are checked ---
    if proto in ['TLSv1.2', 'TLSv1.3', 'QUIC', 'SSL', 'SSLv2']:
        return 1 if endpoints & {nmap_target, vulnweb_target} else 0

    # --- 6. Background protocols: flagged only when the Info text names a target ---
    if proto in ['DNS', 'ARP', 'MDNS', 'ICMPv6']:
        return 1 if ('vulnweb' in info or 'nmap' in info) else 0

    return 0  # unknown protocol: default to normal

# Label every row with the per-protocol guard; this overwrites any 'Label'
# column written by an earlier cell.
df['Label'] = df.apply(advanced_protocol_guard, axis=1)

# Count packets for each (protocol, label) pair.
print("Traffic Classification:")
print(df.groupby(by=['Protocol', 'Label']).size())

Traffic Classification:
Protocol  Label
ARP       0           4
DNS       0         174
          1           6
HTTP      0          14
          1           4
ICMP      1         138
ICMPv6    0           8
QUIC      0        1399
SSL       0           2
SSLv2     0          11
TCP       0        5931
          1         269
TLSv1.2   0         392
TLSv1.3   0        3184
UDP       0          72
dtype: int64
