In [1]:
import pandas as pd

In [2]:
# Load the exported packet capture and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='MaliciousWS_1GAI.csv')
df.head(n=5)

Unnamed: 0,No.,Time,Source,Destination,Protocol,Length,Info
0,1,0.0,87.248.119.251,10.122.1.223,TCP,66,443 > 35627 [ACK] Seq=1 Ack=1 Win=5 Len=0 SL...
1,2,0.006503,2a04:4e42:400::300,2402:ad80:1f7:8b48:61bd:6b7b:da86:5eb5,TCP,86,443 > 35862 [ACK] Seq=1 Ack=1 Win=278 Len=0 ...
2,3,0.102498,2a04:4e42:400::300,2402:ad80:1f7:8b48:61bd:6b7b:da86:5eb5,TCP,86,443 > 35857 [ACK] Seq=1 Ack=1 Win=297 Len=0 ...
3,4,0.105345,10.122.1.223,18.64.141.12,TCP,55,35895 > 443 [ACK] Seq=1 Ack=1 Win=255 Len=1
4,5,0.142809,2402:ad80:1f7:8b48:61bd:6b7b:da86:5eb5,2404:6800:4003:22::6,TCP,1414,36195 > 443 [ACK] Seq=1 Ack=1 Win=2070 Len=1...


In [3]:
# Summarise which protocols appear in the capture and how many packets each has.
protocol_counts = df['Protocol'].value_counts()
print(f"Total unique protocols: {df['Protocol'].nunique()}\n")
print(protocol_counts)

Total unique protocolcols: 12

Protocol
TCP        6200
TLSv1.3    3184
QUIC       1399
TLSv1.2     392
DNS         180
ICMP        138
UDP          72
HTTP         18
SSLv2        11
ICMPv6        8
ARP           4
SSL           2
Name: count, dtype: int64


In [6]:
# Remove stray leading/trailing whitespace from the column labels so that
# lookups such as row['Source'] match exactly.
df.columns = df.columns.map(str.strip)

def super_security_guard(row):
    """Label a single captured packet as malicious (1) or normal (0).

    The function applies a fixed set of hand-written heuristics to one row of
    the packet table and is meant to be used with ``df.apply(..., axis=1)``.

    Parameters
    ----------
    row : mapping
        Anything indexable by the keys 'Source', 'Destination', 'Protocol',
        'Length' and 'Info' (for example a pandas Series or a dict).

    Returns
    -------
    int
        1 if any rule flags the packet, otherwise 0.

    Notes
    -----
    Rules, in evaluation order:

    1. Any packet whose source or destination contains one of the flagged
       host addresses is malicious, whatever its protocol.
    2. ICMP: only oversized packets (length > 1000) are flagged.
    3. HTTP: POST requests, SQL-injection fragments, 'admin', and error
       markers ('404', '500', 'Error') in the info text are flagged.
    4. TCP: small packets (length < 70) carrying RST or SYN are flagged.
    5. UDP: tiny packets (length < 35) are flagged.
    6. DNS / ARP / MDNS / ICMPv6: flagged only when the info text mentions
       'vulnweb' or 'nmap'.
    7. Everything else, including encrypted TLS/QUIC/SSL traffic whose
       payload cannot be inspected, is treated as normal.
    """

    def _text(value):
        # Empty CSV cells are not strings (pandas yields float NaN); treat
        # them as empty text so the substring tests below cannot raise.
        return value if isinstance(value, str) else ''

    # Hosts attacked during the capture: the Nmap scan target and the
    # Vulnweb target.
    flagged_hosts = ('45.33.32.156', '44.228.249.3')

    source = _text(row['Source'])
    destination = _text(row['Destination'])
    protocol = row['Protocol']
    length = row['Length']
    info = _text(row['Info'])

    # Rule 1: checked first so it really covers every protocol. Previously
    # ICMP and the DNS/ARP/MDNS/ICMPv6 branch returned before this check
    # could run.
    if any(host in source or host in destination for host in flagged_hosts):
        return 1

    # Rule 2: ICMP ping flood (DoS) -- only unusually large pings count.
    if protocol == 'ICMP':
        return 1 if length > 1000 else 0

    # Rule 3: web attacks. Normal browsing is mostly GET, so POST, SQL
    # injection fragments, admin access and error responses are suspicious.
    if protocol == 'HTTP':
        http_markers = ('POST', '%20OR%20', '1=1', 'admin', '404', '500', 'Error')
        return 1 if any(marker in info for marker in http_markers) else 0

    # Rule 4: Nmap-style scanning -- small TCP packets with RST or SYN.
    # 'RST' already matches '[RST]' and '[RST, ACK]', so no separate test
    # for those flag strings is needed.
    if protocol == 'TCP':
        if length < 70 and ('RST' in info or 'SYN' in info):
            return 1
        return 0

    # Rule 5: UDP probing -- near-empty UDP packets used to ping hosts.
    if protocol == 'UDP':
        return 1 if length < 35 else 0

    # Rule 6: background noise is normal unless it names the attacked site
    # or the scanner.
    if protocol in ('DNS', 'ARP', 'MDNS', 'ICMPv6'):
        return 1 if ('vulnweb' in info or 'nmap' in info) else 0

    # Rule 7: encrypted traffic (TLSv1.2, TLSv1.3, QUIC, SSL, SSLv2) cannot be
    # inspected and any other protocol has no specific rule; both are normal
    # once the host check above has passed.
    return 0

# Label every packet with the rule-based guard: 1 = flagged, 0 = normal.
df['Label'] = df.apply(super_security_guard, axis=1)

# Show how many packets ended up in each class.
print("Traffic Report:")
label_counts = df['Label'].value_counts()
print(label_counts)

# Show the first few flagged packets as a sanity check on the rules.
print("\nPreview of Malicious Packets:")
is_flagged = df['Label'] == 1
flagged_preview = df.loc[is_flagged, ['Protocol', 'Length', 'Info']].head(10)
print(flagged_preview)

Traffic Report:
Label
0    11139
1      469
Name: count, dtype: int64

Preview of Malicious Packets:
    Protocol  Length                                               Info
102     ICMP    1042  Echo (ping) request  id=0x0001, seq=230/58880,...
104     ICMP    1042  Echo (ping) reply    id=0x0001, seq=230/58880,...
355     ICMP    1042  Echo (ping) request  id=0x0001, seq=231/59136,...
356     ICMP    1042  Echo (ping) reply    id=0x0001, seq=231/59136,...
375     ICMP    1042  Echo (ping) request  id=0x0001, seq=232/59392,...
376     ICMP    1042  Echo (ping) reply    id=0x0001, seq=232/59392,...
391     ICMP    1042  Echo (ping) request  id=0x0001, seq=233/59648,...
393      TCP      66  36348  >  53 [SYN] Seq=0 Win=64240 Len=0 MSS=1...
394      TCP      66  36349  >  53 [SYN] Seq=0 Win=64240 Len=0 MSS=1...
395      TCP      66  36350  >  53 [SYN] Seq=0 Win=64240 Len=0 MSS=1...
