**Disclaimer:** in the analysed network flow, the victim IP address was anonymized (to 0.0.0.0) and it shows only the traffic going to the victim. The source IP addresses represent the actual misused machines by the attacker group.

# Libraries required

In [1]:
import pandas as pd
import numpy as np

# Reading the raw data

In [2]:
df = pd.read_csv('flows_ddos_example.zip',delim_whitespace=True, error_bad_lines=False).reset_index(drop=True)

In [3]:
df.head()

Unnamed: 0,Date,first,seen,Duration,Proto,Src,IP,Addr:Port,Dst,IP.1,Addr:Port.1,Flags,Tos,Packets,Bytes,Flows
0,2020-08-27,14:34:21.544,0.0,TCP,83.96.157.130:16705,->,0.0.0.0:25,....S.,0,1000,48000.0,1,,,,
1,2020-08-27,14:34:53.728,0.0,TCP,151.46.89.230:2249,->,0.0.0.0:443,....S.,0,1000,64000.0,1,,,,
2,2020-08-27,14:36:20.861,0.0,TCP,216.244.66.196:46140,->,0.0.0.0:80,.A....,0,1000,52000.0,1,,,,
3,2020-08-27,14:36:32.317,100.11,UDP,112.1.202.9:3702,->,0.0.0.0:6079,......,4,2000,1.6,M,1.0,,,
4,2020-08-27,14:36:31.914,119.519,UDP,195.20.152.24:0,->,0.0.0.0:0,......,0,21000,20.8,M,1.0,,,


YES, the data is not well formatted. For this reason you need to pre-process the data! 

# Pre-processing the data

In [4]:
df.columns=['date','time','duration','protocol','src_ip_port','>>','dst_ip_port','tcp_flags','tos','packets','bytes','flows','a','b','c','d']
df['time'] = pd.to_datetime(df['time'], format='%H:%M:%S.%f').dt.time
df['src_ip'] = df['src_ip_port'].apply(lambda x: str(x).split(':')[0])
df['src_port'] = df['src_ip_port'].apply(lambda x: str(x).split(':')[1] if ':' in str(x) else 'na')
df['dst_ip'] = df['dst_ip_port'].apply(lambda x: str(x).split(':')[0])
df['dst_port'] = df['dst_ip_port'].apply(lambda x: str(x).split(':')[1] if ':' in str(x) else 'na')
df['bytes'] = np.where(df['flows'].astype(str)=='M',df['bytes']*1024*1024,df['bytes'])
df['flows'] = np.where(df['flows'].astype(str)=='M',df['a'],df['flows'])

In [5]:
df.head()

Unnamed: 0,date,time,duration,protocol,src_ip_port,>>,dst_ip_port,tcp_flags,tos,packets,bytes,flows,a,b,c,d,src_ip,src_port,dst_ip,dst_port
0,2020-08-27,14:34:21.544000,0.0,TCP,83.96.157.130:16705,->,0.0.0.0:25,....S.,0,1000,48000.0,1,,,,,83.96.157.130,16705,0.0.0.0,25
1,2020-08-27,14:34:53.728000,0.0,TCP,151.46.89.230:2249,->,0.0.0.0:443,....S.,0,1000,64000.0,1,,,,,151.46.89.230,2249,0.0.0.0,443
2,2020-08-27,14:36:20.861000,0.0,TCP,216.244.66.196:46140,->,0.0.0.0:80,.A....,0,1000,52000.0,1,,,,,216.244.66.196,46140,0.0.0.0,80
3,2020-08-27,14:36:32.317000,100.11,UDP,112.1.202.9:3702,->,0.0.0.0:6079,......,4,2000,1677721.6,1,1.0,,,,112.1.202.9,3702,0.0.0.0,6079
4,2020-08-27,14:36:31.914000,119.519,UDP,195.20.152.24:0,->,0.0.0.0:0,......,0,21000,21810380.8,1,1.0,,,,195.20.152.24,0,0.0.0.0,0


YES, there are some rubish columns!

# Delete rubish columns

In [6]:
df = df.drop(['src_ip_port','>>','dst_ip_port','a','b','c','d'], axis=1)

In [7]:
df.head()

Unnamed: 0,date,time,duration,protocol,tcp_flags,tos,packets,bytes,flows,src_ip,src_port,dst_ip,dst_port
0,2020-08-27,14:34:21.544000,0.0,TCP,....S.,0,1000,48000.0,1,83.96.157.130,16705,0.0.0.0,25
1,2020-08-27,14:34:53.728000,0.0,TCP,....S.,0,1000,64000.0,1,151.46.89.230,2249,0.0.0.0,443
2,2020-08-27,14:36:20.861000,0.0,TCP,.A....,0,1000,52000.0,1,216.244.66.196,46140,0.0.0.0,80
3,2020-08-27,14:36:32.317000,100.11,UDP,......,4,2000,1677721.6,1,112.1.202.9,3702,0.0.0.0,6079
4,2020-08-27,14:36:31.914000,119.519,UDP,......,0,21000,21810380.8,1,195.20.152.24,0,0.0.0.0,0


# Duration (first and last record)

In [8]:
print(df['time'].iloc[0])
print(df['time'].iloc[-2])

14:34:21.544000
14:50:51.844000


# ===========================
# DEFINING THE VICTIM

based on the number of flows:

In [9]:
df['dst_ip'].value_counts()

0.0.0.0    511977
Name: dst_ip, dtype: int64

based on the number of bytes:

In [10]:
df.groupby(['dst_ip'])['bytes'].sum().sort_values(ascending=False)

dst_ip
0.0.0.0    2.497940e+12
Name: bytes, dtype: float64

# ===========================

# Analysing the overall stats against the victim

In [11]:
victim = '0.0.0.0'

In [12]:
df_victim = df[df['dst_ip']==victim]

In [13]:
df_victim['protocol'].value_counts()

UDP     424043
TCP      60359
GRE       5733
ICMP      5581
EGP       5503
AH        5499
PIM       5259
Name: protocol, dtype: int64

In [14]:
df_victim['src_port'].value_counts()

0        208588
53       140269
3283      62042
3702      34327
123        6223
          ...  
54198         1
50626         1
18183         1
59899         1
54395         1
Name: src_port, Length: 39468, dtype: int64

In [15]:
df_victim['dst_port'].value_counts()

80       215302
0        203006
2079       8883
40678      8834
46961      8743
          ...  
47            1
5.1           1
317           1
480           1
432           1
Name: dst_port, Length: 71, dtype: int64

# ===========================

# VECTOR 1: source port 0 (and destination port 0)

In [16]:
df_vector1 = df_victim[df_victim['src_port']=='0']

In [17]:
df_vector1['dst_port'].value_counts()

0       203006
3.3       4792
11.0       334
3.1        314
3.0         85
3.13        35
3.2         17
3.10         3
80           1
5.1          1
Name: dst_port, dtype: int64

In [18]:
df_vector1 = df_vector1[df_vector1['dst_port']=='0']

In [19]:
df_vector1.groupby(['src_ip']).agg({'packets':'sum', 'bytes':'sum'}).sort_values(by='bytes', ascending = False)

Unnamed: 0_level_0,packets,bytes
src_ip,Unnamed: 1_level_1,Unnamed: 2_level_1
115.78.11.156,1123000,625370726.4
193.218.137.36,498000,545049804.8
14.241.247.251,873000,477311795.2
171.246.67.36,561000,476472934.4
113.170.216.69,589000,471754342.4
...,...,...
199.219.158.167,1000,40000.0
67.64.38.232,1000,40000.0
199.242.145.241,1000,40000.0
56.99.144.142,1000,40000.0


# ===========================
# REMAINING
Now we must remove the previous attack vector found and keep analysing the data to find other attack vectors!

In [20]:
df_remaining = df_victim[(df_victim['src_port']!='0') & (df_victim['dst_port']!='0')]
print("Source port frequency of flows:")
display (df_remaining['src_port'].value_counts())
print()
print("Destination port frequency of flows:")
display (df_remaining['dst_port'].value_counts())

Source port frequency of flows:


53       140269
3283      62042
3702      34327
123        6223
40256        21
          ...  
16278         1
32276         1
51011         1
42910         1
31684         1
Name: src_port, Length: 39467, dtype: int64


Destination port frequency of flows:


80       215301
2079       8883
40678      8834
46961      8743
27141      8716
          ...  
8             2
480           1
432           1
47            1
317           1
Name: dst_port, Length: 62, dtype: int64

# Vector 2: src port 53 ( and dst port 80)

In [21]:
df_vector2 = df_remaining[df_remaining['src_port']=='53']

In [22]:
df_vector2['dst_port'].value_counts()

80     140159
7           7
268         6
44          5
405         4
245         4
410         4
299         4
267         4
417         4
269         4
10          4
265         4
406         4
503         4
298         3
411         3
266         3
318         3
259         3
408         3
9           3
264         3
409         3
416         3
261         3
293         2
407         2
8           2
418         2
300         2
297         2
480         1
317         1
47          1
Name: dst_port, dtype: int64

In [23]:
df_vector2 = df_remaining[(df_remaining['src_port']=='53') &(df_remaining['dst_port']=='80')]

In [24]:
df_vector2.groupby(['src_ip']).agg({'packets':'sum', 'bytes':'sum'}).sort_values(by='bytes', ascending = False)

Unnamed: 0_level_0,packets,bytes
src_ip,Unnamed: 1_level_1,Unnamed: 2_level_1
200.4.197.74,59000,92798976.0
160.238.209.33,58000,91226112.0
79.106.161.34,58000,91226112.0
64.191.106.114,57000,89653248.0
187.9.78.166,57000,89653248.0
...,...,...
181.39.46.125,1000,48000.0
185.233.13.243,1000,48000.0
165.165.75.57,1000,40000.0
45.230.84.13,1000,40000.0


## Intersection Vector 1 and Vector 2

In [25]:
ips_vector1 = df_vector1['src_ip'].unique()
ips_vector2 = df_vector2['src_ip'].unique()
print('unique ips in vector 1 (src_port 0 to dst_port 0):',len(ips_vector1))
print('unique ips in vector 2 (src_port 53 to dst_port 80):',len(ips_vector2))
print('unique ips in the intersection of IPs in vector 1 and vector2',len(list(set(ips_vector1) & set(ips_vector2))))

unique ips in vector 1 (src_port 0 to dst_port 0): 72984
unique ips in vector 2 (src_port 53 to dst_port 80): 54215
unique ips in the intersection of IPs in vector 1 and vector2 44607


# ===========================
# REMAINING
Now we must remove the previous attack vector found and keep analysing the data to find other attack vectors!

In [26]:
df_remaining = df_remaining[(df_remaining['src_port']!='53') & (df_victim['dst_port']!='80')]
print("Source port frequency of flows:")
display (df_remaining['src_port'].value_counts())
print()
print("Destination port frequency of flows:")
display (df_remaining['dst_port'].value_counts())

Source port frequency of flows:


  df_remaining = df_remaining[(df_remaining['src_port']!='53') & (df_victim['dst_port']!='80')]


3283     62042
3702     19637
123       6222
40256       20
11376       14
50131       13
11931        4
16948        4
1185         4
4702         4
443          2
3329         1
19291        1
45100        1
49984        1
39032        1
16705        1
53226        1
37172        1
20276        1
2249         1
48760        1
38664        1
Name: src_port, dtype: int64


Destination port frequency of flows:


2079     8883
40678    8834
46961    8743
27141    8716
6079     6844
11219    6417
1649     6379
13309    3415
19131    3056
46191    2900
50368    2422
46062    2371
17058    2320
52914    2106
7024     2102
12098    2059
51396    2057
63404    1907
31077    1679
33150    1677
45959    1656
22475    1417
44          5
443         4
431         3
25          3
430         2
432         1
Name: dst_port, dtype: int64

# ===========================

# Vector 3: source port 3283 (to "MANY")
https://www.netscout.com/blog/asert/call-arms-apple-remote-management-service-udp

In [27]:
df_vector3 = df_remaining[df_remaining['src_port']=='3283']

In [28]:
df_vector3['dst_port'].value_counts()

2079     8874
40678    8829
46961    8733
27141    8708
13309    3414
19131    3053
46191    2896
50368    2421
46062    2370
17058    2316
7024     2100
63404    1905
31077    1677
33150    1676
45959    1655
22475    1415
Name: dst_port, dtype: int64

In [29]:
df_vector3.groupby(['src_ip']).agg({'packets':'sum', 'bytes':'sum'}).sort_values(by='bytes', ascending = False)

Unnamed: 0_level_0,packets,bytes
src_ip,Unnamed: 1_level_1,Unnamed: 2_level_1
90.85.218.23,59000,63753420.8
141.219.152.93,58000,62809702.4
90.77.181.103,58000,62809702.4
185.209.212.59,57000,61656268.8
173.164.149.1,56000,60712550.4
...,...,...
208.87.18.25,1000,390000.0
128.223.127.193,1000,390000.0
128.255.218.158,1000,390000.0
79.11.165.179,1000,390000.0


In [30]:
# ips_vector1 = df_vector1['src_ip'].unique()
# ips_vector2 = df_vector2['src_ip'].unique()
ips_vector3 = df_vector3['src_ip'].unique()
print('unique ips in vector 1 (src_port 0 to dst_port 0):',len(ips_vector1))
print('unique ips in vector 2 (src_port 53 to dst_port 80):',len(ips_vector2))
print('unique ips in vector 2 (src_port 3283 to dst_port ANY):',len(ips_vector3))
print('unique ips in the intersection of IPs in vector 1 and vector2',len(list(set(ips_vector1) & set(ips_vector2))))
print('unique ips in the intersection of IPs in vector 1 and vector3',len(list(set(ips_vector1) & set(ips_vector3))))
print('unique ips in the intersection of IPs in vector 2 and vector3',len(list(set(ips_vector2) & set(ips_vector3))))

unique ips in vector 1 (src_port 0 to dst_port 0): 72984
unique ips in vector 2 (src_port 53 to dst_port 80): 54215
unique ips in vector 2 (src_port 3283 to dst_port ANY): 7631
unique ips in the intersection of IPs in vector 1 and vector2 44607
unique ips in the intersection of IPs in vector 1 and vector3 1
unique ips in the intersection of IPs in vector 2 and vector3 3


# ===========================
# REMAINING
Now we must remove the previous attack vector found and keep analysing the data to find other attack vectors!

In [31]:
df_remaining = df_remaining[(df_remaining['src_port']!='3283')]
print("Source port frequency of flows:")
display (df_remaining['src_port'].value_counts())
print()
print("Destination port frequency of flows:")
display (df_remaining['dst_port'].value_counts())

Source port frequency of flows:


3702     19637
123       6222
40256       20
11376       14
50131       13
16948        4
1185         4
11931        4
4702         4
443          2
19291        1
38664        1
37172        1
45100        1
2249         1
3329         1
48760        1
39032        1
16705        1
20276        1
49984        1
53226        1
Name: src_port, dtype: int64


Destination port frequency of flows:


6079     6844
11219    6417
1649     6379
52914    2106
12098    2059
51396    2057
46961      10
2079        9
27141       8
44          5
40678       5
17058       4
46191       4
443         4
25          3
431         3
19131       3
22475       2
31077       2
63404       2
430         2
7024        2
432         1
46062       1
13309       1
45959       1
50368       1
33150       1
Name: dst_port, dtype: int64

# ===========================

# Vector 4: src_port 3702 (and dst_ports 6079, 11219, and 1649)
https://blogs.akamai.com/sitr/2019/09/new-ddos-vector-observed-in-the-wild-wsd-attacks-hitting-35gbps.html

In [32]:
df_vector4 = df_remaining[df_remaining['src_port']=='3702']

In [33]:
df_vector4['dst_port'].value_counts()

6079     6842
11219    6410
1649     6376
44          5
431         3
432         1
Name: dst_port, dtype: int64

In [34]:
df_vector4 = df_remaining[(df_remaining['src_port']=='3702') & (df_remaining['dst_port'].isin(['6079', '11219','1649']))]

In [35]:
df_vector4.groupby(['src_ip']).agg({'packets':'sum', 'bytes':'sum'}).sort_values(by='bytes', ascending = False)

Unnamed: 0_level_0,packets,bytes
src_ip,Unnamed: 1_level_1,Unnamed: 2_level_1
128.223.240.136,205000,322437120.0
14.160.220.177,205000,320759398.4
128.223.240.122,200000,314572800.0
38.104.44.46,198000,311427072.0
200.105.137.124,196000,308281344.0
...,...,...
200.124.121.126,1000,788000.0
200.46.250.244,1000,788000.0
217.92.60.220,1000,756000.0
82.161.142.172,1000,756000.0


In [36]:
# ips_vector1 = df_vector1['src_ip'].unique()
# ips_vector2 = df_vector2['src_ip'].unique()
# ips_vector3 = df_vector3['src_ip'].unique()
ips_vector4 = df_vector4['src_ip'].unique()
print('unique ips in vector 1 (src_port 0 to dst_port 0):',len(ips_vector1))
print('unique ips in vector 2 (src_port 53 to dst_port 80):',len(ips_vector2))
print('unique ips in vector 3 (src_port 3283 to dst_port ANY):',len(ips_vector3))
print('unique ips in vector 4 (src_port 3702 AND dst_ports [6079, 11219, 1649]):',len(ips_vector4))
print()
print('unique ips in the intersection of IPs in vector 1 and vector2',len(list(set(ips_vector1) & set(ips_vector2))))
print('unique ips in the intersection of IPs in vector 1 and vector3',len(list(set(ips_vector1) & set(ips_vector3))))
print('unique ips in the intersection of IPs in vector 2 and vector3',len(list(set(ips_vector2) & set(ips_vector3))))

print('unique ips in the intersection of IPs in vector 1 and vector4',len(list(set(ips_vector1) & set(ips_vector4))))
print('unique ips in the intersection of IPs in vector 2 and vector4',len(list(set(ips_vector2) & set(ips_vector4))))
print('unique ips in the intersection of IPs in vector 3 and vector4',len(list(set(ips_vector3) & set(ips_vector4))))

unique ips in vector 1 (src_port 0 to dst_port 0): 72984
unique ips in vector 2 (src_port 53 to dst_port 80): 54215
unique ips in vector 3 (src_port 3283 to dst_port ANY): 7631
unique ips in vector 4 (src_port 3702 AND dst_ports [6079, 11219, 1649]): 2508

unique ips in the intersection of IPs in vector 1 and vector2 44607
unique ips in the intersection of IPs in vector 1 and vector3 1
unique ips in the intersection of IPs in vector 2 and vector3 3
unique ips in the intersection of IPs in vector 1 and vector4 797
unique ips in the intersection of IPs in vector 2 and vector4 0
unique ips in the intersection of IPs in vector 3 and vector4 0


# ===========================
# REMAINING
Now we must remove the previous attack vector found and keep analysing the data to find other attack vectors!

In [37]:
df_remaining = df_remaining[(df_remaining['src_port']!='3702') & (~df_remaining['dst_port'].isin(['6079', '11219','1649']))]
print("Source port frequency of flows:")
display (df_remaining['src_port'].value_counts())
print()
print("Destination port frequency of flows:")
display (df_remaining['dst_port'].value_counts())

Source port frequency of flows:


123      6222
40256      20
11376      14
50131      13
1185        4
443         2
2249        1
48760       1
38664       1
39032       1
37172       1
45100       1
49984       1
16705       1
19291       1
20276       1
3329        1
53226       1
Name: src_port, dtype: int64


Destination port frequency of flows:


52914    2106
12098    2059
51396    2057
46961      10
2079        9
27141       8
40678       5
17058       4
46191       4
443         4
25          3
19131       3
7024        2
63404       2
31077       2
430         2
22475       2
13309       1
50368       1
46062       1
33150       1
45959       1
Name: dst_port, dtype: int64

# ===========================

# Vector 5: src_port 123 (and dst_ports [52914, 12098, 51396])

In [38]:
df_vector5 = df_remaining[df_remaining['src_port']=='123']

In [39]:
df_vector5['dst_port'].value_counts()

52914    2106
12098    2059
51396    2057
Name: dst_port, dtype: int64

In [40]:
df_vector5 = df_remaining[(df_remaining['src_port']=='123') & (df_remaining['dst_port'].isin(['52914', '12098','51396']))]

In [41]:
df_vector5.groupby(['src_ip']).agg({'packets':'sum', 'bytes':'sum'}).sort_values(by='bytes', ascending = False)

Unnamed: 0_level_0,packets,bytes
src_ip,Unnamed: 1_level_1,Unnamed: 2_level_1
150.136.41.25,2070000,1.015546e+09
79.172.239.41,2052000,1.006843e+09
150.136.33.4,1908000,9.365881e+08
158.101.119.64,1809000,8.877244e+08
150.136.188.38,1806000,8.863613e+08
...,...,...
60.175.3.22,1000,3.600000e+04
223.247.37.180,1000,3.600000e+04
36.92.190.29,1000,3.600000e+04
120.211.26.104,1000,3.600000e+04


In [42]:
# ips_vector1 = df_vector1['src_ip'].unique()
# ips_vector2 = df_vector2['src_ip'].unique()
# ips_vector3 = df_vector3['src_ip'].unique()
# ips_vector4 = df_vector4['src_ip'].unique()
ips_vector5 = df_vector5['src_ip'].unique()
print('unique ips in vector 1 (src_port 0 to dst_port 0):',len(ips_vector1))
print('unique ips in vector 2 (src_port 53 to dst_port 80):',len(ips_vector2))
print('unique ips in vector 3 (src_port 3283 to dst_port ANY):',len(ips_vector3))
print('unique ips in vector 4 (src_port 3702 AND dst_ports [6079, 11219, 1649]):',len(ips_vector4))
print('unique ips in vector 5 (src_port 123 AND dst_ports [52914, 12098, 51396]):',len(ips_vector5))
print()

print('unique ips in the intersection of IPs in vector 1 and vector2',len(list(set(ips_vector1) & set(ips_vector2))))
print('unique ips in the intersection of IPs in vector 1 and vector3',len(list(set(ips_vector1) & set(ips_vector3))))
print('unique ips in the intersection of IPs in vector 2 and vector3',len(list(set(ips_vector2) & set(ips_vector3))))
print()

print('unique ips in the intersection of IPs in vector 1 and vector4',len(list(set(ips_vector1) & set(ips_vector4))))
print('unique ips in the intersection of IPs in vector 2 and vector4',len(list(set(ips_vector2) & set(ips_vector4))))
print('unique ips in the intersection of IPs in vector 3 and vector4',len(list(set(ips_vector3) & set(ips_vector4))))
print()

print('unique ips in the intersection of IPs in vector 1 and vector5',len(list(set(ips_vector1) & set(ips_vector5))))
print('unique ips in the intersection of IPs in vector 2 and vector5',len(list(set(ips_vector2) & set(ips_vector5))))
print('unique ips in the intersection of IPs in vector 3 and vector5',len(list(set(ips_vector3) & set(ips_vector5))))
print('unique ips in the intersection of IPs in vector 4 and vector5',len(list(set(ips_vector4) & set(ips_vector5))))

len(ips_vector1) +  len(ips_vector2) + len(ips_vector3) + len(ips_vector4) + len(ips_vector5) - 44607

unique ips in vector 1 (src_port 0 to dst_port 0): 72984
unique ips in vector 2 (src_port 53 to dst_port 80): 54215
unique ips in vector 3 (src_port 3283 to dst_port ANY): 7631
unique ips in vector 4 (src_port 3702 AND dst_ports [6079, 11219, 1649]): 2508
unique ips in vector 5 (src_port 123 AND dst_ports [52914, 12098, 51396]): 1229

unique ips in the intersection of IPs in vector 1 and vector2 44607
unique ips in the intersection of IPs in vector 1 and vector3 1
unique ips in the intersection of IPs in vector 2 and vector3 3

unique ips in the intersection of IPs in vector 1 and vector4 797
unique ips in the intersection of IPs in vector 2 and vector4 0
unique ips in the intersection of IPs in vector 3 and vector4 0

unique ips in the intersection of IPs in vector 1 and vector5 0
unique ips in the intersection of IPs in vector 2 and vector5 0
unique ips in the intersection of IPs in vector 3 and vector5 0
unique ips in the intersection of IPs in vector 4 and vector5 0


93960

# ===========================
# REMAINING
Now we must remove the previous attack vector found and keep analysing the data to find other attack vectors!

In [43]:
df_remaining = df_remaining[(df_remaining['src_port']!='123') & (~df_remaining['dst_port'].isin(['52914', '12098','51396']))]
print("Source port frequency of flows:")
display (df_remaining['src_port'].value_counts())
print()
print("Destination port frequency of flows:")
display (df_remaining['dst_port'].value_counts())

Source port frequency of flows:


40256    20
11376    14
50131    13
1185      4
443       2
48760     1
16705     1
20276     1
39032     1
3329      1
37172     1
38664     1
53226     1
2249      1
49984     1
45100     1
19291     1
Name: src_port, dtype: int64


Destination port frequency of flows:


46961    10
2079      9
27141     8
40678     5
443       4
46191     4
17058     4
25        3
19131     3
63404     2
430       2
22475     2
31077     2
7024      2
50368     1
45959     1
46062     1
33150     1
13309     1
Name: dst_port, dtype: int64

# STOP!!!!
The remaining are too less flows!

# Is there any intersection between the IP in the previous attack vectors and the non-UDP traffic?

In [44]:
other_ips = df_victim[df_victim['protocol'] != 'UDP']['src_ip'].unique()

In [45]:
print(len(list(set(other_ips) & set(ips_vector1))))
print(len(list(set(other_ips) & set(ips_vector2))))
print(len(list(set(other_ips) & set(ips_vector3))))
print(len(list(set(other_ips) & set(ips_vector4))))
print(len(list(set(other_ips) & set(ips_vector5))))

22033
15
32
12
0


YES, highly related to IP address that sent fragmented packets (as a consequence of miss-behavior)

In [46]:
df_victim[(df_victim['protocol'] != 'UDP') & (df['src_ip'].isin(ips_vector1))]['protocol'].value_counts()

GRE     5733
EGP     5503
AH      5499
PIM     5259
ICMP      99
TCP        2
Name: protocol, dtype: int64

# ===============================
# Exporting the list of IP addresses in each attack vector

Vector 1 (fragmentation) will not be exported because it is a consequence of the other vectors.

In [47]:
pd.DataFrame(ips_vector2).to_csv('attack_vectors/vector1_src53_dst80.csv')

In [48]:
pd.DataFrame(ips_vector3).to_csv('attack_vectors/vector2_src3283_dstANY.csv')

In [49]:
pd.DataFrame(ips_vector4).to_csv('attack_vectors/vector3_src3702_dst6079-11219-1649.csv')

In [50]:
pd.DataFrame(ips_vector5).to_csv('attack_vectors/vector4_src123_dst52914-12098-51396.csv')