## Mapping to known libraries & TLS version, extension analysis

### Import libraries

In [1]:
import pandas as pd
import numpy as np
import os
import json
from tqdm import tqdm
import sqlite3
import time
import hashlib
import re
import datetime
import matplotlib.pyplot as plt
import matplotlib
import editdistance
import functools
from IPython.display import clear_output
import seaborn as sns
import random
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)

### Read in datasets; the iot dataset is the one with distance to library already calculated

In [2]:
# Library fingerprint table. Later cells join on its cipher_suites,
# extension_types and tls_version columns and read library_version from it.
lib = pd.read_csv("../library_fps/library_fps.csv")

In [3]:
# IoT device dataset; per the section heading, the distance-to-library
# columns are already present in this CSV (they are not computed here).
iot = pd.read_csv("../datasets/2014dev_with_dist2lib.csv")

In [4]:
# Replace NaN extension strings with the literal sentinel "missing" so they
# can be grouped and compared as ordinary strings; later cells test for
# this exact value.
iot['extension_types'] = iot['extension_types'].fillna("missing")

In [5]:
# Eyeball a few library extension strings ('+'-joined extension type IDs).
# The sample is unseeded, so the rows shown differ from run to run.
lib['extension_types'].sample(7)

3299    11+10+13+15+13172+16+21
4236    11+10+13+15+13172+16+21
1498             11+10+13+15+21
1591             11+10+13+15+21
5104    11+10+13+15+13172+16+21
2684                      11+10
4394       11+10+13+15+13172+21
Name: extension_types, dtype: object

## 4.1 Matching with libraries

### Statistics: fps

In [6]:
# Number of distinct devices: value_counts() has one entry per distinct
# non-null device_id, so the shape is (n_devices,).
iot['device_id'].value_counts().shape

(2014,)

In [7]:
# Number of distinct (cipher_suites, extension_types, tls_version) triples
# across all devices -- the first element of the shape is the count.
iot.drop_duplicates(['cipher_suites', 'extension_types', 'tls_version']).shape

(905, 20)

In [8]:
# Number of distinct (device, fingerprint) pairs: same triple as above, but
# counted separately for each device_id.
iot.drop_duplicates(['device_id', 'cipher_suites', 'extension_types', 'tls_version']).shape

(5827, 20)

In [9]:
# Left join: every iot row is kept; library columns are NaN where no library
# shares the exact (cipher_suites, extension_types, tls_version) triple.
tmp_mergelib = iot.merge(
    lib,
    on=['cipher_suites', 'extension_types', 'tls_version'],
    how='left',
)

In [10]:
# Distinct fingerprints that matched at least one library entry
# (rows whose library_version was filled in by the left join).
(
    tmp_mergelib
    .dropna(subset=['library_version'])
    .drop_duplicates(['cipher_suites', 'extension_types', 'tls_version'])
    .shape
)

(22, 26)

In [11]:
# Inner join: only iot rows whose full fingerprint triple also appears in lib.
tmp_mergelib2 = iot.merge(
    lib,
    on=['cipher_suites', 'extension_types', 'tls_version'],
    how='inner',
)

In [12]:
# Cross-check of the left-join count above: distinct fingerprint triples that
# survive the inner join with the library table.
tmp_mergelib2.drop_duplicates(['cipher_suites', 'extension_types', 'tls_version']).shape

(22, 26)

In [13]:
# Per vendor: how many distinct devices have at least one fingerprint that
# exactly matches a library fingerprint, largest first.
(
    tmp_mergelib2
    .groupby('device_vendor')['device_id']
    .nunique()
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
)

Unnamed: 0,device_vendor,count
0,amazon,136
27,wyze,70
5,google,67
8,lg,27
17,samsung,25
20,synology,24
19,sony,18
12,nvidia,12
26,wink,10
4,canary,10


### Statistics: ciphersuites

In [14]:
# Number of distinct cipher-suite lists seen in the iot data
# (first element of the shape).
iot.drop_duplicates('cipher_suites').shape

(364, 20)

In [15]:
# Match on the cipher-suite list alone, ignoring extensions and TLS version.
iot_cipher = iot.merge(lib, on=['cipher_suites'], how='inner')

In [16]:
# Distinct cipher-suite lists that also occur in the library table.
iot_cipher.drop_duplicates('cipher_suites').shape

(18, 28)

### Statistics: extension

In [17]:
# Number of distinct extension strings in the iot data; the "missing"
# sentinel counts as one value if present.
iot.drop_duplicates('extension_types').shape

(211, 20)

In [18]:
# Random (unseeded) peek at raw device extension strings before normalisation.
iot['extension_types'].sample(10)

5051    23+65281+10+11+35+16+5+13+18+51+45+43+27+21
3359                                    11+10+35+13
4958                              65281+23+13+11+10
1408                              11+10+35+13+13172
4631    65281+23+13+5+13172+18+16+11+51+45+43+10+21
3037                      23+65281+10+11+35+16+5+13
2586    23+65281+10+11+35+16+5+13+18+51+45+43+27+41
5769                                             35
5018                     23+65281+10+11+16+13+13172
2544                      23+65281+10+11+35+16+5+13
Name: extension_types, dtype: object

In [19]:
def remove_ext(extensions, ignored=("13172", "16", "65281")):
    """Drop selected extension IDs from a '+'-joined extension string.

    The string is split on '+', every token listed in ``ignored`` is removed,
    and the remaining tokens are re-joined in their original order.

    Filtering whole tokens (instead of chained substring ``str.replace``
    calls) avoids two problems of the substring approach:
      * IDs that merely contain an ignored ID as a substring are no longer
        corrupted (e.g. "11+10+160" must not become "11+100").
      * An ignored ID is removed even when it is the only token, or the last
        one left after other removals (e.g. "16", "13172+16").

    Parameters
    ----------
    extensions : str
        Extension type IDs joined by '+', e.g. "11+10+13172+16+21".
        Non-numeric tokens such as the "missing" sentinel pass through as-is.
    ignored : collection of str, optional
        Extension IDs (as strings) to strip. Defaults to 13172, 16 and 65281.

    Returns
    -------
    str
        The filtered '+'-joined string; empty if every token was ignored.
    """
    kept = [token for token in extensions.split("+") if token not in ignored]
    return "+".join(kept)

In [20]:
# Work on a copy so the normalised extension strings do not overwrite the
# original library table used by the exact-match cells above.
lib_ex = lib.copy()

In [21]:
# Normalise library extension strings by stripping the IDs remove_ext ignores.
lib_ex['extension_types'] = lib_ex['extension_types'].apply(remove_ext)

In [22]:
# Copy of the device data with normalised extension strings; NaNs (if any
# remain) become the "missing" sentinel before remove_ext is applied.
iot_ex = iot.assign(
    extension_types=iot['extension_types'].fillna("missing").apply(remove_ext)
)

In [23]:
# Spot-check the normalised device extension strings (unseeded sample).
iot_ex['extension_types'].sample(7)

1405    23+10+11+35+5+13+18+51+45+43+27+41
5583                        11+10+35+13+15
5734                        11+10+13+15+21
1240                        11+10+22+23+13
2502                           11+10+35+13
84                          11+10+13+15+21
4916                           11+10+35+13
Name: extension_types, dtype: object

In [24]:
# Spot-check the normalised library extension strings (unseeded sample).
lib_ex['extension_types'].sample(7)

1058                         11+10
3813    11+10+22+23+13+43+45+51+21
266              11+10+35+13+15+21
3327    11+10+22+23+13+43+45+51+21
439                          11+10
6270                   13+11+10+23
2902                11+10+13+15+21
Name: extension_types, dtype: object

In [25]:
# Number of distinct extension strings left after normalisation.
iot_ex['extension_types'].value_counts().shape

(151,)

In [26]:
# Match devices to libraries on the normalised extension string only.
iot_mergelib = iot_ex.merge(lib_ex, on=['extension_types'], how='inner')

In [27]:
# Distinct normalised extension strings that also occur in the library table.
iot_mergelib['extension_types'].value_counts().shape

(17,)

### Single proposed ciphersuite

In [28]:
# Distinct devices that offer cipher suite 47 as their only cipher suite.
iot.loc[iot['cipher_suites'].eq('47'), 'device_id'].value_counts().shape

(334,)

In [29]:
# For the single-cipher ('47') hellos: distinct devices per extension string,
# largest group first.
(
    iot.loc[iot['cipher_suites'] == '47']
    .groupby('extension_types')['device_id']
    .nunique()
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
)

Unnamed: 0,extension_types,count
1,missing,326
0,65281+13,8


In [30]:
# Vendors behind the single-cipher hellos that do carry extensions
# (extension string '65281+13'); counts are rows, not distinct devices.
iot.loc[
    iot['cipher_suites'].eq('47') & iot['extension_types'].eq('65281+13'),
    'device_vendor',
].value_counts()

silicondust    7
hdhomerun      1
Name: device_vendor, dtype: int64

## 4.3 TLS version and extensions

### TLS version

In [31]:
# Total number of distinct devices, repeated here as the denominator for the
# TLS-version figures in this section.
iot['device_id'].value_counts().shape

(2014,)

In [32]:
# Row counts for the 20 most frequent tls_version values. In the recorded
# output only 768-771 occur in bulk; the rest appear once or twice.
iot['tls_version'].value_counts().nlargest(20)

771      5214
769       236
768        31
770        18
1645        2
43308       2
22469       2
376         2
58710       1
56785       1
13776       1
15801       1
9602        1
13664       1
40277       1
15925       1
9518        1
11515       1
29940       1
21728       1
Name: tls_version, dtype: int64

In [33]:
# 772 == 0x0304, the TLS 1.3 protocol version code (assuming tls_version
# stores the raw wire value -- TODO confirm). The recorded output is empty:
# no row carries this value.
iot.loc[iot['tls_version'] == 772]

Unnamed: 0,idx,likely_libver,lib_ciphersuites,device_id,cipher_suites,extension_types,tls_version,device_vendor,device_name,cipher_code,ciphersuite_len,cipher_code_idxl,cipher_code_hex,updated_hexcodelis,updated_codelis,likely,likely_idx,likely_score,cumulative_agreement,distance_2lib_sim


In [34]:
# Keep only rows with one of the four frequent version codes
# (768-771, i.e. 0x0300-0x0303); the rare one-off values are dropped.
iot_sub = iot.loc[iot['tls_version'].isin([771, 769, 768, 770])]

In [35]:
# One row per device with the number of distinct TLS versions it was seen using.
iot_subgb = (
    iot_sub
    .groupby(['device_id'])['tls_version']
    .nunique()
    .reset_index(name='num_dist_tlsver')
)

In [36]:
# Devices observed with more than one TLS version (first element of the shape).
iot_subgb[iot_subgb['num_dist_tlsver'].gt(1)].drop_duplicates('device_id').shape

(194, 2)

In [37]:
# Distinct devices per vendor that sent version code 768 (0x0300).
(
    iot_sub.loc[iot_sub['tls_version'].eq(768)]
    .groupby('device_vendor')['device_id']
    .nunique()
    .reset_index(name='unique_cnt')
)

Unnamed: 0,device_vendor,unique_cnt
0,amazon,13
1,lg,2
2,samsung,4
3,synology,5
4,tplink,1
5,western digital,1


### Extensions

In [38]:
# Show one random (unseeded) row to recall the full column layout.
iot.sample(1)

Unnamed: 0,idx,likely_libver,lib_ciphersuites,device_id,cipher_suites,extension_types,tls_version,device_vendor,device_name,cipher_code,ciphersuite_len,cipher_code_idxl,cipher_code_hex,updated_hexcodelis,updated_codelis,likely,likely_idx,likely_score,cumulative_agreement,distance_2lib_sim
3856,35,curl-7.71.043_openssl-1.0.1u,"[49200, 49196, 49192, 49188, 49172, 49162, 163, 159, 107, 106, 57, 56, 136, 135, 49202, 49198, 49194, 49190, 49167, 49157, 157, 61, 53, 132, 49199, 49195, 49191, 49187, 49171, 49161, 162, 158, 103, 64, 51, 50, 154, 153, 69, 68, 49201, 49197, 49193, 49189, 49166, 49156, 156, 60, 47, 150, 65, 7, 49170, 49160, 22, 19, 49165, 49155, 10, 255]",sa4d5b33226,49200+49196+49192+49188+49172+49162+163+159+107+106+57+56+49202+49198+49194+49190+49167+49157+157+61+53+49199+49195+49191+49187+49171+49161+162+158+103+64+51+50+49201+49197+49193+49189+49166+49156+156+60+47+49170+49160+22+19+49165+49155+10+255,11+10+35+13+13172,771,roku,streamer,"[49200, 49196, 49192, 49188, 49172, 49162, 163, 159, 107, 106, 57, 56, 49202, 49198, 49194, 49190, 49167, 49157, 157, 61, 53, 49199, 49195, 49191, 49187, 49171, 49161, 162, 158, 103, 64, 51, 50, 49201, 49197, 49193, 49189, 49166, 49156, 156, 60, 47, 49170, 49160, 22, 19, 49165, 49155, 10, 255]",50,"['49200:0', '49196:1', '49192:2', '49188:3', '49172:4', '49162:5', '163:6', '159:7', '107:8', '106:9', '57:10', '56:11', '49202:12', '49198:13', '49194:14', '49190:15', '49167:16', '49157:17', '157:18', '61:19', '53:20', '49199:21', '49195:22', '49191:23', '49187:24', '49171:25', '49161:26', '162:27', '158:28', '103:29', '64:30', '51:31', '50:32', '49201:33', '49197:34', '49193:35', '49189:36', '49166:37', '49156:38', '156:39', '60:40', '47:41', '49170:42', '49160:43', '22:44', '19:45', '49165:46', '49155:47', '10:48', '255:49']","['0xc030', '0xc02c', '0xc028', '0xc024', '0xc014', '0xc00a', '0xa3', '0x9f', '0x6b', '0x6a', '0x39', '0x38', '0xc032', '0xc02e', '0xc02a', '0xc026', '0xc00f', '0xc005', '0x9d', '0x3d', '0x35', '0xc02f', '0xc02b', '0xc027', '0xc023', '0xc013', '0xc009', '0xa2', '0x9e', '0x67', '0x40', '0x33', '0x32', '0xc031', '0xc02d', '0xc029', '0xc025', '0xc00e', '0xc004', '0x9c', '0x3c', '0x2f', '0xc012', '0xc008', '0x16', '0x13', '0xc00d', '0xc003', '0xa', '0xff']","['0xc030', '0xc02c', '0xc028', 
'0xc024', '0xc014', '0xc00a', '0xa3', '0x9f', '0x6b', '0x6a', '0x39', '0x38', '0xc032', '0xc02e', '0xc02a', '0xc026', '0xc00f', '0xc005', '0x9d', '0x3d', '0x35', '0xc02f', '0xc02b', '0xc027', '0xc023', '0xc013', '0xc009', '0xa2', '0x9e', '0x67', '0x40', '0x33', '0x32', '0xc031', '0xc02d', '0xc029', '0xc025', '0xc00e', '0xc004', '0x9c', '0x3c', '0x2f', '0xc012', '0xc008', '0x16', '0x13', '0xc00d', '0xc003', '0xa', '0xff']","[49200, 49196, 49192, 49188, 49172, 49162, 163, 159, 107, 106, 57, 56, 49202, 49198, 49194, 49190, 49167, 49157, 157, 61, 53, 49199, 49195, 49191, 49187, 49171, 49161, 162, 158, 103, 64, 51, 50, 49201, 49197, 49193, 49189, 49166, 49156, 156, 60, 47, 49170, 49160, 22, 19, 49165, 49155, 10, 255]","[35, 0.819672131147541]",35,0.819672,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9230769230769231, 0.8571428571428571, 0.8666666666666667, 0.875, 0.8823529411764706, 0.8888888888888888, 0.8947368421052632, 0.9, 0.9047619047619048, 0.9090909090909091, 0.9130434782608695, 0.875, 0.88, 0.8846153846153846, 0.8888888888888888, 0.8928571428571429, 0.896551724137931, 0.9, 0.9032258064516129, 0.90625, 0.9090909090909091, 0.9117647058823529, 0.9142857142857143, 0.9166666666666666, 0.8918918918918919, 0.868421052631579, 0.8461538461538461, 0.825, 0.8292682926829268, 0.8333333333333334, 0.8372093023255814, 0.8409090909090909, 0.8444444444444444, 0.8478260869565217, 0.851063829787234, 0.8541666666666666, 0.8571428571428571, 0.84, 0.8235294117647058, 0.8155339805825242, 0.8269230769230769, 0.8380952380952381, 0.8490566037735849, 0.8598130841121495, 0.8703703703703703, 0.8807339449541285, 0.8909090909090909, 0.9009009009009009]",3


In [39]:
# Parse each '+'-joined extension string into a list of ints. Rows with the
# "missing" sentinel get the integer placeholder 99999 instead of a list.
iot['extensions'] = iot['extension_types'].apply(
    lambda ext_str: 99999 if ext_str == "missing"
    else [int(token) for token in ext_str.split("+")]
)

In [40]:
# Keep only rows that carry a parsed extension list (99999 is the placeholder
# for "missing"), restricted to the columns needed for the per-extension
# device counts. reset_index(drop=True) renumbers the rows without creating
# an 'index' column; the previous reset_index().drop('index', 1) passed
# `axis` positionally, which raises TypeError on pandas >= 2.0.
# Note: this rebinds iot_sub, which held the TLS-version subset earlier.
iot_sub = (
    iot.loc[iot['extensions'] != 99999, ['device_id', 'device_vendor', 'extensions']]
    .reset_index(drop=True)
)

In [41]:
# Display the filtered frame (pandas truncates the middle rows in the output).
iot_sub

Unnamed: 0,device_id,device_vendor,extensions
0,s00058cef5a,amazon,"[11, 10, 35, 13]"
1,s00058cef5a,amazon,"[11, 10, 35, 13, 16, 21]"
2,s00058cef5a,amazon,"[11, 10, 35, 13]"
3,s000859518c,amazon,"[23, 65281, 10, 11, 35, 16, 5, 13, 18, 51, 45, 43, 27, 21]"
4,s000859518c,amazon,"[65281, 23, 35, 13, 5, 18, 16, 11, 10]"
...,...,...,...
5463,sffc152f21e,roku,"[11, 10, 13, 15, 13172, 16, 21]"
5464,sffc152f21e,roku,"[11, 10, 35, 13, 15]"
5465,sffeb109e29,vera,"[11, 10, 13, 15]"
5466,sfff9327ea0,roku,"[11, 10, 13, 15, 13172, 16, 21]"


In [42]:
# One row per (hello, extension ID): explode() repeats device_id and
# device_vendor for every element of the extensions list.
iot_exp = iot_sub.explode('extensions').reset_index(drop=True)

In [43]:
# For each extension ID, the number of distinct devices that ever sent it,
# most widely used first.
iot_expgb = (
    iot_exp
    .groupby('extensions')['device_id']
    .nunique()
    .reset_index(name='unique_devcnt')
    .sort_values('unique_devcnt', ascending=False)
)

In [44]:
# Per-extension device counts, sorted descending by unique_devcnt.
iot_expgb

Unnamed: 0,extensions,unique_devcnt
5,13,1993
3,10,1931
4,11,1917
15,35,1445
7,16,1393
9,21,1053
11,23,957
25,13172,946
6,15,884
29,65281,835
