# Attack viz

### Table of Contents

 - [Cleaning](#cleaning)
 - [Mounting an Attack](#Mounting_an_Attack)
 - [Dependency discovery](#dependens_discovery)

## Set up

In [2]:
import matplotlib
import pandas as pd
import pymysql
%matplotlib inline

In [3]:
# Load the captured traffic (`ubuntu_captures`) and the package metadata
# (`ubuntu_packets`) from the local `fingerpatch` MySQL database.
# NOTE(review): the credentials are hard-coded; consider reading them from the
# environment before sharing this notebook.
connection = pymysql.connect(host='localhost',
                             user='fingerpatch',
                             password='fingerpatch',
                             db='fingerpatch',
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)
try:
    attack_table = pd.read_sql("SELECT * FROM `ubuntu_captures` ", connection)
    ground_truth = pd.read_sql("SELECT * FROM `ubuntu_packets` ", connection)
finally:
    # Always release the connection, even when one of the queries fails.
    connection.close()

In [4]:
# Number of rows loaded from each table.
# `len()` replaces `.count()[0]`: `.count()` tallies the non-null values of every
# column, and the integer `[0]` lookup on its label-indexed result relies on the
# deprecated positional fallback of `Series.__getitem__`.
print(len(ground_truth))
print(len(attack_table))

128148
20


<a id='cleaning'></a>
## Cleaning


Select interesting columns and remove duplicated rows

In [5]:
# List the available columns before choosing which ones to keep.
ground_truth.columns

Index(['id', 'capture_id', 'Package', 'Version', 'Architecture', 'Size',
       'Installed-Size', 'Priority', 'Maintainer', 'SHA1', 'Description',
       'parsedFrom', 'Description-md5', 'Bugs', 'Origin', 'MD5sum', 'Depends',
       'Homepage', 'Source', 'SHA256', 'Section', 'Supported', 'Filename',
       'packageMode'],
      dtype='object')

In [6]:
# Keep a single row per (Package, Version, Size, Depends) combination.
# `packageMode` is not part of the key, so rows that differ only by
# `packageMode` are dropped as duplicates too.
# TODO: commit this change to the table and recreate the unique ID.
#
# Diagnostic (not executed) measuring how many extra duplicates are removed by
# leaving `packageMode` out of the key:
#   pMincluded = ground_truth.duplicated(['Package', 'Version', 'Size', 'Depends', 'packageMode']).sum()
#   pMexcluded = ground_truth.duplicated(['Package', 'Version', 'Size', 'Depends']).sum()
#   pMcat = ground_truth['packageMode'].unique()
#   print("By not considering packageMode which consists only in {}, we can withdraw: {} duplcates".format(pMcat ,pMexcluded - pMincluded))
ground_truth = ground_truth.drop_duplicates(['Package', 'Version', 'Size', 'Depends'])

In [7]:
# Drop the columns we are not interested in (hashes, capture bookkeeping,
# priority and package mode). `columns=` already selects the column axis.
ground_truth = ground_truth.drop(
    columns=[
        'capture_id',
        'SHA1',
        'Priority',
        'Description-md5',
        'MD5sum',
        'SHA256',
        'packageMode',
    ],
)

In [56]:
# Display the captures: one row per capture, with its flows and payload sizes.
attack_table

Unnamed: 0,capture_id,truth_id,nb_flows,HTTP_Seq,Flow1,Flow2,Flow3,Flow4,Flow5,nb_Payload_send1,nb_Payload_send2,nb_Payload_send3,nb_Payload_send4,nb_Payload_send5
0,1,103746,3,[['GET /ubuntu/pool/universe/o/opennebula/libo...,target->yukinko.canonical.com,yukinko.canonical.com->target,target->yukinko.canonical.com,,,0,67874,173,,
1,24,11723,3,[['GET /ubuntu/pool/main/a/augeas/augeas-doc_1...,target->steelix.canonical.com,steelix.canonical.com->target,target->steelix.canonical.com,,,0,677399,151,,
2,25,11724,3,[['GET /ubuntu/pool/main/a/augeas/augeas-lense...,target->steelix.canonical.com,steelix.canonical.com->target,target->steelix.canonical.com,,,0,229659,308,,
3,26,11743,3,[['GET /ubuntu/pool/main/a/autotools-dev/autot...,target->steelix.canonical.com,steelix.canonical.com->target,target->steelix.canonical.com,,,0,44607,157,,
4,27,11746,3,[['GET /ubuntu/pool/main/a/avahi/avahi-dbg_0.6...,target->danava.canonical.com,danava.canonical.com->target,target->danava.canonical.com,,,0,852297,152,,
5,28,14404,3,[['GET /ubuntu/pool/main/d/dee/libdee-doc_1.2....,target->steelix.canonical.com,steelix.canonical.com->target,target->steelix.canonical.com,,,0,58743,165,,
6,29,26801,3,[['GET /ubuntu/pool/universe/g/glpk/glpk-doc_4...,target->steelix.canonical.com,steelix.canonical.com->target,target->steelix.canonical.com,,,0,990053,151,,
7,30,26804,3,[['GET /ubuntu/pool/universe/g/glue-schema/glu...,target->steelix.canonical.com,steelix.canonical.com->target,target->steelix.canonical.com,,,0,33089,155,,
8,31,36546,3,[['GET /ubuntu/pool/universe/i/id3lib3.8.3/lib...,target->steelix.canonical.com,steelix.canonical.com->target,target->steelix.canonical.com,,,0,629023,154,,
9,32,36549,3,[['GET /ubuntu/pool/universe/libi/libidn/libid...,target->steelix.canonical.com,steelix.canonical.com->target,target->steelix.canonical.com,,,0,175417,160,,


<a id='Mounting_an_Attack'></a>
## Mounting an Attack for matching a specific capture to a package.
##### Relying on package size

In [9]:
# Use the capture stored under index label 0 as the one to identify.
target = attack_table.loc[0]
target

capture_id                                                          1
truth_id                                                       103746
nb_flows                                                            3
HTTP_Seq            [['GET /ubuntu/pool/universe/o/opennebula/libo...
Flow1                                   target->yukinko.canonical.com
Flow2                                   yukinko.canonical.com->target
Flow3                                   target->yukinko.canonical.com
Flow4                                                            None
Flow5                                                            None
nb_Payload_send1                                                    0
nb_Payload_send2                                                67874
nb_Payload_send3                                                  173
nb_Payload_send4                                                 None
nb_Payload_send5                                                 None
Name: 0, dtype: object

In [10]:
# Expected gap between the payload observed on the wire and the package's
# "Size" field. Presumably in bytes, like "Size" — TODO confirm.
EXTRA_SIZE_AVERAGE = 283   # Made from stats about captured packets
# NOTE(review): not referenced in the visible cells; presumably the tolerance
# around EXTRA_SIZE_AVERAGE — confirm before relying on it.
EXTRA_SIZE_VARIATION = 5
# Payload of the capture's second flow (Flow2, i.e. server -> target here).
size_to_match = target['nb_Payload_send2']

In [11]:
def distance_from_expected_average_size(x, size_to_match):
    """Return how far package size `x` is from explaining an observed payload.

    The observed payload `size_to_match` is expected to equal the package size
    plus the module-level `EXTRA_SIZE_AVERAGE`; the result is the absolute
    deviation from that expectation (0 means a perfect match).
    """
    deviation = size_to_match - x - EXTRA_SIZE_AVERAGE
    return abs(deviation)

In [12]:
# Score every known package by how well its size explains the observed payload
# (smaller is better). `size_to_match` is forwarded as the second argument.
ground_truth["dist_from_expected_size"] = ground_truth["Size"].apply(
    distance_from_expected_average_size,
    args=(size_to_match,),
)

In [14]:
# Show the five packages whose size is closest to the expected one.
ground_truth.sort_values(by="dist_from_expected_size").head()

Unnamed: 0,id,Package,Version,Architecture,Size,Installed-Size,Maintainer,Description,parsedFrom,Bugs,Origin,Depends,Homepage,Source,Section,Supported,Filename,dist_from_expected_size
39671,39672,libopennebula-java-doc,3.4.1-4.1ubuntu1,all,67592,1194,Ubuntu Developers <ubuntu-devel-discuss@lists....,Java bindings for OpenNebula Cloud API (OCA) -...,packages/archive.ubuntu.com_ubuntu_dists_trust...,https://bugs.launchpad.net/ubuntu/+filebug,Ubuntu,,http://opennebula.org/,opennebula,universe/doc,,pool/universe/o/opennebula/libopennebula-java-...,1
41518,41519,libshisa-dev,1.0.2-3ubuntu2,amd64,67594,385,Ubuntu Developers <ubuntu-devel-discuss@lists....,Development files for the Shishi Kerberos v5 K...,packages/archive.ubuntu.com_ubuntu_dists_trust...,https://bugs.launchpad.net/ubuntu/+filebug,Ubuntu,"libshisa0 (= 1.0.2-3ubuntu2), libshishi-dev (=...",http://www.gnu.org/software/shishi/,shishi,universe/libdevel,,pool/universe/s/shishi/libshisa-dev_1.0.2-3ubu...,3
14153,14154,libcloog-isl-dev,0.18.2-1,amd64,67588,377,Ubuntu Developers <ubuntu-devel-discuss@lists....,Chunky Loop Generator (development files),packages/archive.ubuntu.com_ubuntu_dists_trust...,https://bugs.launchpad.net/ubuntu/+filebug,Ubuntu,"libisl-dev (>= 0.11), libgmp-dev, libcloog-isl...",http://www.CLooG.org,cloog,libdevel,9m,pool/main/c/cloog/libcloog-isl-dev_0.18.2-1_am...,3
34921,34922,libghc-shakespeare-i18n-prof,1.0.0.2-4build1,amd64,67582,429,Ubuntu Developers <ubuntu-devel-discuss@lists....,type-based approach to internationalization; p...,packages/archive.ubuntu.com_ubuntu_dists_trust...,https://bugs.launchpad.net/ubuntu/+filebug,Ubuntu,libghc-shakespeare-i18n-dev (= 1.0.0.2-4build1...,http://hackage.haskell.org/package/shakespeare...,haskell-shakespeare-i18n,universe/haskell,,pool/universe/h/haskell-shakespeare-i18n/libgh...,9
26684,26685,gkrellmoon,0.6-5,amd64,67578,320,Ubuntu MOTU Developers <ubuntu-motu@lists.ubun...,Gkrellm Moon Clock Plugin,packages/archive.ubuntu.com_ubuntu_dists_trust...,https://bugs.launchpad.net/ubuntu/+filebug,Ubuntu,"gkrellm (>= 2.0.0), libatk1.0-0 (>= 1.13.2), l...",,,universe/x11,,pool/universe/g/gkrellmoon/gkrellmoon_0.6-5_am...,13


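The first row (smallest distance) is the package that was actually downloaded: `libopennebula-java-doc`, which is consistent with the `GET /ubuntu/pool/universe/o/opennebula/...` request of the capture.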

In [15]:
# Best candidate: the row with the smallest distance to the expected size.
ground_truth.sort_values(by="dist_from_expected_size").iloc[0]

id                                                                     39672
Package                                               libopennebula-java-doc
Version                                                     3.4.1-4.1ubuntu1
Architecture                                                             all
Size                                                                   67592
Installed-Size                                                          1194
Maintainer                 Ubuntu Developers <ubuntu-devel-discuss@lists....
Description                Java bindings for OpenNebula Cloud API (OCA) -...
parsedFrom                 packages/archive.ubuntu.com_ubuntu_dists_trust...
Bugs                              https://bugs.launchpad.net/ubuntu/+filebug
Origin                                                                Ubuntu
Depends                                                                     
Homepage                                              http://opennebula.org/

<a id='dependens_discovery'></a>
## Dependency discovery

Let's take only the packages that have exactly one dependency.

Sort them by ascending size

In [16]:
# NOTE: `tmp` is an alias of `ground_truth` (no copy is made), so the
# "#Depends" column added below also appears on `ground_truth`.
tmp = ground_truth
# Number of comma-separated entries in "Depends"; an empty string means none.
tmp["#Depends"] = tmp["Depends"].map(
    lambda deps: len(deps.split(",")) if deps != "" else 0
)
# The ten smallest packages (by "Size") that declare exactly one dependency.
one_dep_first10 = (
    tmp.loc[tmp["#Depends"] == 1]
    .sort_values(by="Size")
    .head(10)
)

In [17]:
# Smallest package that declares exactly one dependency.
one_dep_first10.iloc[0]

id                                                                     51087
Package                                                              readpst
Version                                                       0.6.59-1build1
Architecture                                                             all
Size                                                                     796
Installed-Size                                                            21
Maintainer                 Ubuntu Developers <ubuntu-devel-discuss@lists....
Description                    Converts Outlook PST files to mbox and others
parsedFrom                 packages/archive.ubuntu.com_ubuntu_dists_trust...
Bugs                              https://bugs.launchpad.net/ubuntu/+filebug
Origin                                                                Ubuntu
Depends                                                            pst-utils
Homepage                                  http://www.five-ten-sg.com/libpst/

Looking up that dependency in the ground truth:

In [19]:
# Look up the row(s) of the package named in "Depends". The exact string match
# works here because this "Depends" value is a bare package name (no version
# constraint, no comma-separated list).
ground_truth[ground_truth["Package"] == one_dep_first10.iloc[0]["Depends"]]

Unnamed: 0,id,Package,Version,Architecture,Size,Installed-Size,Maintainer,Description,parsedFrom,Bugs,Origin,Depends,Homepage,Source,Section,Supported,Filename,dist_from_expected_size,#Depends
18279,18280,pst-utils,0.6.59-1build1,amd64,62092,181,Ubuntu Developers <ubuntu-devel-discuss@lists....,tools for reading Microsoft Outlook PST files,packages/archive.ubuntu.com_ubuntu_dists_trust...,https://bugs.launchpad.net/ubuntu/+filebug,Ubuntu,"libc6 (>= 2.14), libgcc1 (>= 1:4.1.1), libgd3 ...",http://www.five-ten-sg.com/libpst/,libpst,utils,9m,pool/main/libp/libpst/pst-utils_0.6.59-1build1...,5499,7


It turns out that this dependency also has dependencies of its own:

In [21]:
# "Depends" string of the first ground-truth row whose package name equals the
# single dependency of the smallest one-dependency package.
sub_dependances = (
    ground_truth
    .loc[ground_truth["Package"] == one_dep_first10.iloc[0]["Depends"], "Depends"]
    .iloc[0]
)
print(sub_dependances)

libc6 (>= 2.14), libgcc1 (>= 1:4.1.1), libgd3 (>= 2.1.0~alpha~), libglib2.0-0 (>= 2.12.0), libgsf-1-114 (>= 1.14.8), libpst4 (>= 0.6.54), libstdc++6 (>= 4.6)


#### Once the package is downloaded, we can indeed see that it does not depend on one single package only, but also on the many sub-packages pulled in by that dependency:

On the Docker:

`The following extra packages will be installed:
  fontconfig-config fonts-dejavu-core libfontconfig1 libfreetype6 libgd3
  libglib2.0-0 libglib2.0-data libgsf-1-114 libgsf-1-common libjbig0
  libjpeg-turbo8 libjpeg8 libpst4 libtiff5 libvpx1 libx11-6 libx11-data
  libxau6 libxcb1 libxdmcp6 libxml2 libxpm4 pst-utils sgml-base
  shared-mime-info xml-core`
  
`0 upgraded, 27 newly installed, 0 to remove and 32 not upgraded.
Need to get 5664 kB of archives.`


On the attacker:

`historic =  ['target->danava.canonical.com', 'danava.canonical.com->target', 'target->danava.canonical.com']
server_ip =  ['91.189.88.149', '172.100.0.100', '91.189.88.149']
server_name =  ['danava.canonical.com', 'target', 'danava.canonical.com']
received_Payload =  [5671834]
send_Payload =  [0, 4251]`

So if we add the typical extra size for each downloaded package, as observed on the attacker side, and keep in mind that 5664 kB is a rounded value, we get approximately the payload received by the attacker (5671834 B):


In [67]:
# Expected payload: per-package overhead for the 27 newly installed packages
# plus the announced archive size (5664 kB, a rounded figure from apt).
EXTRA_SIZE_AVERAGE * 27 + 5664000

5671641

#### Let's find out what happens if we download the dependency first

While downloading pst-utils (*using apt-get install readpst*):

On the victim:

`The following extra packages will be installed:
  fontconfig-config fonts-dejavu-core libfontconfig1 libfreetype6 libgd3
  libglib2.0-0 libglib2.0-data libgsf-1-114 libgsf-1-common libjbig0
  libjpeg-turbo8 libjpeg8 libpst4 libtiff5 libvpx1 libx11-6 libx11-data
  libxau6 libxcb1 libxdmcp6 libxml2 libxpm4 sgml-base shared-mime-info
  xml-core
0 upgraded, 26 newly installed, 0 to remove and 32 not upgraded.
Need to get 5663 kB of archives.`


On the Attacker:

`historic =  ['target->keeton.canonical.com', 'keeton.canonical.com->target', 'target->keeton.canonical.com']
server_ip =  ['91.189.88.161', '172.100.0.100', '91.189.88.161']
server_name =  ['keeton.canonical.com', 'target', 'keeton.canonical.com']
received_Payload =  [5670760]
send_Payload =  [0, 4085]
Ressources cleaned.`


In [68]:
# 5671834 and 5670760 are the payloads received by the attacker for the full
# install and for the dependency-only install respectively.
print(" -- Seen By the Attacker -- Difference by downloading the full package and only it's dependances :",5671834 - 5670760)
# 796 is the "Size" of readpst in the ground truth.
print(" -- For the ground_truth -- Difference by downloading the full package and only it's dependances :",796 + EXTRA_SIZE_AVERAGE)

 -- Seen By the Attacker -- Difference by downloading the full package and only it's dependances : 1074
 -- For the ground_truth -- Difference by downloading the full package and only it's dependances : 1079


Now that the dependency is installed on the victim's machine, we perform the installation of the main package:


On the victim:

`The following NEW packages will be installed:
  readpst
0 upgraded, 1 newly installed, 0 to remove and 32 not upgraded.
Need to get 796 B of archives.
After this operation, 21.5 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu/ trusty/universe readpst all 0.6.59-1build1 [796 B]
Fetched 796 B in 0s (3656 B/s)   
Download complete and in download only mode`

On the attacker

`historic =  ['target->steelix.canonical.com', 'steelix.canonical.com->target', 'target->steelix.canonical.com']
server_ip =  ['91.189.88.152', '172.100.0.100', '91.189.88.152']
server_name =  ['steelix.canonical.com', 'target', 'steelix.canonical.com']
received_Payload =  [1074]
send_Payload =  [0, 155]`


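Indeed, once the dependency is installed, installing just the package only downloads the package itself (796 B); the remainder of the 1074 B seen by the attacker is the per-package overhead: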

In [69]:
# Payload received by the attacker (1074) minus the archive size reported by
# apt (796): the overhead observed for this single package.
1074 - 796

278

### Sum of dependency sizes & number of dependencies

In [79]:
# Second-smallest package that declares exactly one dependency.
one_dep_first10.iloc[1]

id                                                                     26194
Package                                                                 gcom
Version                                                               0.32-2
Architecture                                                             all
Size                                                                     820
Installed-Size                                                            20
Maintainer                 Ubuntu Developers <ubuntu-devel-discuss@lists....
Description                     datacard control tool - transitional package
parsedFrom                 packages/archive.ubuntu.com_ubuntu_dists_trust...
Bugs                              https://bugs.launchpad.net/ubuntu/+filebug
Origin                                                                Ubuntu
Depends                                                                comgt
Homepage                                           http://www.pharscape.org/

In [90]:
# Ground-truth row(s) for "comgt", the package named in gcom's "Depends".
ground_truth[ground_truth["Package"] == "comgt"]

Unnamed: 0,id,Package,Version,Architecture,Size,Installed-Size,Maintainer,Description,parsedFrom,Bugs,Origin,Depends,Homepage,Source,Section,Supported,Filename,dist_from_expected_size,#Depends
23139,23140,comgt,0.32-2,amd64,42804,188,Ubuntu Developers <ubuntu-devel-discuss@lists....,Option GlobeTrotter and Vodafone datacard cont...,packages/archive.ubuntu.com_ubuntu_dists_trust...,https://bugs.launchpad.net/ubuntu/+filebug,Ubuntu,libc6 (>= 2.7),http://www.pharscape.org/,,universe/net,,pool/universe/c/comgt/comgt_0.32-2_amd64.deb,24787,1


In [22]:
# Ground-truth row(s) for "libc6", the package comgt depends on; several
# versions of the same package can be present.
ground_truth[ground_truth["Package"] == "libc6"]

Unnamed: 0,id,Package,Version,Architecture,Size,Installed-Size,Maintainer,Description,parsedFrom,Bugs,Origin,Depends,Homepage,Source,Section,Supported,Filename,dist_from_expected_size,#Depends
1783,1784,libc6,2.19-0ubuntu6.14,amd64,4752538,10508,Ubuntu Developers <ubuntu-devel-discuss@lists....,Embedded GNU C Library: Shared libraries,packages/archive.ubuntu.com_ubuntu_dists_trust...,https://bugs.launchpad.net/ubuntu/+filebug,Ubuntu,libgcc1,http://www.eglibc.org,eglibc,libs,5y,pool/main/e/eglibc/libc6_2.19-0ubuntu6.14_amd6...,4684947,1
14032,14033,libc6,2.19-0ubuntu6,amd64,4729214,10496,Ubuntu Developers <ubuntu-devel-discuss@lists....,Embedded GNU C Library: Shared libraries,packages/archive.ubuntu.com_ubuntu_dists_trust...,https://bugs.launchpad.net/ubuntu/+filebug,Ubuntu,libgcc1,http://www.eglibc.org,eglibc,libs,5y,pool/main/e/eglibc/libc6_2.19-0ubuntu6_amd64.deb,4661623,1


### Before implementing the recursive function, we noticed the following three issues:
    - Going through dependencies can lead to cycles (ex: comgt->libc6->libgcc1->libc6)
        => Can be fixed by keeping a list of already-seen dependencies
        
    - Several versions of the same package can occur (ex: libc6 2.19-0ubuntu6.14 & 2.19-0ubuntu6 do not have the same size)
        => Maybe take the most recent one (to save time, keep only the newest version beforehand)
        
    - Some packages are already installed by default (like libc6 on our victim's machine)

In [25]:

# x is the current data Serie  
# summing is the sum of the size in B
# 
def recursiveSearchOnDep(x, summing, df,alreadySeen):
    if x["#Depends"] == 0:
        return (summing, alreadySeen) # Touches the leaves
    
    deps = x["Depends"]         
    if df[["Package"] == pck]:
        summing = summing + x["Size"]
    
    