# Monroe-Recon YouTube Measurements

In [1]:
%matplotlib inline

 #https://jon.oberheide.org/blog/2008/10/15/dpkt-tutorial-2-parsing-a-pcap-file/

import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter, FormatStrFormatter
import matplotlib
import pandas as pd
import numpy as np
import os
import glob
import dpkt
import socket
import sys
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

# Lookup tables shared by the whole notebook; all of them are filled by
# readOpIp() while it parses the container logs.
opIp = {}       # nodeId -> {interface: ip, ip: interface} (both directions in one dict)
opName = {}     # nodeId -> {interface: operator name}
nodes = {}      # nodeId -> running id (folder) most recently seen for that node
operators = {}  # operator name -> set of running ids that used that operator

def plotPcapsResults(filename, op):
    """Plot the cumulative payload volume over time of the flows in one pcap.

    The node id and interface are taken from the 2nd and 3rd ``_``-separated
    fields of the file's base name. Captures on ``eth0`` are skipped, and when
    ``op`` is not empty only captures whose interface is mapped to that
    operator name in ``opName`` are plotted.

    Only flows whose cumulative payload exceeds 50 kbit are drawn. They are
    ranked by the timestamp of their first packet and the first four receive
    the labels ``pop_short``, ``pop_long``, ``nonpop_short``, ``nonpop_long``
    (presumably the order in which the videos were fetched -- confirm against
    the measurement container). Any further flow gets a generic label instead
    of aborting the plot.

    Args:
        filename: path of the pcap file.
        op: operator name to restrict the plot to, or ``''`` for no filter.

    Raises:
        IndexError: if the file name has fewer than three ``_`` fields.
        IOError / dpkt errors: if the file cannot be opened or is not a pcap.
    """
    name_fields = filename.split("/")[-1].split("_")
    nodeId = name_fields[1]
    interface = name_fields[2]
    if interface == 'eth0':
        return
    # An unknown node/interface cannot match the requested operator, so it is
    # skipped explicitly instead of relying on a KeyError.
    if op != '' and opName.get(nodeId, {}).get(interface) != op:
        return

    flows = _readPcapFlows(filename)

    params = {'legend.fontsize': 'x-large',
              'figure.figsize': (10, 5),
              'axes.labelsize': 'x-large',
              'axes.titlesize': 'x-large',
              'xtick.labelsize': 'x-large',
              'lines.markersize': '10',
              'lines.linewidth': '2',
              'ytick.labelsize': 'x-large'}
    plt.rcParams.update(params)

    fig, ax = plt.subplots(figsize=(12, 12))
    # set_axis_bgcolor() was replaced by set_facecolor() in matplotlib 2.0 and
    # removed in 2.2; support both so the plot does not silently disappear.
    if hasattr(ax, 'set_facecolor'):
        ax.set_facecolor("white")
    else:
        ax.set_axis_bgcolor("white")

    # Keep only flows that carried more than 50 kbit and order them by the
    # time of their first packet (the flow key breaks ties deterministically).
    significant = []
    for flowKey, (timestamps, volume) in flows.items():
        if any(v > 50 for v in volume):
            significant.append((min(timestamps), flowKey))
    significant.sort()

    labels = ['pop_short', 'pop_long', 'nonpop_short', 'nonpop_long']
    color_dict = {'pop_short': 'r', 'pop_long': 'g',
                  'nonpop_short': 'b', 'nonpop_long': 'orange'}

    for rank, (base, flowKey) in enumerate(significant):
        timestamps, volume = flows[flowKey]
        if rank < len(labels):
            label = labels[rank]
            plot_args = {'label': label, 'color': color_dict[label]}
        else:
            # More flows than known labels: let matplotlib pick the colour.
            plot_args = {'label': 'flow_%d' % (rank + 1)}
        # Shift the time axis so every flow starts at 0.
        plt.plot([t - base for t in timestamps], volume, **plot_args)

    plt.xlabel("Timstamp")
    plt.ylabel("Packetsize")
    if nodeId in opIp and interface in opIp[nodeId]:
        plt.title(nodeId+" - "+interface+" ("+opName[nodeId][interface]+") - "+opIp[nodeId][interface])
    else:
        plt.title(filename)
    plt.grid(color='gray')
    plt.legend()
    #plt.savefig(nodeId+"-"+interface+"-"+opIp[nodeId][interface]+'.png', bbox_inches='tight')
    plt.show()


def _readPcapFlows(filename):
    """Return ``{flowKey: (timestamps, cumulative_kbit)}`` for one pcap file.

    A flow key is ``"srcIp:srcPort:dstIp:dstPort"``. Only IPv4 TCP/UDP packets
    are considered and traffic on port 53 or 80 is ignored. The second list
    holds the running sum of the transport payload sizes in kilobits.
    """
    tracked_protocols = (dpkt.ip.IP_PROTO_TCP, dpkt.ip.IP_PROTO_UDP)
    ignored_ports = ("53", "80")
    flows = {}
    with open(filename, 'rb') as pcap_file:
        for ts, buf in dpkt.pcap.Reader(pcap_file):
            try:
                eth = dpkt.ethernet.Ethernet(buf)
                if eth.type != dpkt.ethernet.ETH_TYPE_IP:
                    continue
                ip = eth.data
                if ip.p not in tracked_protocols:
                    continue
                transport = ip.data
                src_port = str(transport.sport)
                dst_port = str(transport.dport)
                if src_port in ignored_ports or dst_port in ignored_ports:
                    continue
                # inet_ntoa turns the packed 4-byte addresses into dotted strings.
                flowKey = ":".join((socket.inet_ntoa(ip.src), src_port,
                                    socket.inet_ntoa(ip.dst), dst_port))
                kbits = len(transport.data) * 8.0 / 1000.0
            except Exception:
                # Best effort: a packet that cannot be decoded is skipped.
                continue

            if flowKey in flows:
                timestamps, volume = flows[flowKey]
                volume.append(volume[-1] + kbits)
                timestamps.append(ts)
            else:
                flows[flowKey] = ([ts], [kbits])
    return flows
    

    
def readOpIp(runningId, filename):
    """Parse a container log and record node/interface/operator/ip mappings.

    Every line containing ``downloaded on`` is split on whitespace. The node
    id, interface, operator name and ip are read from fields 5, 7, 8 and 10,
    or from fields 6, 8, 9 and 11 when the first field contains ``2018``
    (those lines carry one extra leading field). Lines that are too short to
    hold all fields are skipped.

    The results are stored in the module-level tables ``opIp``, ``opName``,
    ``nodes`` and ``operators``. A message is printed when an interface
    changes ip or when a node is seen under a different running id.

    Args:
        runningId: identifier (folder) of the run the log belongs to.
        filename: path of the ``container.log`` file to read.

    Raises:
        IOError: if the log file cannot be opened.
    """
    with open(filename) as log_file:
        for line in log_file:
            line = line.strip()
            if 'downloaded on' not in line:
                continue
            fields = line.split()
            if '2018' in fields[0]:
                node_idx, op_idx, name_idx, ip_idx = 6, 8, 9, 11
            else:
                node_idx, op_idx, name_idx, ip_idx = 5, 7, 8, 10
            if len(fields) <= ip_idx:
                # Truncated line: ignore it rather than abort the whole log.
                continue
            nodeId = fields[node_idx]
            op = fields[op_idx]
            opN = fields[name_idx]
            ip = fields[ip_idx]

            if nodeId not in opIp:
                opIp[nodeId] = {}
                opName[nodeId] = {}
            if op in opIp[nodeId] and opIp[nodeId][op] != ip:
                print("!!!!!!!!!!!!!!!WARNING!!!!!!!!!!!!!")
            # The same dict maps interface -> ip and ip -> interface.
            opIp[nodeId][op] = ip
            opIp[nodeId][ip] = op
            opName[nodeId][op] = opN

            if nodeId in nodes and nodes[nodeId] != runningId:
                # Single pre-formatted string keeps the output identical on
                # Python 2 and Python 3.
                print("Multiple Running id for  %s : %s %s"
                      % (nodeId, nodes[nodeId], runningId))
            nodes[nodeId] = runningId
            operators.setdefault(opN, set()).add(runningId)






# Root folder holding one sub-folder per measurement run; reassigned further
# down before the initialisation run.
baseFolder = "."


def plotOrInit(folders=None, plot=True, op=''):
    """Load the container log of each run folder and optionally plot its pcaps.

    For every folder the ``container.log`` inside it is parsed with
    ``readOpIp`` (folders without such a file are tolerated). With
    ``plot=True`` every ``*.pcap`` in the folder is then passed to
    ``plotPcapsResults``.

    Args:
        folders: iterable of run folders. When omitted, the entries of
            ``baseFolder`` are listed at call time (not at definition time, so
            a later reassignment of ``baseFolder`` is honoured).
        plot: when False only the lookup tables are initialised and the given
            folder names are taken relative to ``baseFolder``.
        op: operator name forwarded to ``plotPcapsResults`` as a filter.
    """
    if folders is None:
        folders = os.listdir(baseFolder)
        # Names returned by listdir are always relative to baseFolder.
        prefix_with_base = True
    else:
        prefix_with_base = not plot

    for nodeFolder in folders:
        # Test the bare name before any prefix is added, otherwise the
        # notebook checkpoint folder is never recognised.
        if os.path.basename(nodeFolder) == '.ipynb_checkpoints':
            continue
        if prefix_with_base:
            nodeFolder = baseFolder+"/"+nodeFolder

        log_path = nodeFolder+"/container.log"
        if os.path.isfile(log_path):
            readOpIp(nodeFolder, log_path)
        if not plot:
            continue

        for p in glob.glob(nodeFolder+"/*.pcap"):
            try:
                plotPcapsResults(p, op)
            except Exception as err:
                # Best effort: one broken capture must not stop the others,
                # but the failure is reported instead of silently dropped.
                print("Could not plot %s: %s" % (p, err))
    
# Point the notebook at the pcap archive and run once with plot=False, which
# only parses the container logs (via readOpIp) to fill the lookup tables.
baseFolder = "/home/monroe_node/dashboards/youtube/youtubePcaps"
plotOrInit(folders=os.listdir(baseFolder), plot=False)
    



In [12]:
%matplotlib inline
import seaborn as sns
from ipywidgets import interactive
from ipywidgets import HTML
from IPython.display import display


# Status banner widget; pl() sets its value while plotting and clears it after.
f = HTML("<h2>Plot in progress....<h2>")
display(f)


def pl(NodeId):
    """Plot all pcaps of the run folder recorded for ``NodeId``.

    The status banner ``f`` shows a progress message while the plots are
    produced.

    Args:
        NodeId: key of the ``nodes`` table.

    Raises:
        KeyError: if ``NodeId`` is not in ``nodes``.
    """
    f.value = "<h2>Plot in progress....<h2>"
    try:
        plotOrInit(folders=[nodes[NodeId]])
    finally:
        # Always clear the banner, even when plotting fails, so it does not
        # stay stuck on "in progress".
        f.value = ""

# Wire pl() to an interactive control offering the node ids collected in
# `nodes` (filled by the plotOrInit(plot=False) initialisation run).
i= interactive(pl, NodeId=nodes.keys())
display(i)



In [3]:
# Status banner widget; pl2() sets its value while plotting and clears it after.
f2 = HTML("<h2>Plot in progress....<h2>")
display(f2)

def pl2(Operator):
    """Plot the pcaps of every run that used ``Operator``.

    All run folders recorded for the operator in ``operators`` are scanned and
    the operator name is passed on as a filter, so only captures mapped to
    that operator are drawn. The status banner ``f2`` shows a progress message
    meanwhile.

    Args:
        Operator: key of the ``operators`` table.

    Raises:
        KeyError: if ``Operator`` is not in ``operators``.
    """
    f2.value = "<h2>Plot in progress....<h2>"
    try:
        plotOrInit(folders=operators[Operator],op=Operator)
    finally:
        # Always clear the banner, even when plotting fails, so it does not
        # stay stuck on "in progress".
        f2.value = ""

# Wire pl2() to an interactive control offering the operator names collected
# in `operators` (filled by the plotOrInit(plot=False) initialisation run).
i2 = interactive(pl2, Operator=operators.keys())
display(i2)


<function __main__.plot>