# Data Collection From netUnicorn Into snl-server-5

### Import Dependencies

In [1]:
from IPython.display import display, HTML
# Inject CSS so the notebook's .container element spans the full page width.
display(
    HTML("<style>.container { width:100% !important; }</style>")
)

In [2]:
import os
import time
import pandas as pd
import subprocess
import os
import matplotlib.pyplot as plt
import dotenv

from netunicorn.client.remote import RemoteClient, RemoteClientException
from netunicorn.base import Experiment, ExperimentStatus, Pipeline
from netunicorn.library.tasks.capture.tcpdump import StartCapture, StopNamedCapture
from netunicorn.library.tasks.upload.fileio import UploadToFileIO
from netunicorn.library.tasks.upload.webdav import UploadToWebDav
from netunicorn.library.tasks.basic import SleepTask
from netunicorn.library.tasks.measurements.ookla_speedtest import SpeedTest
from netunicorn.library.tasks.video_watchers.youtube_watcher import WatchYouTubeVideo
from netunicorn.library.tasks.video_watchers.vimeo_watcher import WatchVimeoVideo
from netunicorn.library.tasks.video_watchers.twitch_watcher import WatchTwitchStream
from netunicorn.base import DockerImage

from collections import defaultdict
from scapy.layers.inet import IP, TCP
from scapy.all import rdpcap

from math import ceil

from collections import defaultdict

In [3]:
# Private.env stores private variables such as file paths and IP addresses.
dotenv.load_dotenv(dotenv_path='Private.env')

True

### Set netUnicorn API Credentials

In [4]:
# netUnicorn API credentials, read from the process environment.
# A variable that is not set yields None.
NETUNICORN_ENDPOINT = os.getenv('NETUNICORN_ENDPOINT')
NETUNICORN_LOGIN = os.getenv('NETUNICORN_LOGIN')        # set NETUNICORN_LOGIN to your login
NETUNICORN_PASSWORD = os.getenv('NETUNICORN_PASSWORD')  # set NETUNICORN_PASSWORD to your password

### Start netUnicorn, log in, and get available nodes

In [5]:
# Connect to the netUnicorn API, report its health check, and list the node pool.
client = RemoteClient(
    endpoint=NETUNICORN_ENDPOINT,
    login=NETUNICORN_LOGIN,
    password=NETUNICORN_PASSWORD,
)
print(f"Health Check: {client.healthcheck()}")

nodes = client.get_nodes()
print(nodes)

Health Check: True
[[snl-server-5, raspi-e4:5f:01:8d:ca:12, raspi-e4:5f:01:72:a2:eb, raspi-e4:5f:01:56:d8:f3, raspi-e4:5f:01:84:8c:af, raspi-e4:5f:01:88:c5:4b, raspi-e4:5f:01:75:54:04, raspi-e4:5f:01:56:d9:a3, raspi-e4:5f:01:8b:bc:d9, raspi-e4:5f:01:a7:b1:9d, raspi-e4:5f:01:96:01:e8, raspi-e4:5f:01:6c:98:83, raspi-e4:5f:01:72:a2:35, raspi-e4:5f:01:a0:4a:fb, raspi-e4:5f:01:6f:ee:14, raspi-e4:5f:01:8d:29:d5, raspi-e4:5f:01:96:01:fe, raspi-e4:5f:01:9b:85:9c, raspi-e4:5f:01:8c:e3:3f, raspi-e4:5f:01:ad:c9:04, raspi-e4:5f:01:a7:b1:c1, raspi-e4:5f:01:75:6b:09, raspi-e4:5f:01:a0:4f:c5, raspi-e4:5f:01:ad:c9:3f, raspi-e4:5f:01:88:c4:99, raspi-e4:5f:01:9b:84:c4, raspi-e4:5f:01:a7:b1:e5, raspi-e4:5f:01:a7:ae:70, raspi-e4:5f:01:a0:34:a5, raspi-e4:5f:01:ac:e1:c7, raspi-e4:5f:01:a0:32:50, raspi-e4:5f:01:a0:4e:31, raspi-e4:5f:01:ad:c9:11, raspi-e4:5f:01:a0:1e:bb, raspi-e4:5f:01:a0:4b:01, raspi-e4:5f:01:9c:ca:3a, raspi-e4:5f:01:75:54:ec, raspi-e4:5f:01:8c:8a:41, raspi-e4:5f:01:a0:50:5f, raspi-e4:5f:01:

In [6]:
# Node-name prefix; nodes are later selected with name.startswith(working_node).
working_node = "raspi"

## Collecting Network Data from Vimeo Videos

### Capture Videos

- Create a dictionary called `video_links` that maps each video label to its video link.
- All captures are stored on the snl server in folders labeled `capture_#`, defined in the next cell.
- Each capture is described by its videos, # nodes, # seconds, # loops, run #, and node name (if applicable).
- Captures are stored in snl-server-5.cs.ucsb.edu:/mnt/md0/cs190n/team_dn/capture_<#>/

In [7]:
# Capture history (capture_0 through capture_8), kept for reference only.
# Everything between the triple quotes is a single string literal used as a
# block comment: none of the video_links assignments inside it are executed.
"""
#capture_0 10 node 60 seconds
video_links = {
    'video0': 'https://www.youtube.com/watch?v=_KOpmohOznIiU',
    'video1': 'https://www.youtube.com/watch?v=BGqfVOc_Rl4',
    'video2': 'https://www.youtube.com/watch?v=XVO9CS8D4hQ',
    'video3': 'https://www.youtube.com/watch?v=sYc1MsexWKE',
    'video4': 'https://www.youtube.com/watch?v=KQwPyhcidUE',
    'video5': 'https://www.youtube.com/watch?v=_BPexIx58Zg',
    'video6': 'https://www.youtube.com/watch?v=MbHIxbbyOi8',
    'video7': 'https://www.youtube.com/watch?v=gKCScoG83SA',
    'video8': 'https://www.youtube.com/watch?v=w9iFuKzfoIs',
    'video9': 'https://www.youtube.com/watch?v=slwVFwPjUx4',
}

#capture_1 15 nodes 60 seconds
video_links = {
    'video0': 'https://www.youtube.com/watch?v=vkfTSlTnaBQ',
    'video1': 'https://www.youtube.com/watch?v=KT1-JQTiZGc',
    'video2': 'https://www.youtube.com/watch?v=_38JDGnr0vA',
}


#capture_2 50 nodes videos >5 min(Did not get all videos/nodes), 300 second capture
video_links = {
    'video0': 'https://www.youtube.com/watch?v=vkfTSlTnaBQ',
    'video1': 'https://www.youtube.com/watch?v=KT1-JQTiZGc',
    'video2': 'https://www.youtube.com/watch?v=_38JDGnr0vA',
    'video3': 'https://www.youtube.com/watch?v=GqulwE_yKww',
    'video4': 'https://www.youtube.com/watch?v=v2WCnF3SSUE',
    'video5': 'https://www.youtube.com/watch?v=hif5eI5pBxo',
    'video6': 'https://www.youtube.com/watch?v=0e4qRdlfJcs',
    'video7': 'https://www.youtube.com/watch?v=aIovmgzyuL0',
    'video8': 'https://www.youtube.com/watch?v=k7cGyYaxUnI',
    'video9': 'https://www.youtube.com/watch?v=4GL-X4LqfVc',
}


#capture_3 (Redoing 2 but with 40 nodes) videos > 5 min, 300 second capture
video_links = {
    'video0': 'https://www.youtube.com/watch?v=vkfTSlTnaBQ',
    'video1': 'https://www.youtube.com/watch?v=KT1-JQTiZGc',
    'video2': 'https://www.youtube.com/watch?v=_38JDGnr0vA',
    'video3': 'https://www.youtube.com/watch?v=GqulwE_yKww',
    'video4': 'https://www.youtube.com/watch?v=v2WCnF3SSUE',
    'video5': 'https://www.youtube.com/watch?v=hif5eI5pBxo',
    'video6': 'https://www.youtube.com/watch?v=0e4qRdlfJcs',
    'video7': 'https://www.youtube.com/watch?v=aIovmgzyuL0',
    'video8': 'https://www.youtube.com/watch?v=k7cGyYaxUnI',
    'video9': 'https://www.youtube.com/watch?v=4GL-X4LqfVc',
}

#capture_4 switching to vimeo 60 seconds, 20 nodes
#  second run vimeo 60 seconds, 30 nodes
#    TOTAL capture_4 = 50 nodes, 60 seconds

video_links = {
    'video0': 'https://vimeo.com/872245830',
    'video1': 'https://vimeo.com/249226357',
    'video2': 'https://vimeo.com/250383662',
    'video3': 'https://vimeo.com/255370388',
    'video4': 'https://vimeo.com/259411563',
 
}

#capture_5 Vimeo 60 seconds, 6 nodes, to determine preprocessing steps
video_links = {
    'video0': 'https://vimeo.com/872245830',
    'video1': 'https://vimeo.com/249226357',
}

#capture_6 Vimeo 60 seconds,  (oops)
# raspi-e4:5f:01:a0:34:a8
video_links = {
    'video0': 'https://vimeo.com/872245830',
    'video1': 'https://vimeo.com/249226357',
}


#capture_7 Vimeo 60 seconds, 1 nodes, looped 25 times (NIK)
video_links = {
    'video0': 'https://vimeo.com/872245830',
    'video1': 'https://vimeo.com/249226357',
    'video2': 'https://vimeo.com/250383662',
    'video3': 'https://vimeo.com/255370388',
    'video4': 'https://vimeo.com/259411563',
}

#capture_8 Vimeo 60 seconds, 1 nodes, looped 50 times (Dakota) from 'raspi-e4:5f:01:ac:e3:80' node
video_links = {
    'video0': 'https://vimeo.com/872245830',
    'video1': 'https://vimeo.com/249226357',
    'video2': 'https://vimeo.com/250383662',
    'video3': 'https://vimeo.com/255370388',
    'video4': 'https://vimeo.com/259411563',
}
"""
# capture_9 -- DEMONSTRATION CAPTURE
#   first watch:  Vimeo, 60 seconds, 5 nodes, 1 loop  (total 25 .pcap files)
#   second watch: Vimeo, 60 seconds, 2 nodes, 2 loops (total 20 .pcap files)
# Maps each video label ('video0', 'video1', ...) to its Vimeo URL.
video_links = {
    f'video{position}': f'https://vimeo.com/{vimeo_id}'
    for position, vimeo_id in enumerate(
        ('872245830', '249226357', '250383662', '255370388', '259411563')
    )
}



### Constants Used For netUnicorn Pipeline Experiment

In [8]:
# Folder name the captures are stored under on the snl server.
CURRENT_ENDPOINT_NAME = "capture_9"

# Time each video capture runs for (the capture usually lasts ~20 seconds longer).
CAPTURE_TIME = 60

# Number of nodes used (run in parallel).
NUM_NODES = 2

# Run number, used when the pipeline is run several times for one capture_# (e.g. capture_4).
RUN_NUM = 2

# Number of times each video is watched per node.
NUM_WATCHES = 2

# Single-node experiments only: fixed node name for reproducibility.
# Only used when NUM_NODES is not greater than 1.
ONE_NODE = "raspi-e4:5f:01:ac:e3:80"

### netUnicorn Pipeline configurable with constants above 

In [9]:
# Build the pipeline each selected node will run: for every watch cycle and
# every video, start a packet capture, watch the video, stop the capture,
# upload the resulting .pcap over WebDAV, then pause briefly.
pipeline = Pipeline()
# NOTE(review): presumably lets the pipeline continue after a failed task -- confirm in netUnicorn docs.
pipeline.early_stopping = False

# The WebDAV settings do not change between iterations, so read them once.
webdav_endpoint = f"{os.getenv('PIPELINE_ENDPATH')}/{CURRENT_ENDPOINT_NAME}"
webdav_username = os.getenv('WEBDAV_USERNAME')
webdav_password = os.getenv('WEBDAV_PASSWORD')

for cycle_num in range(NUM_WATCHES):
    for video_name, video_link in video_links.items():
        # The task name doubles as the capture file name: <video>_<run>_<cycle>.
        task_name = f"{video_name}_{RUN_NUM}_{cycle_num}"
        capture_filepath = f"/tmp/{task_name}.pcap"

        pipeline.then(StartCapture(filepath=capture_filepath, name=task_name))
        pipeline.then(WatchVimeoVideo(video_link, CAPTURE_TIME))
        pipeline.then(StopNamedCapture(start_capture_task_name=task_name))
        pipeline.then(UploadToWebDav(
            filepaths={capture_filepath},  # a set of file paths, here with one entry
            endpoint=webdav_endpoint,
            # Credentials must be plain strings. Wrapping them in braces outside
            # an f-string would build one-element sets instead.
            username=webdav_username,
            password=webdav_password,
        ))
        pipeline.then(SleepTask(2))

### Select Nodes (Configured In Constants Section)

In [10]:
# Pick the nodes to run on: NUM_NODES nodes whose name starts with the
# working_node prefix, or -- when NUM_NODES is not greater than 1 -- one node
# whose name starts with ONE_NODE.
working_nodes = (
    nodes.filter(lambda candidate: candidate.name.startswith(working_node)).take(NUM_NODES)
    if NUM_NODES > 1
    else nodes.filter(lambda candidate: candidate.name.startswith(ONE_NODE)).take(1)
)
print(working_nodes)

# Create the experiment by mapping the pipeline onto the selected nodes.
experiment = Experiment().map(pipeline, working_nodes)
print(experiment)


[raspi-e4:5f:01:8d:ca:12, raspi-e4:5f:01:72:a2:eb]
 - Deployment: Node=raspi-e4:5f:01:8d:ca:12, executor_id=, prepared=False, error=None
 - Deployment: Node=raspi-e4:5f:01:72:a2:eb, executor_id=, prepared=False, error=None


### Installing Libraries vs Docker Image

In [11]:
# Show the commands listed in the first deployment's environment definition.
for command in experiment[0].environment_definition.commands:
    print(command)

sudo apt-get update
sudo apt-get install -y tcpdump
apt install -y python3-pip wget xvfb procps chromium chromium-driver
pip3 install selenium webdriver-manager
sudo apt-get update
sudo apt-get install -y tcpdump
sudo apt-get install -y procps
sudo apt-get install -y curl
sudo apt-get update
sudo apt-get install -y tcpdump
apt install -y python3-pip wget xvfb procps chromium chromium-driver
pip3 install selenium webdriver-manager
sudo apt-get update
sudo apt-get install -y tcpdump
sudo apt-get install -y procps
sudo apt-get install -y curl
sudo apt-get update
sudo apt-get install -y tcpdump
apt install -y python3-pip wget xvfb procps chromium chromium-driver
pip3 install selenium webdriver-manager
sudo apt-get update
sudo apt-get install -y tcpdump
sudo apt-get install -y procps
sudo apt-get install -y curl
sudo apt-get update
sudo apt-get install -y tcpdump
apt install -y python3-pip wget xvfb procps chromium chromium-driver
pip3 install selenium webdriver-manager
sudo apt-get update


We can also use a predefined Docker image which will avoid installing any libraries.

In [12]:
# Point every deployment at a prebuilt Docker image
# (the image can be explored on DockerHub).
for node_deployment in experiment:
    node_deployment.environment_definition = DockerImage(
        image='speeeday/chromium-speedtest:0.3.1'
    )

In [13]:
# Label used to refer to this experiment in the netUnicorn client calls.
experiment_label = 'Vimeo_capture'

Now we can prepare the experiment, check for any errors and execute.

In [14]:
# Best-effort removal of an experiment already registered under this label;
# a RemoteClientException from the delete call is deliberately ignored.
try:
    client.delete_experiment(experiment_label)
except RemoteClientException:
    pass

client.prepare_experiment(experiment, experiment_label)

# Poll every 20 seconds, printing each status, until the experiment is READY.
info = client.get_experiment_status(experiment_label)
print(info.status)
while info.status != ExperimentStatus.READY:
    time.sleep(20)
    info = client.get_experiment_status(experiment_label)
    print(info.status)

ExperimentStatus.PREPARING
ExperimentStatus.PREPARING
ExperimentStatus.PREPARING
ExperimentStatus.PREPARING
ExperimentStatus.PREPARING
ExperimentStatus.PREPARING
ExperimentStatus.PREPARING
ExperimentStatus.PREPARING
ExperimentStatus.PREPARING
ExperimentStatus.PREPARING
ExperimentStatus.PREPARING
ExperimentStatus.PREPARING
ExperimentStatus.READY


In [15]:
# For each deployment, report whether it was prepared and any recorded error.
prepared_status = client.get_experiment_status(experiment_label)
for deployment in prepared_status.experiment:
    print(f"Prepared: {deployment.prepared}, error: {deployment.error}")

Prepared: True, error: None
Prepared: True, error: None


In [16]:
# Start the experiment, then poll every 2 seconds, printing each status,
# for as long as the experiment reports RUNNING.
client.start_execution(experiment_label)

info = client.get_experiment_status(experiment_label)
print(info.status)
while info.status == ExperimentStatus.RUNNING:
    time.sleep(2)
    info = client.get_experiment_status(experiment_label)
    print(info.status)

ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING


ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.FINISHED


In [17]:
# Fetch the experiment status once more and print it.
info = client.get_experiment_status(experiment_label)
print(info.status)

ExperimentStatus.FINISHED


In [19]:
# is_successful is only referenced inside the disabled report loop below.
from returns.pipeline import is_successful
# NOTE(review): the per-node report loop is disabled by wrapping it in a string
# literal. The opening delimiter has four quote characters, so the string itself
# begins with a literal '"'. As the last expression in the cell, the string is
# echoed as the cell output instead of any report being printed.
""""
for report in info.execution_result:
    print(f"Node name: {report.node.name}")
    print(f"Error: {report.error}")

    result, log = report.result  # report stores results of execution and corresponding log
    
    # result is a returns.result.Result object, could be Success of Failure
    print(f"Result is: {type(result)}")
    data = result.unwrap() if is_successful(result) else result.failure()
    for key, value in data.items():
        print(f"{key}: {value}")

    # we also can explore logs
    for line in log:
        print(line.strip())
    print()
"""

'"\nfor report in info.execution_result:\n    print(f"Node name: {report.node.name}")\n    print(f"Error: {report.error}")\n\n    result, log = report.result  # report stores results of execution and corresponding log\n    \n    # result is a returns.result.Result object, could be Success of Failure\n    print(f"Result is: {type(result)}")\n    data = result.unwrap() if is_successful(result) else result.failure()\n    for key, value in data.items():\n        print(f"{key}: {value}")\n\n    # we also can explore logs\n    for line in log:\n        print(line.strip())\n    print()\n'

Now we can verify that our files are properly created on the server.