# Experiment

## Prerequisites

Extract the data to `./metrics`

## Preparations

Import necessary libraries

Idées de mesures:

- distribution des fonctions
- politique basique edge first ou quoi
- temps de deploiement vs nombre de noeuds dans le Fog
- temps de deploiement vs nombre de fonctions
- la mémoire et le cpu au niveau du noeud fog

In [22]:
# One-time Jupyter kernel setup (requires the tk package), kept for reference:
#   install.packages("IRkernel")
#   IRkernel::installspec()  # registers the kernel in the current R installation

# Install every package this notebook loads.
install.packages(
  c(
    "reticulate", "tidyverse", "igraph", "r2r",
    "formattable", "stringr", "viridis", "zoo"
  )
)

Installing packages into ‘/home/volodia/R/x86_64-pc-linux-gnu-library/4.2’
(as ‘lib’ is unspecified)

also installing the dependencies ‘gargle’, ‘curl’, ‘png’, ‘googledrive’, ‘googlesheets4’, ‘httr’, ‘rvest’, ‘xml2’


“installation of package ‘png’ had non-zero exit status”
“installation of package ‘xml2’ had non-zero exit status”
“installation of package ‘igraph’ had non-zero exit status”
“installation of package ‘reticulate’ had non-zero exit status”
“installation of package ‘rvest’ had non-zero exit status”
“installation of package ‘tidyverse’ had non-zero exit status”


In [23]:
# To call python from R
library(reticulate)
library(tidyverse)
library(igraph)
library(r2r)
library(formattable)
library(stringr)
library(viridis)
library(zoo) # moving averages    

ERROR: Error in library(reticulate): there is no package called ‘reticulate’


Load the Python `integration.py` script to use its variables in R

In [None]:
# Point reticulate at the project's virtualenv (marked as required).
use_virtualenv(".venv/", required = TRUE)

# Import the local Python modules from the current directory.
import_from_path("monitoring", path = ".")
import_from_path("k3s", path = ".")

# integration.py is a CLI tool that is not meant to be loaded from R: this
# call always raises, so it is wrapped in try() to let the cell finish.
try(source_python("integration.py"))

Load the network graph as an adjacency matrix

In [None]:
# Read the ADJACENCY variable from the Python session through reticulate's `py`.
raw <- py$ADJACENCY

# Convert a named adjacency list into a square weight matrix.
#
# x: named list; each element is a collection of edges, where an edge holds
#    the destination name in position 1 and the weight in position 2.
# Returns a numeric matrix whose row and column names are the sorted names
# of x; cell [from, to] holds the integer-parsed weight, 0 when no edge is set.
adjancy_transform <- function(x) {
  node_ids <- names(x)
  ordered_ids <- c(sort(node_ids))
  node_count <- length(x)

  adjacency <- matrix(0, nrow = node_count, ncol = node_count)
  rownames(adjacency) <- ordered_ids
  colnames(adjacency) <- ordered_ids

  for (src_pos in seq_along(x)) {
    for (edge in x[[src_pos]]) {
      # Cells are addressed by name, so the sorted dimnames do not matter here.
      adjacency[node_ids[[src_pos]], edge[[1]]] <- strtoi(edge[[2]])
    }
  }

  adjacency
}

# Build the weighted graph from the adjacency matrix and draw it as a tree.
graph_matrix <- adjancy_transform(raw)
net <- graph_from_adjacency_matrix(
  as.matrix(graph_matrix),
  weighted = TRUE
)
plot(net, layout = layout_as_tree)

# ceb <- cluster_edge_betweenness(g)
#
# dendPlot(ceb, mode="hclust")
# plot(ceb, g)


## Data preprocessing

In [None]:
# Tab-separated mapping file; its `instance` and `name` columns are used below.
names_raw <- read.csv2(
  file = "metrics/names.csv",
  header = TRUE,
  sep = "\t"
)

# instance -> name lookup table (r2r hashmap).
names <- hashmap()
names[names_raw$instance] <- names_raw$name

# Placeholder rows (n = 0), one per row of the mapping file, keyed by node name.
missing <- data.frame(instance = names_raw$name, n = 0)

Load the names of the nodes and their correspondence with their IP addresses on the virtual network

In [None]:
# Normalise a raw metrics table read from CSV.
#
# x: data frame with at least `instance`, `timestamp` and `value` columns.
# Returns x with:
#   - `instance` translated through the global `names` hashmap (as character),
#   - `timestamp_raw` (numeric) and `timestamp` (POSIXct, epoch origin),
#   - `value` converted to numeric,
# then merged with the global `missing` table (all = TRUE) and sorted by
# `instance`.
prepare <- function(x) {
  renamed <- x %>%
    mutate(instance = names[instance]) %>%
    mutate(instance = as.character(instance))

  typed <- renamed %>%
    mutate(timestamp_raw = as.numeric(as.character(timestamp))) %>%
    mutate(timestamp = as.POSIXct(timestamp_raw, origin = "1970-01-01")) %>%
    mutate(value = as.numeric(as.character(value)))

  typed %>%
    merge(missing, all = TRUE) %>%
    arrange(instance)
}

## Data processing

Check which nodes we have values for (usually the ones that have submitted bids, etc.):
- Green is ok
- Red is a node that was not present in the data

In [None]:
# Available memory per node; `dyn` is the value relative to the largest one.
mem <- read.csv2(
  file = paste(METRICS_PATH, "fog_node_memory_available.csv", sep = "/"),
  header = TRUE,
  sep = "\t"
) %>%
  prepare() %>%
  distinct(instance, value, .keep_all = TRUE) %>%
  select(c(instance, value)) %>%
  mutate(dyn = formattable::percent(value / max(value, na.rm = TRUE))) %>%
  mutate(value = as.numeric(as.character(value)))

# CPU values per node, one row per distinct (instance, value) pair.
cpu <- read.csv2(
  file = paste(METRICS_PATH, "fog_node_cpu_used.csv", sep = "/"),
  header = TRUE,
  sep = "\t"
) %>%
  prepare() %>%
  select(c(instance, value)) %>%
  distinct(instance, value, .keep_all = TRUE) %>%
  group_by(instance) %>%
  mutate(value = as.numeric(as.character(value)))

In [None]:
# Map each element of x to a status colour: the first colour of a two-step
# red -> green ramp when the element is NA, the second one otherwise.
PercentageColourConnected <- function(x) {
  status_palette <- colorRampPalette(c("red", "green"))(2)
  palette_index <- ifelse(is.na(x), 1, 2)
  status_palette[palette_index]
}
# Colour every vertex from mem$value (NA vs. present) and draw the tree with
# the edge weights as labels.
net_connected <- graph_from_adjacency_matrix(
  as.matrix(graph_matrix),
  weighted = TRUE
)
V(net_connected)$color <- PercentageColourConnected(mem$value)
plot(
  net_connected,
  layout = layout_as_tree,
  edge.label = E(net_connected)$weight
)

Describe what nodes we have:
- size is the number of CPUs
- color is the RAM (purple is the max in the network)

In [None]:
# Map ratios in [0, 1] onto a 101-step white -> purple ramp.
# NA entries are treated as 0, i.e. they get the first (white) step.
PercentageColour <- function(x) {
  shades <- colorRampPalette(c("white", "purple"))(101)
  ratio <- ifelse(is.na(x), 0, x)
  shades[round(ratio * 100) + 1]
}
# Vertex size follows cpu$value (0 when NA, otherwise scaled by 8); vertex
# colour follows mem$dyn. Edge weights are shown as labels.
net_prez <- graph_from_adjacency_matrix(
  as.matrix(graph_matrix),
  weighted = TRUE
)
V(net_prez)$size <- ifelse(is.na(cpu$value), 0, cpu$value * 8)
V(net_prez)$color <- PercentageColour(mem$dyn)
plot(
  net_prez,
  layout = layout_as_tree,
  edge.label = E(net_prez)$weight
)

In [None]:
# Raw bids, normalised by prepare().
bids_raw <- read.csv2(
  file = paste(METRICS_PATH, "fog_node_bids.csv", sep = "/"),
  header = TRUE,
  sep = "\t"
) %>%
  prepare()

head(bids_raw)

In [None]:
# Keep only the columns needed to find the winner of each function.
bids_won_raw <- bids_raw %>%
  select(c(instance, function_name, value))

# Baseline table: every known node with zero wins.
bids_won_all_zero <- data.frame(instance = names_raw$name, n = 0)

# For each function keep the row with the smallest bid value, then count
# how many functions each node won.
bids_won <- bids_won_raw %>%
  group_by(function_name) %>%
  slice(which.min(value)) %>%
  group_by(instance) %>%
  summarise(n = n())

bids_won

# Add missing participants and put them at 0
bids_won <- bids_won %>%
  merge(bids_won_all_zero, all = TRUE) %>%
  group_by(instance) %>%
  summarise(across(everything(), sum)) %>%
  # Sort explicitly by node name: arrange() without columns reorders nothing,
  # and the vertex sizes below are assigned positionally.
  arrange(instance)

# Vertex size is proportional to the number of functions won.
net_won <- graph_from_adjacency_matrix(as.matrix(graph_matrix), weighted = TRUE)
V(net_won)$size <- bids_won$n * 2
plot(net_won, layout = layout_as_tree)

Plot the free memory

In [None]:
# Memory usage and allocatable memory per node.
mem_used <- read.csv2(
  file = paste(METRICS_PATH, "fog_node_memory_usage.csv", sep = "/"),
  header = TRUE,
  sep = "\t"
) %>%
  prepare()
mem_avail <- read.csv2(
  file = paste(METRICS_PATH, "fog_node_memory_allocatable.csv", sep = "/"),
  header = TRUE,
  sep = "\t"
) %>%
  prepare()

# free = allocatable - usage, scaled by 1e09; free_percent is relative to the
# largest free value observed on the same node.
mem_free <- mem_used %>%
  inner_join(mem_avail, by = c("instance", "timestamp")) %>%
  mutate(free = (value.y - value.x) / 1e09) %>%
  group_by(instance) %>%
  mutate(free_percent = formattable::percent(free / max(free, na.rm = TRUE))) %>%
  select(instance, timestamp, free, free_percent)

mem_free %>%
  ggplot(aes(x = timestamp, y = free_percent, color = instance)) +
  geom_step() +
  # geom_point() +
  labs(
    x = "Time",
    y = "Free memory (%)",
    title = "Free memory in fog_nodes over time",
    subtitle = "True metrics as seen by k3s on the node"
  ) +
  scale_color_viridis(discrete = TRUE, option = "D") +
  scale_fill_viridis(discrete = TRUE)

Latency of the nodes from their rolling average

> The measured value is actually double the configured latency, because the TCP connection is established first and only then does the data flow. Each measurement is therefore src -> dest followed by src -> dest again, so it counts twice as long as the actual network latency.

In [None]:
# Used and available memory per node.
mem_used <- read.csv2(
  file = paste(METRICS_PATH, "fog_node_memory_used.csv", sep = "/"),
  header = TRUE,
  sep = "\t"
) %>%
  prepare()
mem_avail <- read.csv2(
  file = paste(METRICS_PATH, "fog_node_memory_available.csv", sep = "/"),
  header = TRUE,
  sep = "\t"
) %>%
  prepare()

# free = available - used, scaled by 1e09; free_percent is relative to the
# largest free value observed on the same node.
mem_free <- mem_used %>%
  inner_join(mem_avail, by = c("instance", "timestamp")) %>%
  mutate(free = (value.y - value.x) / 1e09) %>%
  group_by(instance) %>%
  mutate(free_percent = formattable::percent(free / max(free, na.rm = TRUE))) %>%
  select(instance, timestamp, free, free_percent)

mem_free %>%
  ggplot(aes(x = timestamp, y = free_percent, color = instance)) +
  geom_step() +
  # geom_point() +
  labs(
    x = "Time",
    y = "Free memory (%)",
    title = "Free memory in fog_nodes over time",
    subtitle = "As seen by the inside of the fog node software"
  )

In [None]:
# Rolling-average latency from each node (`instance`) to a neighbour
# (`instance_to`); both ends are translated to node names.
latency <- read.csv2(
  file = paste(METRICS_PATH, "fog_node_neighbors_latency_rolling_avg.csv", sep = "/"),
  header = TRUE,
  sep = "\t"
) %>%
  prepare() %>%
  mutate(instance_to = as.character(names[instance_to]))

# Mean of value * 1000 per (source, destination) pair, rounded to an integer.
latency %>%
  group_by(instance, instance_to) %>%
  summarise(mean_ms = round(mean(value * 1000), 0))

# Smoothed latency over time, one facet row per source node.
latency %>%
  ggplot(aes(x = timestamp, y = value, color = instance_to)) +
  geom_smooth() +
  labs(
    x = "Time",
    y = "Latency (s)",
    title = "Evolution of perceived latency from a node to its neighbors over time",
    subtitle = "Each row is a “from”"
  ) +
  facet_grid(rows = vars(instance)) +
  scale_color_viridis(discrete = TRUE, option = "D") +
  scale_fill_viridis(discrete = TRUE)

# Largest latency seen for every (source, destination) pair.
latency %>%
  group_by(instance, instance_to) %>%
  mutate(max = max(value)) %>%
  select(instance, instance_to, max) %>%
  ggplot(aes(x = instance, y = max, fill = instance_to)) +
  geom_col(position = "dodge") +
  labs(
    x = "Source",
    y = "Latency (s)",
    title = "Max latency from a node to its neighbors"
  ) +
  # facet_grid(rows = vars(instance_to))+
  scale_color_viridis(discrete = TRUE, option = "D") +
  scale_fill_viridis(discrete = TRUE)

In [None]:
# Histogram buckets of the IoT emulation request durations.
# The path is built with sep = "/" like every other METRICS_PATH read in this
# notebook; with sep = "" the directory and the file name were glued together.
iot_request_duration <- read.csv2(
  file = paste(
    METRICS_PATH,
    "iot_emulation_http_request_duration_seconds_print_bucket.csv",
    sep = "/"
  ),
  header = TRUE,
  sep = "\t"
) %>%
  prepare() %>%
  select(instance, tag, value, le, n)

head(iot_request_duration)

In [None]:
# Winning row of every function: the one with the smallest bid value.
bids_won_function <- bids_raw %>%
  group_by(function_name) %>%
  slice(which.min(value)) %>%
  mutate(winner = instance)

head(bids_won_function)

# Every distinct bid, annotated with the 4th capture column of the
# function-name pattern (stored as `latency`) and with the function's winner.
bids_function <- bids_raw %>%
  select(c(instance, function_name, value)) %>%
  distinct() %>%
  mutate(
    latency = stringr::str_match(function_name, "(.+)-(.+)-([0-9]+)-([0-9]+)-([0-9]+)") %>% .[, 4]
  ) %>%
  inner_join(bids_won_function, by = c("function_name"))

head(bids_function)

bids_function %>%
  ggplot(
    aes(
      x = latency,
      y = value.x,
      group = function_name,
      color = winner,
      label = instance.x
    )
  ) +
  geom_boxplot() +
  # geom_point() +
  # geom_text(hjust = 0, vjust = 0) +
  labs(
    x = "Functions",
    y = "Bids (currency)",
    title = "Bids made for each functions submitted",
    subtitle = "Color matches the winner's"
  ) +
  scale_color_viridis(discrete = TRUE, option = "D") +
  scale_fill_viridis(discrete = TRUE)

In [None]:
# Box plot of the bids received by each function, coloured by winner.
# NOTE(review): this cell repeats the plot at the end of the previous cell.
bids_function %>%
  ggplot(
    aes(
      x = latency,
      y = value.x,
      group = function_name,
      color = winner,
      label = instance.x
    )
  ) +
  geom_boxplot() +
  # geom_point() +
  # geom_text(hjust = 0, vjust = 0) +
  labs(
    x = "Functions",
    y = "Bids (currency)",
    title = "Bids made for each functions submitted",
    subtitle = "Color matches the winner's"
  ) +
  scale_color_viridis(discrete = TRUE, option = "D") +
  scale_fill_viridis(discrete = TRUE)

In [None]:
# One row per won function: the winning node and the value of its winning bid.
winner_gains <- bids_won_function %>%
  select(c(winner, function_name, value)) %>%
  distinct()
head(winner_gains)

# Stacked winning-bid values per node. The axis labels now describe what is
# actually mapped: x is the winning node, y is the bid value.
winner_gains %>%
  ggplot(aes(x = winner, y = value, fill = winner)) +
  geom_col() +
  labs(x = "Nodes", y = "Gains (currency)", title = "Monetary gains") +
  scale_color_viridis(discrete = TRUE, option = "D") +
  scale_fill_viridis(discrete = TRUE)

In [None]:
head(bids_function)

# One row per (winner, function) pair, so the bar height is the number of
# functions won by each node.
bids_function %>%
  select(winner, function_name) %>%
  distinct() %>%
  ggplot(aes(x = winner, fill = winner)) +
  geom_bar() +
  labs(
    x = "Nodes",
    y = "Number of won bids",
    title = "Number of bids won"
  ) +
  scale_color_viridis(discrete = TRUE, option = "D") +
  scale_fill_viridis(discrete = TRUE)

In [None]:
# Request-duration sums (`toto`) and request counts (`titi`) of the IoT
# emulation, both normalised by prepare().
toto <- read.csv2(
  file = "metrics/iot_emulation_http_request_duration_seconds_print_sum.csv",
  header = TRUE,
  sep = "\t"
) %>%
  prepare()

titi <- read.csv2(
  file = "metrics/iot_emulation_http_request_duration_seconds_print_count.csv",
  header = TRUE,
  sep = "\t"
) %>%
  prepare()


# Distinct non-NA values of the 4th capture column of the tag pattern.
latencies <- toto %>%
  mutate(
    latency = stringr::str_match(tag, "(.+)-(.+)-([0-9]+)-([0-9]+)-([0-9]+)") %>% .[, 4]
  ) %>%
  select(latency) %>%
  distinct() %>%
  filter(!is.na(latency))
# %>%
# filter(latency == 100)

# Per-interval mean request duration for every tag.
#
# `toto` carries the duration sums (value.x) and `titi` the request counts
# (value.y). Within each tag, consecutive samples (ordered by timestamp) are
# differenced and the two differences divided, giving `new_avg`.
# NOTE(review): presumably both series are cumulative counters — confirm.
tutu <- toto %>%
  inner_join(titi, by = c("instance", "job", "timestamp", "timestamp_raw", "tag")) %>%
  # filter(tag=="echo-001-100-10-10") %>%
  # group_by(timestamp_raw) %>%
  # mutate(avg = (value.x / (max(timestamp_raw, na.rm = TRUE) - min(timestamp_raw, na.rm = TRUE))) / (value.y / (max(timestamp_raw, na.rm = TRUE) - min(timestamp_raw, na.rm = TRUE)))) %>%
  group_by(tag) %>%
  # mutate(avg = value.x / value.y) %>%
  # mutate(avg_5last = zoo::rollmean(value.x / value.y, k = 5, fill = NA)) %>%
  mutate(new_x = value.x - dplyr::lag(x = value.x, n = 1, order_by = timestamp)) %>%
  mutate(new_y = value.y - dplyr::lag(x = value.y, n = 1, order_by = timestamp)) %>%
  mutate(new_avg = new_x / new_y) %>%
  # select(timestamp, avg, tag) %>%
  # filter(!is.na(avg)) %>%
  mutate(function_name = tag) %>%
  mutate(latency = stringr::str_match(function_name, "(.+)-(.+)-([0-9]+)-([0-9]+)-([0-9]+)") %>% .[, 4]) %>%
  mutate(index = stringr::str_match(function_name, "(.+)-(.+)-([0-9]+)-([0-9]+)-([0-9]+)") %>% .[, 3]) %>%
  # bids_function holds one row per bid, i.e. several rows per function.
  # Reduce it to one (function_name, winner) row first, otherwise every
  # measurement is duplicated once per bidder by the join.
  inner_join(
    bids_function %>%
      select(function_name, winner) %>%
      distinct(),
    by = c("function_name")
  )
  # select(timestamp, avg, tag, winner)

# Mean of new_avg per winner, timestamp and latency class.
tata <- tutu %>%
  group_by(winner, timestamp, latency) %>%
  summarise(avg = mean(new_avg, na.rm = TRUE))

# For every latency class found in the tags, print the matching function
# names followed by four plots: request count per function, distribution of
# new_avg per winner, new_avg over time per function, and avg over time per
# winner.
for (selected_latency in latencies$latency) {
  print(selected_latency)

  # Functions belonging to this latency class.
  print(
    tutu %>%
      filter(latency == selected_latency) %>%
      select(function_name) %>%
      distinct()
  )

  # Largest request count per function.
  print(
    tutu %>%
      filter(latency == selected_latency) %>%
      group_by(tag) %>%
      mutate(count = max(value.y)) %>%
      select(index, winner, count, latency) %>%
      distinct() %>%
      ggplot(aes(x = index, y = count, group = index, color = winner, label = latency)) +
      geom_col() +
      labs(
        x = "Function id",
        y = "Number of echo requests",
        title = "Number of requests made to each functions (back in forth=2×latency)",
        subtitle = paste("Color matches the winner's —", selected_latency, "ms latency")
      ) +
      scale_color_viridis(discrete = TRUE, option = "D") +
      scale_fill_viridis(discrete = TRUE)
  )

  # Distribution of new_avg for every winner.
  print(
    tutu %>%
      filter(latency == selected_latency) %>%
      ggplot(aes(x = winner, y = new_avg, group = winner, color = winner, label = latency)) +
      geom_violin() +
      labs(
        x = "time",
        y = "Avg lat. (s)",
        title = "Latencies of echo for each functions (back in forth=2×latency).",
        subtitle = paste("Color matches the winner's —", selected_latency, "ms latency")
      ) +
      scale_color_viridis(discrete = TRUE, option = "D") +
      scale_fill_viridis(discrete = TRUE)
  )

  # Smoothed new_avg over time, one curve per function index.
  print(
    tutu %>%
      filter(latency == selected_latency) %>%
      group_by(function_name) %>%
      ggplot(aes(x = timestamp, y = new_avg, group = index, color = winner, label = latency)) +
      geom_smooth() +
      labs(
        x = "time",
        y = "Avg lat. (s)",
        title = "Latencies of echo for each functions (back in forth=2×latency).",
        subtitle = paste("Color matches the winner's —", selected_latency, "ms latency")
      ) +
      scale_color_viridis(discrete = TRUE, option = "D") +
      scale_fill_viridis(discrete = TRUE)
  )

  # Averaged series (tata): points plus a smoothed curve per winner.
  print(
    tata %>%
      filter(latency == selected_latency) %>%
      group_by(winner) %>%
      ggplot(aes(x = timestamp, y = avg, group = winner, color = winner, label = latency)) +
      geom_point(size = 0.25) +
      geom_smooth(alpha = 0.25) +
      labs(
        x = "time",
        y = "Avg lat. (s)",
        title = "Latencies of echo for each functions (back in forth=2×latency).",
        subtitle = paste("Color matches the winner's —", selected_latency, "ms latency")
      ) +
      scale_color_viridis(discrete = TRUE, option = "D") +
      scale_fill_viridis(discrete = TRUE)
  )
}
# Same plot as the last one of the per-latency loop, but over every latency
# class at once. The subtitle no longer reuses `selected_latency`: outside the
# loop that variable only holds the last class visited (and does not exist if
# the loop never ran), which mislabelled this plot.
print(
  tata %>%
    group_by(winner) %>%
    ggplot(aes(x = timestamp, y = avg, group = winner, color = winner, label = latency)) +
    geom_point(size = 0.25) +
    geom_smooth(alpha = 0.25) +
    labs(
      x = "time",
      y = "Avg lat. (s)",
      title = "ALL Latencies of echo for each functions (back in forth=2×latency).",
      subtitle = "Color matches the winner's — all latencies"
    ) +
    scale_color_viridis(discrete = TRUE, option = "D") +
    scale_fill_viridis(discrete = TRUE)
)