In [31]:
knitr::opts_chunk$set(echo = TRUE)

# Install the required packages

In [32]:
options(repos = c(CRAN = "https://cran.rstudio.com/"))

if (!require("ggplot2")) install.packages("ggplot2")
if (!require("e1071")) install.packages("e1071")
if (!require("tidyverse")) install.packages("tidyverse")
if (!require("dplyr")) install.packages("dplyr")
if (!require("effsize")) install.packages("effsize")

In [33]:
install.packages("car")
install.packages('cowplot')
install.packages('grid')
install.packages("png")

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

“package ‘grid’ is a base package, and should not be updated”
Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)



In [34]:
library("car")
library('cowplot')
library('grid')
library("png")

library(e1071)
library(effsize)

library(ggplot2)
library(tidyverse)
library(dplyr)
library(knitr)


# Define some global variables

In [35]:
PROVOOST <- "provoost"
OURS <- "group3"

TREATMENTS <- c(PROVOOST,OURS)

# Color mapping for On-Device and Remote using R's native colors
COLOR_MAP <- c(provoost = "coral",
               group3 = "lightblue")

PACKET_LOSS <- "avg_packet_loss_percentage"
ENERGY <- "total_energy_consumption_mJ"

METRICS <- c(PACKET_LOSS, ENERGY)

AXIS_LABELS <- list(
  avg_packet_loss_percentage = "Average Packet Loss (%)",
  total_energy_consumption_mJ = "Total Energy Consumption (mJ)"
)

VIOLIN_FOLDER <- "violin_plots"

PLOT_FOLDERS <- c(VIOLIN_FOLDER)

WIDTH = 12
HEIGHT = 12
FONT_MULTIPLIER = 2

# Read the run table file and multiply PACKET_LOSS with 100

In [36]:
# Read the dataset
dataset <- read.csv("./run_table.csv")

print(head(dataset))
cat("Number of rows:", nrow(dataset))

           X__run_id X__done adaptation_mode avg_packet_loss_percentage
1 run_0_repetition_0    DONE          group3                   19.09334
2 run_1_repetition_0    DONE        provoost                   24.81818
3 run_0_repetition_1    DONE          group3                   18.01268
4 run_1_repetition_1    DONE        provoost                   24.63740
5 run_0_repetition_2    DONE          group3                   18.36840
6 run_1_repetition_2    DONE        provoost                   23.95249
  total_energy_consumption_mJ
1                    5487.995
2                    6749.938
3                    4855.074
4                    6163.185
5                    5190.785
6                    6463.909
Number of rows: 60

# H1 Analysis.

## Define a Function to Remove Outliers using IQR method (works on multiple columns of a dataset)

In [37]:
# Function to remove outliers using the IQR method
remove_outliers <- function(data, columns) {
  # Initialize filtered data as the original data
  filtered_data <- data

  # Loop over each column and apply IQR outlier filtering
  for (column in columns) {
    Q1 <- quantile(filtered_data[[column]], 0.25, na.rm = TRUE)
    Q3 <- quantile(filtered_data[[column]], 0.75, na.rm = TRUE)
    IQR_value <- Q3 - Q1
    lower_bound <- Q1 - 1.5 * IQR_value
    upper_bound <- Q3 + 1.5 * IQR_value

    # Filter rows where the column values are within bounds
    filtered_data <- filtered_data %>%
      filter(filtered_data[[column]] >= lower_bound & filtered_data[[column]] <= upper_bound)
  }

  return(filtered_data)
}

## Filter dataset into 2 treatments while removing outliers based on `ENERGY` column
- 2 Treatments:
  - `provoost`
  - `ours`

In [38]:
# Initialize datasets for cleaned and discarded data
cleaned_dataset <- data.frame()
discarded_points <- data.frame()

# Loop over treatments and apply outlier removal
for (method in TREATMENTS) {
  # Filter the dataset for the specific method
  filtered_data <- dataset %>%
    filter(adaptation_mode == !!method)

  # Store original data before filtering
  original_data <- filtered_data

  # Remove outliers using the IQR method
  cleaned_subset <- remove_outliers(filtered_data, METRICS)

  # Identify discarded points as the difference between original and cleaned data
  discarded_subset <- anti_join(original_data, cleaned_subset)

  # Append discarded points to discarded_points dataset
  discarded_points <- bind_rows(discarded_points, discarded_subset)

  # Append the cleaned data to the cleaned_dataset
  cleaned_dataset <- bind_rows(cleaned_dataset, cleaned_subset)
}

# Print the discarded data points
print("Discarded data points (outliers):")
print(discarded_points)


[1m[22mJoining with `by = join_by(X__run_id, X__done, adaptation_mode,
avg_packet_loss_percentage, total_energy_consumption_mJ)`
[1m[22mJoining with `by = join_by(X__run_id, X__done, adaptation_mode,
avg_packet_loss_percentage, total_energy_consumption_mJ)`


[1] "Discarded data points (outliers):"
            X__run_id X__done adaptation_mode avg_packet_loss_percentage
1  run_1_repetition_4    DONE        provoost                   28.19555
2  run_1_repetition_5    DONE        provoost                   27.84792
3  run_1_repetition_6    DONE        provoost                   26.57454
4  run_0_repetition_5    DONE          group3                   20.17976
5  run_0_repetition_6    DONE          group3                   20.49929
6 run_0_repetition_25    DONE          group3                   17.31982
7 run_0_repetition_29    DONE          group3                   16.74003
  total_energy_consumption_mJ
1                    6725.168
2                    6744.831
3                    6839.351
4                    5691.756
5                    5513.936
6                    4412.929
7                    4438.506


In [39]:
print(head(cleaned_dataset))
cat("Number of rows:", nrow(cleaned_dataset))

           X__run_id X__done adaptation_mode avg_packet_loss_percentage
1 run_1_repetition_0    DONE        provoost                   24.81818
2 run_1_repetition_1    DONE        provoost                   24.63740
3 run_1_repetition_2    DONE        provoost                   23.95249
4 run_1_repetition_3    DONE        provoost                   24.66006
5 run_1_repetition_7    DONE        provoost                   23.27455
6 run_1_repetition_8    DONE        provoost                   23.15307
  total_energy_consumption_mJ
1                    6749.938
2                    6163.185
3                    6463.909
4                    6784.684
5                    6827.090
6                    6755.953
Number of rows: 53

## Table: Mean, Median, and Standard Deviation (SD) of Energy Consumption and Packet Loss for Different Treatment (Adaptation Strategies)

In [40]:
# Group by adaptation_mode and calculate mean, median, and SD for each metric
summary_table <- cleaned_dataset %>%
  group_by(adaptation_mode) %>%
  summarise(
    mean_packet_loss = mean(.data[[PACKET_LOSS]], na.rm = TRUE),
    median_packet_loss = median(.data[[PACKET_LOSS]], na.rm = TRUE),
    sd_packet_loss = sd(.data[[PACKET_LOSS]], na.rm = TRUE),
    mean_energy = mean(.data[[ENERGY]], na.rm = TRUE),
    median_energy = median(.data[[ENERGY]], na.rm = TRUE),
    sd_energy = sd(.data[[ENERGY]], na.rm = TRUE)
  ) %>%
  mutate(across(where(is.numeric), ~ sprintf("%.2f", .x))) # Format all numeric values to 2 decimal places

# Print the summary table using knitr for a nicely formatted table
kable(summary_table,
      caption = "Mean, Median, and Standard Deviation (SD) of Energy Consumption and Packet Loss for Different Treatment (Adaptation Strategies)")



Table: Mean, Median, and Standard Deviation (SD) of Energy Consumption and Packet Loss for Different Treatment (Adaptation Strategies)

|adaptation_mode |mean_packet_loss |median_packet_loss |sd_packet_loss |mean_energy |median_energy |sd_energy |
|:---------------|:----------------|:------------------|:--------------|:-----------|:-------------|:---------|
|group3          |17.90            |17.91              |0.47           |5028.95     |5011.28       |221.67    |
|provoost        |23.42            |23.31              |0.61           |6799.31     |6812.47       |311.09    |

# Normality Check

## Visualize data for manual normality check

In [41]:
# Remove all folders listed in PLOT_FOLDERS
for (folder in PLOT_FOLDERS) {
  system(paste("rm -rf", folder))  # Remove the folder if it exists
}

# Loop through the plot folders and create them if they don't exist
for (folder in PLOT_FOLDERS) {
  if (!dir.exists(folder)) {
    dir.create(folder)
  }
}


### Violin Plots

In [42]:
violin_plot <- function(dataset, title, x, y, show_legend = TRUE) {
  ggplot(dataset, aes(x = Distribution, y = Values, fill = Distribution)) +
    geom_violin(alpha = 0.5) +
    geom_boxplot(width = 0.045, outlier.shape = NA) +
    scale_fill_manual(values = COLOR_MAP) +  # Use COLOR_MAP for colors
    labs(
      title = title,
      x = x,
      y = y
    ) +
    theme_minimal() +
    theme(
      legend.position = ifelse(show_legend, "right", "none"),  # Control legend display
      legend.title = element_blank(),
      legend.text = element_text(size = rel(FONT_MULTIPLIER)),  # Increase legend text size
      axis.title = element_text(size = rel(FONT_MULTIPLIER)),
      axis.text = element_text(size = rel(FONT_MULTIPLIER)),
      plot.title = element_text(size = rel(1.1 * FONT_MULTIPLIER))
    )
}

In [43]:
generate_plots_for_metric <- function(metric_name) {
  # Assuming combined_data is already in the correct format:
  # 'combined_data' must have columns: adaptation_mode, Values, Metric (avg_packet_loss or total_energy_consumption_mJ)

  # Create the dataset for the specific metric
  plot_data <- cleaned_dataset %>%
    select(adaptation_mode, all_of(metric_name)) %>%
    gather(key = "Metric", value = "Values", all_of(metric_name)) %>%
    mutate(Distribution = factor(adaptation_mode, levels = TREATMENTS))

  # Create the violin plot title based on the metric
  plot_title_violin <- paste0("Violin Plot for ", AXIS_LABELS[[metric_name]])

  # Generate the violin plot object
  violin_plot_obj <- violin_plot(
    dataset = plot_data,
    title = plot_title_violin,
    x = "Adaptation Strategy",
    y = AXIS_LABELS[[metric_name]],
    show_legend = FALSE
  )

  # Save the violin plot in the specified folder
  ggsave(
    filename = file.path(VIOLIN_FOLDER, paste0("violin_plot_", metric_name, ".png")),
    plot = violin_plot_obj,
    width = WIDTH,
    height = HEIGHT
  )
}

In [44]:
# Loop through each metric and generate plots
for (metric_name in METRICS) {
  generate_plots_for_metric(metric_name)
}

In [45]:
# Function to run Shapiro-Wilk test and print results
test_normality <- function(data, treatment, metric) {
  # Filter data for the specific treatment and metric
  subset_data <- data %>% filter(adaptation_mode == !!treatment) %>% pull(!!metric)

  # Perform Shapiro-Wilk test
  shapiro_result <- shapiro.test(subset_data)

  # Print results
  cat("Treatment:", treatment, "\n",
      "W:", shapiro_result$statistic, "\n",
      "p-value:", shapiro_result$p.value, "\n\n")
}

# Test normality for each treatment on ENERGY column
cat("Shapiro-Wilk Normality Test Results:\n")
test_normality(cleaned_dataset, PROVOOST, ENERGY)
test_normality(cleaned_dataset, OURS, ENERGY)


Shapiro-Wilk Normality Test Results:
Treatment: provoost 
 W: 0.9744266 
 p-value: 0.7210538 

Treatment: group3 
 W: 0.9744272 
 p-value: 0.7393796 



# Hypothesis Testing (Parametric)

In [46]:
# Function to perform two-tailed t-test and compute Cohen's d
compare_treatments <- function(data, metric) {
  # Filter data for each treatment
  group_provoost <- data %>% filter(adaptation_mode == !!PROVOOST) %>% pull(!!metric)
  group_ours <- data %>% filter(adaptation_mode == !!OURS) %>% pull(!!metric)

  # Perform two-tailed t-test
  t_test_result <- t.test(group_provoost, group_ours, var.equal = FALSE)  # Welch's t-test

  # Calculate Cohen's d
  cohen_d_result <- cohen.d(group_provoost, group_ours, pooled = TRUE)

  # Print results
  cat("Metric:", metric, "\n",
      "T-test: t =", t_test_result$statistic,
      ", p-value =", t_test_result$p.value, "\n",
      "Cohen's d:", cohen_d_result$estimate, "\n\n")
}

# Perform the comparison for ENERGY and PACKET_LOSS
cat("Two-Tailed T-Test and Cohen's d Results:\n")
compare_treatments(cleaned_dataset, ENERGY)
compare_treatments(cleaned_dataset, PACKET_LOSS)

Two-Tailed T-Test and Cohen's d Results:
Metric: total_energy_consumption_mJ 
 T-test: t = 23.92776 , p-value = 5.556668e-28 
 Cohen's d: 6.533469 

Metric: avg_packet_loss_percentage 
 T-test: t = 37.23256 , p-value = 2.22421e-37 
 Cohen's d: 10.1793 



# [Only on Google Colab] Download the Visualization Folders

In [47]:
zip("all_folders.zip", files = unlist(lapply(PLOT_FOLDERS, function(folder) {
  list.files(folder, full.names = TRUE, recursive = TRUE)
})))



In [48]:
# Then, manually download the exported .zip