From 9bc4004f8cc328db28db1a6d672424dc6d5e68ed Mon Sep 17 00:00:00 2001
From: "Nikita V. Shirokov"
Date: Thu, 7 Jun 2018 12:15:57 -0700
Subject: [PATCH] sync w/ internal repo.

Summary:
in this sync:
1) updated kernel's includes
2) added support for ICMP "packet too big" generation if the received packet
   is bigger than MAX_PCKT_SIZE (turned off by default, as this requires the
   bpf_xdp_adjust_tail helper from kernel 4.17)
3) unit tests for #2
4) formatting/renaming of a few internal structs
---
 build_bpf_modules_opensource.sh | 1 +
 katran/lib/BpfAdapter.cpp | 33 +-
 katran/lib/BpfAdapter.h | 2 +-
 katran/lib/BpfLoader.cpp | 8 +-
 katran/lib/KatranLb.cpp | 13 +-
 katran/lib/KatranLb.h | 20 +-
 katran/lib/TARGETS | 136 --
 katran/lib/bpf/balancer_consts.h | 16 +-
 katran/lib/bpf/balancer_helpers.h | 81 +
 katran/lib/bpf/balancer_kern.c | 15 +
 katran/lib/bpf/handle_icmp.h | 117 +
 katran/lib/linux_includes/bpf.h | 1966 ++++++++++++++---
 katran/lib/linux_includes/bpf_common.h | 72 +-
 katran/lib/linux_includes/bpf_helpers.h | 188 +-
 katran/lib/linux_includes/libbpf.c | 116 +-
 katran/lib/linux_includes/libbpf.h | 36 +-
 katran/lib/testing/CMakeLists.txt | 1 +
 .../lib/testing/KatranOptionalTestFixtures.h | 59 +
 katran/lib/testing/TARGETS | 1 +
 katran/lib/testing/XdpTester.cpp | 30 +-
 katran/lib/testing/XdpTester.h | 16 +
 katran/lib/testing/katran_tester.cpp | 24 +
 22 files changed, 2223 insertions(+), 728 deletions(-)
 delete mode 100644 katran/lib/TARGETS
 create mode 100644 katran/lib/bpf/balancer_helpers.h
 create mode 100644 katran/lib/testing/KatranOptionalTestFixtures.h

diff --git a/build_bpf_modules_opensource.sh b/build_bpf_modules_opensource.sh
index 3da348229..86836ec66 100755
--- a/build_bpf_modules_opensource.sh
+++ b/build_bpf_modules_opensource.sh
@@ -28,5 +28,6 @@ cp ./katran/lib/Makefile-bpf ./deps/linux/bpfprog/Makefile
 cp -r ./katran/lib/bpf ./deps/linux/bpfprog/
 cp ./katran/lib/linux_includes/bpf_helpers.h ./deps/linux/bpfprog/include/
 cd ./deps/linux/bpfprog && LD_LIBRARY_PATH="${CLANG_PATH}/lib" make \
+ EXTRA_CFLAGS="$*" \
 LLC="${CLANG_PATH}/bin/llc" CLANG="${CLANG_PATH}/bin/clang"
 echo "BPF BUILD COMPLITED"
diff --git a/katran/lib/BpfAdapter.cpp b/katran/lib/BpfAdapter.cpp
index bba22c21d..f5c757d7e 100644
--- a/katran/lib/BpfAdapter.cpp
+++ b/katran/lib/BpfAdapter.cpp
@@ -93,7 +93,7 @@ BpfAdapter::BpfAdapter(bool set_limits) {
 struct rlimit lck_mem = {};
 lck_mem.rlim_cur = RLIM_INFINITY;
 lck_mem.rlim_max = RLIM_INFINITY;
- if(setrlimit(RLIMIT_MEMLOCK, &lck_mem)) {
+ if (setrlimit(RLIMIT_MEMLOCK, &lck_mem)) {
 LOG(ERROR) << "Can't change limit for locked memory";
 throw std::runtime_error("error while setting limit for locked memory");
 }
@@ -131,7 +131,7 @@ int BpfAdapter::createNamedBpfMap(
 int numa_node) {
 const char* name_ptr = !name.empty() ?
name.c_str() : nullptr; - return bpf_create_map_node( + return ebpf_create_map_node( static_cast(type), name_ptr, key_size, @@ -154,7 +154,7 @@ int BpfAdapter::bpfUpdateMap( void* key, void* value, unsigned long long flags) { - auto bpfError = bpf_update_elem(map_fd, key, value, flags); + auto bpfError = ebpf_update_elem(map_fd, key, value, flags); if (bpfError) { VLOG(4) << "Error while updating value in map: " << std::strerror(errno); } @@ -162,7 +162,7 @@ int BpfAdapter::bpfUpdateMap( } int BpfAdapter::bpfMapLookupElement(int map_fd, void* key, void* value) { - auto bpfError = bpf_lookup_elem(map_fd, key, value); + auto bpfError = ebpf_lookup_elem(map_fd, key, value); if (bpfError) { VLOG(4) << "Error while geting value from map: " << std::strerror(errno); } @@ -170,7 +170,7 @@ int BpfAdapter::bpfMapLookupElement(int map_fd, void* key, void* value) { } int BpfAdapter::bpfMapDeleteElement(int map_fd, void* key) { - auto bpfError = bpf_delete_elem(map_fd, key); + auto bpfError = ebpf_delete_elem(map_fd, key); if (bpfError) { VLOG(4) << "Error while deleting key from map: " << std::strerror(errno); } @@ -178,7 +178,7 @@ int BpfAdapter::bpfMapDeleteElement(int map_fd, void* key) { } int BpfAdapter::bpfMapGetNextKey(int map_fd, void* key, void* next_key) { - auto bpfError = bpf_get_next_key(map_fd, key, next_key); + auto bpfError = ebpf_get_next_key(map_fd, key, next_key); if (bpfError) { VLOG(4) << "Error getting next key from map: " << std::strerror(errno); } @@ -186,11 +186,11 @@ int BpfAdapter::bpfMapGetNextKey(int map_fd, void* key, void* next_key) { } int BpfAdapter::pinBpfObject(int fd, const std::string& path) { - return bpf_obj_pin(fd, path.c_str()); + return ebpf_obj_pin(fd, path.c_str()); } int BpfAdapter::getPinnedBpfObject(const std::string& path) { - return bpf_obj_get(path.c_str()); + return ebpf_obj_get(path.c_str()); } int BpfAdapter::getInterfaceIndex(const std::string& interface_name) { @@ -298,7 +298,7 @@ int BpfAdapter::testXdpProg( uint32_t* size_out, uint32_t* retval, uint32_t* duration) { - return bpf_prog_test_run( + return ebpf_prog_test_run( prog_fd, repeat, data, data_size, data_out, size_out, retval, duration); } @@ -460,7 +460,14 @@ int BpfAdapter::modifyTcBpfFilter( const int prog_fd, const unsigned int ifindex, const std::string& bpf_name, - const int direction) { + const int direction) +// TODO: T30063437 fix null-pointer-use undefined behavior +#if defined(__has_feature) +#if __has_feature(__address_sanitizer__) + __attribute__((__no_sanitize__("null"))) +#endif +#endif +{ char buf[MNL_SOCKET_BUFFER_SIZE]; struct nlmsghdr* nlh; struct tcmsg* tc; @@ -572,7 +579,7 @@ int BpfAdapter::attachCgroupProg( SCOPE_EXIT { ::close(target_fd); }; - return bpf_prog_attach(prog_fd, target_fd, type, flags); + return ebpf_prog_attach(prog_fd, target_fd, type, flags); } int BpfAdapter::detachCgroupProg( @@ -585,7 +592,7 @@ int BpfAdapter::detachCgroupProg( SCOPE_EXIT { ::close(target_fd); }; - return bpf_prog_detach(target_fd, type); + return ebpf_prog_detach(target_fd, type); } int BpfAdapter::detachCgroupProg( @@ -599,7 +606,7 @@ int BpfAdapter::detachCgroupProg( SCOPE_EXIT { ::close(target_fd); }; - return bpf_prog_detach2(prog_fd, target_fd, type); + return ebpf_prog_detach2(prog_fd, target_fd, type); } } // namespace katran diff --git a/katran/lib/BpfAdapter.h b/katran/lib/BpfAdapter.h index e3b49adfd..f384ea892 100644 --- a/katran/lib/BpfAdapter.h +++ b/katran/lib/BpfAdapter.h @@ -52,7 +52,7 @@ constexpr unsigned int kBpfMapTypeHashOfMaps = 13; */ class BpfAdapter { 
public: - explicit BpfAdapter(bool set_limits=true); + explicit BpfAdapter(bool set_limits = true); // BpfAdapter is not thread safe. Discourage unsafe use by disabling copy // construction/assignment. diff --git a/katran/lib/BpfLoader.cpp b/katran/lib/BpfLoader.cpp index 689ef45e8..c388fe535 100644 --- a/katran/lib/BpfLoader.cpp +++ b/katran/lib/BpfLoader.cpp @@ -225,14 +225,14 @@ int BpfLoader::loadMaps(Elf* elf) { << "map-in-map prototype"; return 1; } - map_fd = bpf_create_map_in_map( + map_fd = ebpf_create_map_in_map( static_cast(maps[i].type), maps[i].key_size, inner_map_fd, maps[i].max_entries, maps[i].map_flags); } else { - map_fd = bpf_create_map( + map_fd = ebpf_create_map( static_cast(maps[i].type), maps[i].key_size, maps[i].value_size, @@ -579,7 +579,7 @@ int BpfLoader::loadBpfProgs() { << "\nlicense: " << license_ << "\nkernel version: " << kernelVersion_; std::string bpf_log_buf(kLogBufSize, '\0'); - auto prog_fd = bpf_prog_load( + auto prog_fd = ebpf_prog_load( prog_iter.second.type, prog_iter.second.insns, prog_iter.second.size, @@ -607,7 +607,7 @@ int BpfLoader::loadBpfFile(const std::string& path, const bpf_prog_type type) { int fd = -1; SCOPE_EXIT { elf_end(elf_); - if (0 < fd) { + if (fd > 0) { ::close(fd); } }; diff --git a/katran/lib/KatranLb.cpp b/katran/lib/KatranLb.cpp index c982cbc84..2b7e0c6e6 100644 --- a/katran/lib/KatranLb.cpp +++ b/katran/lib/KatranLb.cpp @@ -668,18 +668,19 @@ lb_stats KatranLb::getStatsForVip(const VipKey& vip) { } lb_stats KatranLb::getLruStats() { - uint32_t lru_cntr_pos = maxVips_ + kLruCntrOffset; - return getLbStats(lru_cntr_pos); + return getLbStats(maxVips_ + kLruCntrOffset); } lb_stats KatranLb::getLruMissStats() { - uint32_t lru_miss_pos = maxVips_ + kLruMissOffset; - return getLbStats(lru_miss_pos); + return getLbStats(maxVips_ + kLruMissOffset); } lb_stats KatranLb::getLruFallbackStats() { - uint32_t lru_fallback_pos = maxVips_ + kLruFallbackOffset; - return getLbStats(lru_fallback_pos); + return getLbStats(maxVips_ + kLruFallbackOffset); +} + +lb_stats KatranLb::getIcmpTooBigStats() { + return getLbStats(maxVips_ + kIcmpTooBigOffset); } lb_stats KatranLb::getLbStats(uint32_t position) { diff --git a/katran/lib/KatranLb.h b/katran/lib/KatranLb.h index 1f9c6f11f..04f7f273d 100644 --- a/katran/lib/KatranLb.h +++ b/katran/lib/KatranLb.h @@ -45,6 +45,7 @@ constexpr int kMainIntfPos = 3; constexpr uint32_t kLruCntrOffset = 0; constexpr uint32_t kLruMissOffset = 1; constexpr uint32_t kLruFallbackOffset = 3; +constexpr uint32_t kIcmpTooBigOffset = 4; /** * LRU map related constants @@ -213,7 +214,7 @@ class KatranLb { * @param VipKey vip * @return struct lb_stats w/ statistic for specified vip * - * helper function which return totall ammount of pkts and bytes which + * helper function which return total ammount of pkts and bytes which * has been sent to specified vip. 
it's up to external entity to calculate * actual speed in pps/bps */ @@ -222,7 +223,7 @@ class KatranLb { /** * @return struct lb_stats w/ statistics for lru misses * - * helper function which returns totall amount of processed packets and + * helper function which returns total amount of processed packets and * how much of em was lru misses (when we wasnt able to find entry in * connection table) */ @@ -231,7 +232,7 @@ class KatranLb { /** * @return struct lb_stats w/ statistic of the reasons for lru misses * - * helper function which return totall amount of tcp lru misses because of + * helper function which returns total amount of tcp lru misses because of * the tcp syns (v1) or non-syns (v2) */ lb_stats getLruMissStats(); @@ -239,11 +240,20 @@ class KatranLb { /** * @return struct lb_stats w/ statistic of fallback lru hits * - * helper function which return totall amount of numbers when we fel back + * helper function which return total amount of numbers when we fel back * to fallback_lru (v1); */ lb_stats getLruFallbackStats(); + /** + * @return struct lb_stats w/ statistic of icmp packet too big packets + * + * helper function which returns how many icmpv4/icmpv6 packet too big + * has been generated after we have received packet, which is bigger then + * maximum supported size. + */ + lb_stats getIcmpTooBigStats(); + /** * @param uint32_t somark of the packet * @param std::string dst for a packed w/ specified so_mark @@ -473,7 +483,7 @@ class KatranLb { std::vector lruMapsFd_; /** - * totall LRUs map size; each forwarding cpu/core will have + * total LRUs map size; each forwarding cpu/core will have * total_size/forwarding_cores entries */ uint64_t totalLruSize_; diff --git a/katran/lib/TARGETS b/katran/lib/TARGETS deleted file mode 100644 index 7b315c4d4..000000000 --- a/katran/lib/TARGETS +++ /dev/null @@ -1,136 +0,0 @@ -cpp_library( - name = "bpfadapter", - srcs = [ - "BpfAdapter.cpp", - "BpfLoader.cpp", - "linux_includes/libbpf.c", - ], - headers = [ - "BpfAdapter.h", - "BpfLoader.h", - "BpfLoaderStructs.h", - "linux_includes/bpf.h", - "linux_includes/bpf_common.h", - "linux_includes/bpf_helpers.h", - "linux_includes/libbpf.h", - ], - deps = [ - "//folly:scope_guard", - ], - external_deps = [ - "glog", - ("libelf", "any", "elf"), - ("libmnl", None, "mnl"), - ], -) - -cpp_library( - name = "iphelpers", - srcs = [ - "IpHelpers.cpp", - ], - headers = [ - "IpHelpers.h", - ], - deps = [ - "//folly:network_address", - "//folly/lang:bits", - ], -) - -cpp_library( - name = "chhelpers", - srcs = [ - "CHHelpers.cpp", - ], - headers = [ - "CHHelpers.h", - ], - deps = [ - ":murmurhash3", - ], -) - -cpp_library( - name = "libkatran", - srcs = [ - "KatranLb.cpp", - "Vip.cpp", - ], - headers = [ - "KatranLb.h", - "KatranLbStructs.h", - "Vip.h", - ], - deps = [ - ":balancer_structs", - ":bpfadapter", - ":chhelpers", - ":iphelpers", - "//folly:format", - "//folly:network_address", - "//folly/lang:bits", - ], - external_deps = [ - "gflags", - "glog", - ], -) - -cpp_library( - name = "balancer_structs", - headers = [ - "BalancerStructs.h", - ], -) - -cpp_library( - name = "murmurhash3", - srcs = [ - "MurmurHash3.cpp", - ], - headers = [ - "MurmurHash3.h", - ], -) - -cpp_library( - name = "machelpers", - srcs = [ - "MacHelpers.cpp", - ], - headers = [ - "MacHelpers.h", - ], - deps = [ - "//folly:format", - "//folly:network_address", - ], -) - -cpp_binary( - name = "xdproot", - srcs = [ - "xdproot.cpp", - ], - deps = [ - ":bpfadapter", - ], - external_deps = [ - "gflags", - "glog", - ], -) - 
-cpp_binary( - name = "maglev_integration_test", - srcs = [ - "maglev_integration_test.cpp", - ], - deps = [ - ":chhelpers", - ], - external_deps = [ - "gflags", - ], -) diff --git a/katran/lib/bpf/balancer_consts.h b/katran/lib/bpf/balancer_consts.h index d9490cb27..5208b8f16 100644 --- a/katran/lib/bpf/balancer_consts.h +++ b/katran/lib/bpf/balancer_consts.h @@ -118,13 +118,24 @@ #define MAX_PCKT_SIZE 1514 #endif +// for v4 and v6: initial packet would be truncated to the size of eth header +// plus ipv4/ipv6 header and few bytes of payload +#define ICMP_TOOBIG_SIZE 98 +#define ICMP6_TOOBIG_SIZE 262 + + +#define ICMP6_TOOBIG_PAYLOAD_SIZE (ICMP6_TOOBIG_SIZE - 6) +#define ICMP_TOOBIG_PAYLOAD_SIZE (ICMP_TOOBIG_SIZE - 6) + #define NO_FLAGS 0 -// offset of the lru cache hit related cntrs +// offset of the lru cache hit related counters #define LRU_CNTRS 0 #define LRU_MISS_CNTR 1 #define NEW_CONN_RATE_CNTR 2 #define FALLBACK_LRU_CNTR 3 +//offset of icmp related counters +#define ICMP_TOOBIG_CNTRS 4 // max ammount of new connections per seconda per core for lru update // if we go beyond this value - we will bypass lru update. @@ -154,4 +165,7 @@ #define IPIP_V6_PREFIX3 0 #endif +// optional features (requires kernel support. turned off by default) +//#define ICMP_TOOBIG_GENERATION + #endif // of __BALANCER_CONSTS_H diff --git a/katran/lib/bpf/balancer_helpers.h b/katran/lib/bpf/balancer_helpers.h new file mode 100644 index 000000000..47a5b9381 --- /dev/null +++ b/katran/lib/bpf/balancer_helpers.h @@ -0,0 +1,81 @@ +/* Copyright (C) 2018-present, Facebook, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef __BALANCER_HELPERS +#define __BALANCER_HELPERS +/* + * This file contains common used routines. such as csum helpers etc + */ + +#include + +#include "bpf_helpers.h" + +#define bpf_printk(fmt, ...) 
\ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + + +__attribute__((__always_inline__)) +static inline __u16 csum_fold_helper(__u64 csum) { + int i; +#pragma unroll + for (i = 0; i < 4; i ++) { + if (csum >> 16) + csum = (csum & 0xffff) + (csum >> 16); + } + return ~csum; +} + +__attribute__((__always_inline__)) +static inline void ipv4_csum(void *data_start, int data_size, __u64 *csum) { + *csum = bpf_csum_diff(0, 0, data_start, data_size, *csum); + *csum = csum_fold_helper(*csum); +} + +__attribute__((__always_inline__)) +static inline void ipv4_l4_csum(void *data_start, int data_size, + __u64 *csum, struct iphdr *iph) { + __u32 tmp = 0; + *csum = bpf_csum_diff(0, 0, &iph->saddr, sizeof(__be32), *csum); + *csum = bpf_csum_diff(0, 0, &iph->daddr, sizeof(__be32), *csum); + tmp = __builtin_bswap32((__u32)(iph->protocol)); + *csum = bpf_csum_diff(0, 0, &tmp, sizeof(__u32), *csum); + tmp = __builtin_bswap32((__u32)(data_size)); + *csum = bpf_csum_diff(0, 0, &tmp, sizeof(__u32), *csum); + *csum = bpf_csum_diff(0, 0, data_start, data_size, *csum); + *csum = csum_fold_helper(*csum); +} + +__attribute__((__always_inline__)) +static inline void ipv6_csum(void *data_start, int data_size, + __u64 *csum, struct ipv6hdr *ip6h) { + // ipv6 pseudo header + __u32 tmp = 0; + *csum = bpf_csum_diff(0, 0, &ip6h->saddr, sizeof(struct in6_addr), *csum); + *csum = bpf_csum_diff(0, 0, &ip6h->daddr, sizeof(struct in6_addr), *csum); + tmp = __builtin_bswap32((__u32)(data_size)); + *csum = bpf_csum_diff(0, 0, &tmp, sizeof(__u32), *csum); + tmp = __builtin_bswap32((__u32)(ip6h->nexthdr)); + *csum = bpf_csum_diff(0, 0, &tmp, sizeof(__u32), *csum); + // sum over payload + *csum = bpf_csum_diff(0, 0, data_start, data_size, *csum); + *csum = csum_fold_helper(*csum); +} +#endif diff --git a/katran/lib/bpf/balancer_kern.c b/katran/lib/bpf/balancer_kern.c index 498d55080..6dd3e26de 100644 --- a/katran/lib/bpf/balancer_kern.c +++ b/katran/lib/bpf/balancer_kern.c @@ -310,7 +310,22 @@ static inline int process_packet(void *data, __u64 off, void *data_end, } if (data_end - data > MAX_PCKT_SIZE) { +#ifdef ICMP_TOOBIG_GENERATION + __u32 stats_key = MAX_VIPS + ICMP_TOOBIG_CNTRS; + BUILD_BUG_ON(stats_key >= STATS_MAP_SIZE); + data_stats = bpf_map_lookup_elem(&stats, &stats_key); + if (!data_stats) { + return XDP_DROP; + } + if (is_ipv6) { + data_stats->v2 += 1; + } else { + data_stats->v1 += 1; + } + return send_icmp_too_big(xdp, is_ipv6, data_end - data); +#else return XDP_DROP; +#endif } __u32 stats_key = MAX_VIPS + LRU_CNTRS; diff --git a/katran/lib/bpf/handle_icmp.h b/katran/lib/bpf/handle_icmp.h index c4c096b6a..4e63bc7e3 100644 --- a/katran/lib/bpf/handle_icmp.h +++ b/katran/lib/bpf/handle_icmp.h @@ -32,6 +32,7 @@ #include "balancer_consts.h" #include "balancer_structs.h" +#include "balancer_helpers.h" __attribute__((__always_inline__)) static inline int swap_mac_and_send(void *data, void *data_end) { @@ -44,6 +45,15 @@ static inline int swap_mac_and_send(void *data, void *data_end) { return XDP_TX; } +__attribute__((__always_inline__)) +static inline void swap_mac(void *data, struct eth_hdr *orig_eth) { + struct eth_hdr *eth; + eth = data; + memcpy(eth->eth_source, orig_eth->eth_dest , ETH_ALEN); + memcpy(eth->eth_dest, orig_eth->eth_source, ETH_ALEN); + eth->eth_proto = orig_eth->eth_proto; +} + __attribute__((__always_inline__)) static inline int send_icmp_reply(void *data, void *data_end) { struct iphdr *iph; @@ -106,6 +116,113 @@ static inline int 
send_icmp6_reply(void *data, void *data_end) { return swap_mac_and_send(data, data_end); } +__attribute__((__always_inline__)) +static inline int send_icmp4_too_big(struct xdp_md *xdp) { + int headroom = (int)sizeof(struct iphdr) + (int)sizeof(struct icmphdr); + if (bpf_xdp_adjust_head(xdp, 0 - headroom)) { + return XDP_DROP; + } + void *data = xdp->data; + void *data_end = xdp->data_end; + if (data + (ICMP_TOOBIG_SIZE + headroom) > data_end) { + return XDP_DROP; + } + struct iphdr *iph, *orig_iph; + struct eth_hdr *orig_eth; + struct icmphdr *icmp_hdr; + __u64 csum = 0; + __u64 off = 0; + orig_eth = data + headroom; + swap_mac(data, orig_eth); + off += sizeof(struct eth_hdr); + iph = data + off; + off += sizeof(struct iphdr); + icmp_hdr = data + off; + off += sizeof(struct icmphdr); + orig_iph = data + off; + icmp_hdr->type = ICMP_DEST_UNREACH; + icmp_hdr->code = ICMP_FRAG_NEEDED; + icmp_hdr->un.frag.mtu = htons(MAX_PCKT_SIZE-sizeof(struct eth_hdr)); + icmp_hdr->checksum = 0; + ipv4_csum(icmp_hdr, ICMP_TOOBIG_PAYLOAD_SIZE, &csum); + icmp_hdr->checksum = csum; + iph->ttl = DEFAULT_TTL; + iph->daddr = orig_iph->saddr; + iph->saddr = orig_iph->daddr; + iph->version = 4; + iph->ihl = 5; + iph->protocol = IPPROTO_ICMP; + iph->tos = 0; + iph->tot_len = htons(ICMP_TOOBIG_SIZE + headroom - sizeof(struct eth_hdr)); + iph->check = 0; + csum = 0; + ipv4_csum(iph, sizeof(struct iphdr), &csum); + iph->check = csum; + return XDP_TX; +} + +__attribute__((__always_inline__)) +static inline int send_icmp6_too_big(struct xdp_md *xdp) { + int headroom = (int)sizeof(struct ipv6hdr) + (int)sizeof(struct icmp6hdr); + if (bpf_xdp_adjust_head(xdp, 0 - headroom)) { + return XDP_DROP; + } + void *data = xdp->data; + void *data_end = xdp->data_end; + if (data + (ICMP6_TOOBIG_SIZE + headroom) > data_end) { + return XDP_DROP; + } + struct ipv6hdr *ip6h, *orig_ip6h; + struct eth_hdr *orig_eth; + struct icmp6hdr *icmp6_hdr; + __u64 csum = 0; + __u64 off = 0; + orig_eth = data + headroom; + swap_mac(data, orig_eth); + off += sizeof(struct eth_hdr); + ip6h = data + off; + off += sizeof(struct ipv6hdr); + icmp6_hdr = data + off; + off += sizeof(struct icmp6hdr); + orig_ip6h = data + off; + ip6h->version = 6; + ip6h->priority = 0; + ip6h->nexthdr = IPPROTO_ICMPV6; + ip6h->hop_limit = DEFAULT_TTL; + ip6h->payload_len = htons(ICMP6_TOOBIG_PAYLOAD_SIZE); + memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl)); + memcpy(ip6h->daddr.s6_addr32, orig_ip6h->saddr.s6_addr32, 16); + memcpy(ip6h->saddr.s6_addr32, orig_ip6h->daddr.s6_addr32, 16); + icmp6_hdr->icmp6_type = ICMPV6_PKT_TOOBIG; + icmp6_hdr->icmp6_code = 0; + icmp6_hdr->icmp6_mtu = htonl(MAX_PCKT_SIZE-sizeof(struct eth_hdr)); + icmp6_hdr->icmp6_cksum = 0; + ipv6_csum(icmp6_hdr, ICMP6_TOOBIG_PAYLOAD_SIZE, &csum, ip6h); + icmp6_hdr->icmp6_cksum = csum; + return XDP_TX; +} + +__attribute__((__always_inline__)) +static inline int send_icmp_too_big(struct xdp_md *xdp, + bool is_ipv6, int pckt_size) { + + int offset = pckt_size; + if (is_ipv6) { + offset -= ICMP6_TOOBIG_SIZE; + } else { + offset -= ICMP_TOOBIG_SIZE; + } + if(bpf_xdp_adjust_tail(xdp, 0 - offset)) { + return XDP_DROP; + } + if (is_ipv6) { + return send_icmp6_too_big(xdp); + } else { + return send_icmp4_too_big(xdp); + } +} + + __attribute__((__always_inline__)) static inline int parse_icmpv6(void *data, void *data_end, __u64 off, struct packet_description *pckt) { diff --git a/katran/lib/linux_includes/bpf.h b/katran/lib/linux_includes/bpf.h index c0855392e..df38ce07a 100644 --- a/katran/lib/linux_includes/bpf.h +++ 
b/katran/lib/linux_includes/bpf.h @@ -1,4 +1,6 @@ -/* This program is free software; you can redistribute it and/or +/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ @@ -14,7 +16,7 @@ #define BPF_ALU64 0x07 /* alu mode in double word width */ /* ld/ldx fields */ -#define BPF_DW 0x18 /* double word */ +#define BPF_DW 0x18 /* double word (64-bit) */ #define BPF_XADD 0xc0 /* exclusive add */ /* alu/jmp fields */ @@ -91,6 +93,8 @@ enum bpf_cmd { BPF_MAP_GET_FD_BY_ID, BPF_OBJ_GET_INFO_BY_FD, BPF_PROG_QUERY, + BPF_RAW_TRACEPOINT_OPEN, + BPF_BTF_LOAD, }; enum bpf_map_type { @@ -130,6 +134,9 @@ enum bpf_prog_type { BPF_PROG_TYPE_SOCK_OPS, BPF_PROG_TYPE_SK_SKB, BPF_PROG_TYPE_CGROUP_DEVICE, + BPF_PROG_TYPE_SK_MSG, + BPF_PROG_TYPE_RAW_TRACEPOINT, + BPF_PROG_TYPE_CGROUP_SOCK_ADDR, }; enum bpf_attach_type { @@ -140,6 +147,13 @@ enum bpf_attach_type { BPF_SK_SKB_STREAM_PARSER, BPF_SK_SKB_STREAM_VERDICT, BPF_CGROUP_DEVICE, + BPF_SK_MSG_VERDICT, + BPF_CGROUP_INET4_BIND, + BPF_CGROUP_INET6_BIND, + BPF_CGROUP_INET4_CONNECT, + BPF_CGROUP_INET6_CONNECT, + BPF_CGROUP_INET4_POST_BIND, + BPF_CGROUP_INET6_POST_BIND, __MAX_BPF_ATTACH_TYPE }; @@ -228,6 +242,28 @@ enum bpf_attach_type { #define BPF_F_RDONLY (1U << 3) #define BPF_F_WRONLY (1U << 4) +/* Flag for stack_map, store build_id+offset instead of pointer */ +#define BPF_F_STACK_BUILD_ID (1U << 5) + +enum bpf_stack_build_id_status { + /* user space need an empty entry to identify end of a trace */ + BPF_STACK_BUILD_ID_EMPTY = 0, + /* with valid build_id and offset */ + BPF_STACK_BUILD_ID_VALID = 1, + /* couldn't get build_id, fallback to ip */ + BPF_STACK_BUILD_ID_IP = 2, +}; + +#define BPF_BUILD_ID_SIZE 20 +struct bpf_stack_build_id { + __s32 status; + unsigned char build_id[BPF_BUILD_ID_SIZE]; + union { + __u64 offset; + __u64 ip; + }; +}; + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -242,6 +278,10 @@ union bpf_attr { * BPF_F_NUMA_NODE is set). */ char map_name[BPF_OBJ_NAME_LEN]; + __u32 map_ifindex; /* ifindex of netdev to create on */ + __u32 btf_fd; /* fd pointing to a BTF type data */ + __u32 btf_key_id; /* BTF type_id of the key */ + __u32 btf_value_id; /* BTF type_id of the value */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -266,6 +306,11 @@ union bpf_attr { __u32 prog_flags; char prog_name[BPF_OBJ_NAME_LEN]; __u32 prog_ifindex; /* ifindex of netdev to prep for */ + /* For some prog types expected attach type must be known at + * load time to verify attach type specific parts of prog + * (context accesses, allowed helpers, etc). + */ + __u32 expected_attach_type; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -316,374 +361,1445 @@ union bpf_attr { __aligned_u64 prog_ids; __u32 prog_cnt; } query; + + struct { + __u64 name; + __u32 prog_fd; + } raw_tracepoint; + + struct { /* anonymous struct for BPF_BTF_LOAD */ + __aligned_u64 btf; + __aligned_u64 btf_log_buf; + __u32 btf_size; + __u32 btf_log_size; + __u32 btf_log_level; + }; } __attribute__((aligned(8))); -/* BPF helper function descriptions: +/* The description below is an attempt at providing documentation to eBPF + * developers about the multiple available eBPF helper functions. It can be + * parsed and used to produce a manual page. 
The workflow is the following, + * and requires the rst2man utility: + * + * $ ./scripts/bpf_helpers_doc.py \ + * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst + * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7 + * $ man /tmp/bpf-helpers.7 + * + * Note that in order to produce this external documentation, some RST + * formatting is used in the descriptions to get "bold" and "italics" in + * manual pages. Also note that the few trailing white spaces are + * intentional, removing them would break paragraphs for rst2man. + * + * Start of BPF helper function descriptions: + * + * void *bpf_map_lookup_elem(struct bpf_map *map, const void *key) + * Description + * Perform a lookup in *map* for an entry associated to *key*. + * Return + * Map value associated to *key*, or **NULL** if no entry was + * found. + * + * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) + * Description + * Add or update the value of the entry associated to *key* in + * *map* with *value*. *flags* is one of: * - * void *bpf_map_lookup_elem(&map, &key) - * Return: Map value or NULL + * **BPF_NOEXIST** + * The entry for *key* must not exist in the map. + * **BPF_EXIST** + * The entry for *key* must already exist in the map. + * **BPF_ANY** + * No condition on the existence of the entry for *key*. * - * int bpf_map_update_elem(&map, &key, &value, flags) - * Return: 0 on success or negative error + * Flag value **BPF_NOEXIST** cannot be used for maps of types + * **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all + * elements always exist), the helper would return an error. + * Return + * 0 on success, or a negative error in case of failure. * - * int bpf_map_delete_elem(&map, &key) - * Return: 0 on success or negative error + * int bpf_map_delete_elem(struct bpf_map *map, const void *key) + * Description + * Delete entry with *key* from *map*. + * Return + * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read(void *dst, int size, void *src) - * Return: 0 on success or negative error + * int bpf_probe_read(void *dst, u32 size, const void *src) + * Description + * For tracing programs, safely attempt to read *size* bytes from + * address *src* and store the data in *dst*. + * Return + * 0 on success, or a negative error in case of failure. * * u64 bpf_ktime_get_ns(void) - * Return: current ktime - * - * int bpf_trace_printk(const char *fmt, int fmt_size, ...) 
- * Return: length of buffer written or negative error - * - * u32 bpf_prandom_u32(void) - * Return: random value - * - * u32 bpf_raw_smp_processor_id(void) - * Return: SMP processor ID - * - * int bpf_skb_store_bytes(skb, offset, from, len, flags) - * store bytes into packet - * @skb: pointer to skb - * @offset: offset within packet from skb->mac_header - * @from: pointer where to copy bytes from - * @len: number of bytes to store into packet - * @flags: bit 0 - if true, recompute skb->csum - * other bits - reserved - * Return: 0 on success or negative error - * - * int bpf_l3_csum_replace(skb, offset, from, to, flags) - * recompute IP checksum - * @skb: pointer to skb - * @offset: offset within packet where IP checksum is located - * @from: old value of header field - * @to: new value of header field - * @flags: bits 0-3 - size of header field - * other bits - reserved - * Return: 0 on success or negative error - * - * int bpf_l4_csum_replace(skb, offset, from, to, flags) - * recompute TCP/UDP checksum - * @skb: pointer to skb - * @offset: offset within packet where TCP/UDP checksum is located - * @from: old value of header field - * @to: new value of header field - * @flags: bits 0-3 - size of header field - * bit 4 - is pseudo header - * other bits - reserved - * Return: 0 on success or negative error - * - * int bpf_tail_call(ctx, prog_array_map, index) - * jump into another BPF program - * @ctx: context pointer passed to next program - * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY - * @index: 32-bit index inside array that selects specific program to run - * Return: 0 on success or negative error - * - * int bpf_clone_redirect(skb, ifindex, flags) - * redirect to another netdev - * @skb: pointer to skb - * @ifindex: ifindex of the net device - * @flags: bit 0 - if set, redirect to ingress instead of egress - * other bits - reserved - * Return: 0 on success or negative error + * Description + * Return the time elapsed since system boot, in nanoseconds. + * Return + * Current *ktime*. + * + * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...) + * Description + * This helper is a "printk()-like" facility for debugging. It + * prints a message defined by format *fmt* (of size *fmt_size*) + * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if + * available. It can take up to three additional **u64** + * arguments (as an eBPF helpers, the total number of arguments is + * limited to five). + * + * Each time the helper is called, it appends a line to the trace. + * The format of the trace is customizable, and the exact output + * one will get depends on the options set in + * *\/sys/kernel/debug/tracing/trace_options* (see also the + * *README* file under the same directory). However, it usually + * defaults to something like: + * + * :: + * + * telnet-470 [001] .N.. 419421.045894: 0x00000001: + * + * In the above: + * + * * ``telnet`` is the name of the current task. + * * ``470`` is the PID of the current task. + * * ``001`` is the CPU number on which the task is + * running. + * * In ``.N..``, each character refers to a set of + * options (whether irqs are enabled, scheduling + * options, whether hard/softirqs are running, level of + * preempt_disabled respectively). **N** means that + * **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED** + * are set. + * * ``419421.045894`` is a timestamp. + * * ``0x00000001`` is a fake value used by BPF for the + * instruction pointer register. + * * ```` is the message formatted with + * *fmt*. 
+ * + * The conversion specifiers supported by *fmt* are similar, but + * more limited than for printk(). They are **%d**, **%i**, + * **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**, + * **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size + * of field, padding with zeroes, etc.) is available, and the + * helper will return **-EINVAL** (but print nothing) if it + * encounters an unknown specifier. + * + * Also, note that **bpf_trace_printk**\ () is slow, and should + * only be used for debugging purposes. For this reason, a notice + * bloc (spanning several lines) is printed to kernel logs and + * states that the helper should not be used "for production use" + * the first time this helper is used (or more precisely, when + * **trace_printk**\ () buffers are allocated). For passing values + * to user space, perf events should be preferred. + * Return + * The number of bytes written to the buffer, or a negative error + * in case of failure. + * + * u32 bpf_get_prandom_u32(void) + * Description + * Get a pseudo-random number. + * + * From a security point of view, this helper uses its own + * pseudo-random internal state, and cannot be used to infer the + * seed of other random functions in the kernel. However, it is + * essential to note that the generator used by the helper is not + * cryptographically secure. + * Return + * A random 32-bit unsigned value. + * + * u32 bpf_get_smp_processor_id(void) + * Description + * Get the SMP (symmetric multiprocessing) processor id. Note that + * all programs run with preemption disabled, which means that the + * SMP processor id is stable during all the execution of the + * program. + * Return + * The SMP id of the processor running the program. + * + * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) + * Description + * Store *len* bytes from address *from* into the packet + * associated to *skb*, at *offset*. *flags* are a combination of + * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the + * checksum for the packet after storing the bytes) and + * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\ + * **->swhash** and *skb*\ **->l4hash** to 0). + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size) + * Description + * Recompute the layer 3 (e.g. IP) checksum for the packet + * associated to *skb*. Computation is incremental, so the helper + * must know the former value of the header field that was + * modified (*from*), the new value of this field (*to*), and the + * number of bytes (2 or 4) for this field, stored in *size*. + * Alternatively, it is possible to store the difference between + * the previous and the new values of the header field in *to*, by + * setting *from* and *size* to 0. For both methods, *offset* + * indicates the location of the IP checksum within the packet. + * + * This helper works in combination with **bpf_csum_diff**\ (), + * which does not update the checksum in-place, but offers more + * flexibility and can handle sizes larger than 2 or 4 for the + * checksum to update. 
+ * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags) + * Description + * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the + * packet associated to *skb*. Computation is incremental, so the + * helper must know the former value of the header field that was + * modified (*from*), the new value of this field (*to*), and the + * number of bytes (2 or 4) for this field, stored on the lowest + * four bits of *flags*. Alternatively, it is possible to store + * the difference between the previous and the new values of the + * header field in *to*, by setting *from* and the four lowest + * bits of *flags* to 0. For both methods, *offset* indicates the + * location of the IP checksum within the packet. In addition to + * the size of the field, *flags* can be added (bitwise OR) actual + * flags. With **BPF_F_MARK_MANGLED_0**, a null checksum is left + * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and + * for updates resulting in a null checksum the value is set to + * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates + * the checksum is to be computed against a pseudo-header. + * + * This helper works in combination with **bpf_csum_diff**\ (), + * which does not update the checksum in-place, but offers more + * flexibility and can handle sizes larger than 2 or 4 for the + * checksum to update. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) + * Description + * This special helper is used to trigger a "tail call", or in + * other words, to jump into another eBPF program. The same stack + * frame is used (but values on stack and in registers for the + * caller are not accessible to the callee). This mechanism allows + * for program chaining, either for raising the maximum number of + * available eBPF instructions, or to execute given programs in + * conditional blocks. For security reasons, there is an upper + * limit to the number of successive tail calls that can be + * performed. + * + * Upon call of this helper, the program attempts to jump into a + * program referenced at index *index* in *prog_array_map*, a + * special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes + * *ctx*, a pointer to the context. + * + * If the call succeeds, the kernel immediately runs the first + * instruction of the new program. This is not a function call, + * and it never returns to the previous program. If the call + * fails, then the helper has no effect, and the caller continues + * to run its subsequent instructions. A call can fail if the + * destination program for the jump does not exist (i.e. *index* + * is superior to the number of entries in *prog_array_map*), or + * if the maximum number of tail calls has been reached for this + * chain of programs. 
This limit is defined in the kernel by the + * macro **MAX_TAIL_CALL_CNT** (not accessible to user space), + * which is currently set to 32. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags) + * Description + * Clone and redirect the packet associated to *skb* to another + * net device of index *ifindex*. Both ingress and egress + * interfaces can be used for redirection. The **BPF_F_INGRESS** + * value in *flags* is used to make the distinction (ingress path + * is selected if the flag is present, egress path otherwise). + * This is the only flag supported for now. + * + * In comparison with **bpf_redirect**\ () helper, + * **bpf_clone_redirect**\ () has the associated cost of + * duplicating the packet buffer, but this can be executed out of + * the eBPF program. Conversely, **bpf_redirect**\ () is more + * efficient, but it is handled through an action code where the + * redirection happens only after the eBPF program has returned. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. * * u64 bpf_get_current_pid_tgid(void) - * Return: current->tgid << 32 | current->pid + * Return + * A 64-bit integer containing the current tgid and pid, and + * created as such: + * *current_task*\ **->tgid << 32 \|** + * *current_task*\ **->pid**. * * u64 bpf_get_current_uid_gid(void) - * Return: current_gid << 32 | current_uid - * - * int bpf_get_current_comm(char *buf, int size_of_buf) - * stores current->comm into buf - * Return: 0 on success or negative error - * - * u32 bpf_get_cgroup_classid(skb) - * retrieve a proc's classid - * @skb: pointer to skb - * Return: classid if != 0 - * - * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) - * Return: 0 on success or negative error - * - * int bpf_skb_vlan_pop(skb) - * Return: 0 on success or negative error - * - * int bpf_skb_get_tunnel_key(skb, key, size, flags) - * int bpf_skb_set_tunnel_key(skb, key, size, flags) - * retrieve or populate tunnel metadata - * @skb: pointer to skb - * @key: pointer to 'struct bpf_tunnel_key' - * @size: size of 'struct bpf_tunnel_key' - * @flags: room for future extensions - * Return: 0 on success or negative error - * - * u64 bpf_perf_event_read(map, flags) - * read perf event counter value - * @map: pointer to perf_event_array map - * @flags: index of event in the map or bitmask flags - * Return: value of perf event counter read or error code - * - * int bpf_redirect(ifindex, flags) - * redirect to another netdev - * @ifindex: ifindex of the net device - * @flags: - * cls_bpf: - * bit 0 - if set, redirect to ingress instead of egress - * other bits - reserved - * xdp_bpf: - * all bits - reserved - * Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error - * xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error - * int bpf_redirect_map(map, key, flags) - * redirect to endpoint in map - * @map: pointer to dev map - * @key: index in map to lookup - * @flags: -- - * Return: XDP_REDIRECT on success or XDP_ABORT on error - * - * u32 bpf_get_route_realm(skb) - * retrieve a dst's tclassid - * @skb: pointer to skb - * Return: realm if != 0 - * - * int bpf_perf_event_output(ctx, map, flags, data, size) - * output perf 
raw sample - * @ctx: struct pt_regs* - * @map: pointer to perf_event_array map - * @flags: index of event in the map or bitmask flags - * @data: data on stack to be output as raw data - * @size: size of data - * Return: 0 on success or negative error - * - * int bpf_get_stackid(ctx, map, flags) - * walk user or kernel stack and return id - * @ctx: struct pt_regs* - * @map: pointer to stack_trace map - * @flags: bits 0-7 - numer of stack frames to skip - * bit 8 - collect user stack instead of kernel - * bit 9 - compare stacks by hash only - * bit 10 - if two different stacks hash into the same stackid - * discard old - * other bits - reserved - * Return: >= 0 stackid on success or negative error - * - * s64 bpf_csum_diff(from, from_size, to, to_size, seed) - * calculate csum diff - * @from: raw from buffer - * @from_size: length of from buffer - * @to: raw to buffer - * @to_size: length of to buffer - * @seed: optional seed - * Return: csum result or negative error code - * - * int bpf_skb_get_tunnel_opt(skb, opt, size) - * retrieve tunnel options metadata - * @skb: pointer to skb - * @opt: pointer to raw tunnel option data - * @size: size of @opt - * Return: option size - * - * int bpf_skb_set_tunnel_opt(skb, opt, size) - * populate tunnel options metadata - * @skb: pointer to skb - * @opt: pointer to raw tunnel option data - * @size: size of @opt - * Return: 0 on success or negative error - * - * int bpf_skb_change_proto(skb, proto, flags) - * Change protocol of the skb. Currently supported is v4 -> v6, - * v6 -> v4 transitions. The helper will also resize the skb. eBPF - * program is expected to fill the new headers via skb_store_bytes - * and lX_csum_replace. - * @skb: pointer to skb - * @proto: new skb->protocol type - * @flags: reserved - * Return: 0 on success or negative error - * - * int bpf_skb_change_type(skb, type) - * Change packet type of skb. - * @skb: pointer to skb - * @type: new skb->pkt_type type - * Return: 0 on success or negative error - * - * int bpf_skb_under_cgroup(skb, map, index) - * Check cgroup2 membership of skb - * @skb: pointer to skb - * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type - * @index: index of the cgroup in the bpf_map - * Return: - * == 0 skb failed the cgroup2 descendant test - * == 1 skb succeeded the cgroup2 descendant test - * < 0 error - * - * u32 bpf_get_hash_recalc(skb) - * Retrieve and possibly recalculate skb->hash. - * @skb: pointer to skb - * Return: hash + * Return + * A 64-bit integer containing the current GID and UID, and + * created as such: *current_gid* **<< 32 \|** *current_uid*. + * + * int bpf_get_current_comm(char *buf, u32 size_of_buf) + * Description + * Copy the **comm** attribute of the current task into *buf* of + * *size_of_buf*. The **comm** attribute contains the name of + * the executable (excluding the path) for the current task. The + * *size_of_buf* must be strictly positive. On success, the + * helper makes sure that the *buf* is NUL-terminated. On failure, + * it is filled with zeroes. + * Return + * 0 on success, or a negative error in case of failure. + * + * u32 bpf_get_cgroup_classid(struct sk_buff *skb) + * Description + * Retrieve the classid for the current task, i.e. for the net_cls + * cgroup to which *skb* belongs. + * + * This helper can be used on TC egress path, but not on ingress. + * + * The net_cls cgroup provides an interface to tag network packets + * based on a user-provided identifier for all traffic coming from + * the tasks belonging to the related cgroup. 
See also the related + * kernel documentation, available from the Linux sources in file + * *Documentation/cgroup-v1/net_cls.txt*. + * + * The Linux kernel has two versions for cgroups: there are + * cgroups v1 and cgroups v2. Both are available to users, who can + * use a mixture of them, but note that the net_cls cgroup is for + * cgroup v1 only. This makes it incompatible with BPF programs + * run on cgroups, which is a cgroup-v2-only feature (a socket can + * only hold data for one version of cgroups at a time). + * + * This helper is only available is the kernel was compiled with + * the **CONFIG_CGROUP_NET_CLASSID** configuration option set to + * "**y**" or to "**m**". + * Return + * The classid, or 0 for the default unconfigured classid. + * + * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) + * Description + * Push a *vlan_tci* (VLAN tag control information) of protocol + * *vlan_proto* to the packet associated to *skb*, then update + * the checksum. Note that if *vlan_proto* is different from + * **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to + * be **ETH_P_8021Q**. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_vlan_pop(struct sk_buff *skb) + * Description + * Pop a VLAN header from the packet associated to *skb*. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) + * Description + * Get tunnel metadata. This helper takes a pointer *key* to an + * empty **struct bpf_tunnel_key** of **size**, that will be + * filled with tunnel metadata for the packet associated to *skb*. + * The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which + * indicates that the tunnel is based on IPv6 protocol instead of + * IPv4. + * + * The **struct bpf_tunnel_key** is an object that generalizes the + * principal parameters used by various tunneling protocols into a + * single struct. This way, it can be used to easily make a + * decision based on the contents of the encapsulation header, + * "summarized" in this struct. In particular, it holds the IP + * address of the remote end (IPv4 or IPv6, depending on the case) + * in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also, + * this struct exposes the *key*\ **->tunnel_id**, which is + * generally mapped to a VNI (Virtual Network Identifier), making + * it programmable together with the **bpf_skb_set_tunnel_key**\ + * () helper. 
+ * + * Let's imagine that the following code is part of a program + * attached to the TC ingress interface, on one end of a GRE + * tunnel, and is supposed to filter out all messages coming from + * remote ends with IPv4 address other than 10.0.0.1: + * + * :: + * + * int ret; + * struct bpf_tunnel_key key = {}; + * + * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); + * if (ret < 0) + * return TC_ACT_SHOT; // drop packet + * + * if (key.remote_ipv4 != 0x0a000001) + * return TC_ACT_SHOT; // drop packet + * + * return TC_ACT_OK; // accept packet + * + * This interface can also be used with all encapsulation devices + * that can operate in "collect metadata" mode: instead of having + * one network device per specific configuration, the "collect + * metadata" mode only requires a single device where the + * configuration can be extracted from this helper. + * + * This can be used together with various tunnels such as VXLan, + * Geneve, GRE or IP in IP (IPIP). + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) + * Description + * Populate tunnel metadata for packet associated to *skb.* The + * tunnel metadata is set to the contents of *key*, of *size*. The + * *flags* can be set to a combination of the following values: + * + * **BPF_F_TUNINFO_IPV6** + * Indicate that the tunnel is based on IPv6 protocol + * instead of IPv4. + * **BPF_F_ZERO_CSUM_TX** + * For IPv4 packets, add a flag to tunnel metadata + * indicating that checksum computation should be skipped + * and checksum set to zeroes. + * **BPF_F_DONT_FRAGMENT** + * Add a flag to tunnel metadata indicating that the + * packet should not be fragmented. + * **BPF_F_SEQ_NUMBER** + * Add a flag to tunnel metadata indicating that a + * sequence number should be added to tunnel header before + * sending the packet. This flag was added for GRE + * encapsulation, but might be used with other protocols + * as well in the future. + * + * Here is a typical usage on the transmit path: + * + * :: + * + * struct bpf_tunnel_key key; + * populate key ... + * bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); + * bpf_clone_redirect(skb, vxlan_dev_ifindex, 0); + * + * See also the description of the **bpf_skb_get_tunnel_key**\ () + * helper for additional information. + * Return + * 0 on success, or a negative error in case of failure. + * + * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags) + * Description + * Read the value of a perf event counter. This helper relies on a + * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of + * the perf event counter is selected when *map* is updated with + * perf event file descriptors. The *map* is an array whose size + * is the number of available CPUs, and each cell contains a value + * relative to one CPU. The value to retrieve is indicated by + * *flags*, that contains the index of the CPU to look up, masked + * with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to + * **BPF_F_CURRENT_CPU** to indicate that the value for the + * current CPU should be retrieved. + * + * Note that before Linux 4.13, only hardware perf event can be + * retrieved. + * + * Also, be aware that the newer helper + * **bpf_perf_event_read_value**\ () is recommended over + * **bpf_perf_event_read**\ () in general. The latter has some ABI + * quirks where error and counter value are used as a return code + * (which is wrong to do since ranges may overlap). 
This issue is + * fixed with **bpf_perf_event_read_value**\ (), which at the same + * time provides more features over the **bpf_perf_event_read**\ + * () interface. Please refer to the description of + * **bpf_perf_event_read_value**\ () for details. + * Return + * The value of the perf event counter read from the map, or a + * negative error code in case of failure. + * + * int bpf_redirect(u32 ifindex, u64 flags) + * Description + * Redirect the packet to another net device of index *ifindex*. + * This helper is somewhat similar to **bpf_clone_redirect**\ + * (), except that the packet is not cloned, which provides + * increased performance. + * + * Except for XDP, both ingress and egress interfaces can be used + * for redirection. The **BPF_F_INGRESS** value in *flags* is used + * to make the distinction (ingress path is selected if the flag + * is present, egress path otherwise). Currently, XDP only + * supports redirection to the egress interface, and accepts no + * flag at all. + * + * The same effect can be attained with the more generic + * **bpf_redirect_map**\ (), which requires specific maps to be + * used but offers better performance. + * Return + * For XDP, the helper returns **XDP_REDIRECT** on success or + * **XDP_ABORTED** on error. For other program types, the values + * are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on + * error. + * + * u32 bpf_get_route_realm(struct sk_buff *skb) + * Description + * Retrieve the realm or the route, that is to say the + * **tclassid** field of the destination for the *skb*. The + * indentifier retrieved is a user-provided tag, similar to the + * one used with the net_cls cgroup (see description for + * **bpf_get_cgroup_classid**\ () helper), but here this tag is + * held by a route (a destination entry), not by a task. + * + * Retrieving this identifier works with the clsact TC egress hook + * (see also **tc-bpf(8)**), or alternatively on conventional + * classful egress qdiscs, but not on TC ingress path. In case of + * clsact TC egress hook, this has the advantage that, internally, + * the destination entry has not been dropped yet in the transmit + * path. Therefore, the destination entry does not need to be + * artificially held via **netif_keep_dst**\ () for a classful + * qdisc until the *skb* is freed. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_IP_ROUTE_CLASSID** configuration option. + * Return + * The realm of the route for the packet associated to *skb*, or 0 + * if none was found. + * + * int bpf_perf_event_output(struct pt_reg *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * Description + * Write raw *data* blob into a special BPF perf event held by + * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf + * event must have the following attributes: **PERF_SAMPLE_RAW** + * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and + * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. + * + * The *flags* are used to indicate the index in *map* for which + * the value must be put, masked with **BPF_F_INDEX_MASK**. + * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** + * to indicate that the index of the current CPU core should be + * used. + * + * The value to write, of *size*, is passed through eBPF stack and + * pointed by *data*. + * + * The context of the program *ctx* needs also be passed to the + * helper. 
+ * + * On user space, a program willing to read the values needs to + * call **perf_event_open**\ () on the perf event (either for + * one or for all CPUs) and to store the file descriptor into the + * *map*. This must be done before the eBPF program can send data + * into it. An example is available in file + * *samples/bpf/trace_output_user.c* in the Linux kernel source + * tree (the eBPF program counterpart is in + * *samples/bpf/trace_output_kern.c*). + * + * **bpf_perf_event_output**\ () achieves better performance + * than **bpf_trace_printk**\ () for sharing data with user + * space, and is much better suitable for streaming data from eBPF + * programs. + * + * Note that this helper is not restricted to tracing use cases + * and can be used with programs attached to TC or XDP as well, + * where it allows for passing data to user space listeners. Data + * can be: + * + * * Only custom structs, + * * Only the packet payload, or + * * A combination of both. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len) + * Description + * This helper was provided as an easy way to load data from a + * packet. It can be used to load *len* bytes from *offset* from + * the packet associated to *skb*, into the buffer pointed by + * *to*. + * + * Since Linux 4.7, usage of this helper has mostly been replaced + * by "direct packet access", enabling packet data to be + * manipulated with *skb*\ **->data** and *skb*\ **->data_end** + * pointing respectively to the first byte of packet data and to + * the byte after the last byte of packet data. However, it + * remains useful if one wishes to read large quantities of data + * at once from a packet into the eBPF stack. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_get_stackid(struct pt_reg *ctx, struct bpf_map *map, u64 flags) + * Description + * Walk a user or a kernel stack and return its id. To achieve + * this, the helper needs *ctx*, which is a pointer to the context + * on which the tracing program is executed, and a pointer to a + * *map* of type **BPF_MAP_TYPE_STACK_TRACE**. + * + * The last argument, *flags*, holds the number of stack frames to + * skip (from 0 to 255), masked with + * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set + * a combination of the following flags: + * + * **BPF_F_USER_STACK** + * Collect a user space stack instead of a kernel stack. + * **BPF_F_FAST_STACK_CMP** + * Compare stacks by hash only. + * **BPF_F_REUSE_STACKID** + * If two different stacks hash into the same *stackid*, + * discard the old one. + * + * The stack id retrieved is a 32 bit long integer handle which + * can be further combined with other data (including other stack + * ids) and used as a key into maps. This can be useful for + * generating a variety of graphs (such as flame graphs or off-cpu + * graphs). + * + * For walking a stack, this helper is an improvement over + * **bpf_probe_read**\ (), which can be used with unrolled loops + * but is not efficient and consumes a lot of eBPF instructions. + * Instead, **bpf_get_stackid**\ () can collect up to + * **PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that + * this limit can be controlled with the **sysctl** program, and + * that it should be manually increased in order to profile long + * user stacks (such as stacks for Java programs). 
To do so, use: + * + * :: + * + * # sysctl kernel.perf_event_max_stack= + * + * Return + * The positive or null stack id on success, or a negative error + * in case of failure. + * + * s64 bpf_csum_diff(__be32 *from, u32 from_size, __be32 *to, u32 to_size, __wsum seed) + * Description + * Compute a checksum difference, from the raw buffer pointed by + * *from*, of length *from_size* (that must be a multiple of 4), + * towards the raw buffer pointed by *to*, of size *to_size* + * (same remark). An optional *seed* can be added to the value + * (this can be cascaded, the seed may come from a previous call + * to the helper). + * + * This is flexible enough to be used in several ways: + * + * * With *from_size* == 0, *to_size* > 0 and *seed* set to + * checksum, it can be used when pushing new data. + * * With *from_size* > 0, *to_size* == 0 and *seed* set to + * checksum, it can be used when removing data from a packet. + * * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it + * can be used to compute a diff. Note that *from_size* and + * *to_size* do not need to be equal. + * + * This helper can be used in combination with + * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to + * which one can feed in the difference computed with + * **bpf_csum_diff**\ (). + * Return + * The checksum result, or a negative error code in case of + * failure. + * + * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size) + * Description + * Retrieve tunnel options metadata for the packet associated to + * *skb*, and store the raw tunnel option data to the buffer *opt* + * of *size*. + * + * This helper can be used with encapsulation devices that can + * operate in "collect metadata" mode (please refer to the related + * note in the description of **bpf_skb_get_tunnel_key**\ () for + * more details). A particular example where this can be used is + * in combination with the Geneve encapsulation protocol, where it + * allows for pushing (with **bpf_skb_get_tunnel_opt**\ () helper) + * and retrieving arbitrary TLVs (Type-Length-Value headers) from + * the eBPF program. This allows for full customization of these + * headers. + * Return + * The size of the option data retrieved. + * + * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size) + * Description + * Set tunnel options metadata for the packet associated to *skb* + * to the option data contained in the raw buffer *opt* of *size*. + * + * See also the description of the **bpf_skb_get_tunnel_opt**\ () + * helper for additional information. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags) + * Description + * Change the protocol of the *skb* to *proto*. Currently + * supported are transition from IPv4 to IPv6, and from IPv6 to + * IPv4. The helper takes care of the groundwork for the + * transition, including resizing the socket buffer. The eBPF + * program is expected to fill the new headers, if any, via + * **skb_store_bytes**\ () and to recompute the checksums with + * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ + * (). The main case for this helper is to perform NAT64 + * operations out of an eBPF program. + * + * Internally, the GSO type is marked as dodgy so that headers are + * checked and segments are recalculated by the GSO/GRO engine. + * The size for GSO target is adapted as well. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. 
+ * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_change_type(struct sk_buff *skb, u32 type) + * Description + * Change the packet type for the packet associated to *skb*. This + * comes down to setting *skb*\ **->pkt_type** to *type*, except + * the eBPF program does not have a write access to *skb*\ + * **->pkt_type** beside this helper. Using a helper here allows + * for graceful handling of errors. + * + * The major use case is to change incoming *skb*s to + * **PACKET_HOST** in a programmatic way instead of having to + * recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for + * example. + * + * Note that *type* only allows certain values. At this time, they + * are: + * + * **PACKET_HOST** + * Packet is for us. + * **PACKET_BROADCAST** + * Send packet to all. + * **PACKET_MULTICAST** + * Send packet to group. + * **PACKET_OTHERHOST** + * Send packet to someone else. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index) + * Description + * Check whether *skb* is a descendant of the cgroup2 held by + * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. + * Return + * The return value depends on the result of the test, and can be: + * + * * 0, if the *skb* failed the cgroup2 descendant test. + * * 1, if the *skb* succeeded the cgroup2 descendant test. + * * A negative error code, if an error occurred. + * + * u32 bpf_get_hash_recalc(struct sk_buff *skb) + * Description + * Retrieve the hash of the packet, *skb*\ **->hash**. If it is + * not set, in particular if the hash was cleared due to mangling, + * recompute this hash. Later accesses to the hash can be done + * directly with *skb*\ **->hash**. + * + * Calling **bpf_set_hash_invalid**\ (), changing a packet + * prototype with **bpf_skb_change_proto**\ (), or calling + * **bpf_skb_store_bytes**\ () with the + * **BPF_F_INVALIDATE_HASH** are actions susceptible to clear + * the hash and to trigger a new computation for the next call to + * **bpf_get_hash_recalc**\ (). + * Return + * The 32-bit hash. * * u64 bpf_get_current_task(void) - * Returns current task_struct - * Return: current - * - * int bpf_probe_write_user(void *dst, void *src, int len) - * safely attempt to write to a location - * @dst: destination address in userspace - * @src: source address on stack - * @len: number of bytes to copy - * Return: 0 on success or negative error - * - * int bpf_current_task_under_cgroup(map, index) - * Check cgroup2 membership of current task - * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type - * @index: index of the cgroup in the bpf_map - * Return: - * == 0 current failed the cgroup2 descendant test - * == 1 current succeeded the cgroup2 descendant test - * < 0 error - * - * int bpf_skb_change_tail(skb, len, flags) - * The helper will resize the skb to the given new size, to be used f.e. - * with control messages. - * @skb: pointer to skb - * @len: new skb length - * @flags: reserved - * Return: 0 on success or negative error - * - * int bpf_skb_pull_data(skb, len) - * The helper will pull in non-linear data in case the skb is non-linear - * and not all of len are part of the linear section. 
Only needed for - * read/write with direct packet access. - * @skb: pointer to skb - * @len: len to make read/writeable - * Return: 0 on success or negative error - * - * s64 bpf_csum_update(skb, csum) - * Adds csum into skb->csum in case of CHECKSUM_COMPLETE. - * @skb: pointer to skb - * @csum: csum to add - * Return: csum on success or negative error - * - * void bpf_set_hash_invalid(skb) - * Invalidate current skb->hash. - * @skb: pointer to skb - * - * int bpf_get_numa_node_id() - * Return: Id of current NUMA node. - * - * int bpf_skb_change_head() - * Grows headroom of skb and adjusts MAC header offset accordingly. - * Will extends/reallocae as required automatically. - * May change skb data pointer and will thus invalidate any check - * performed for direct packet access. - * @skb: pointer to skb - * @len: length of header to be pushed in front - * @flags: Flags (unused for now) - * Return: 0 on success or negative error - * - * int bpf_xdp_adjust_head(xdp_md, delta) - * Adjust the xdp_md.data by delta - * @xdp_md: pointer to xdp_md - * @delta: An positive/negative integer to be added to xdp_md.data - * Return: 0 on success or negative on error + * Return + * A pointer to the current task struct. + * + * int bpf_probe_write_user(void *dst, const void *src, u32 len) + * Description + * Attempt in a safe way to write *len* bytes from the buffer + * *src* to *dst* in memory. It only works for threads that are in + * user context, and *dst* must be a valid user space address. + * + * This helper should not be used to implement any kind of + * security mechanism because of TOC-TOU attacks, but rather to + * debug, divert, and manipulate execution of semi-cooperative + * processes. + * + * Keep in mind that this feature is meant for experiments, and it + * has a risk of crashing the system and running programs. + * Therefore, when an eBPF program using this helper is attached, + * a warning including PID and process name is printed to kernel + * logs. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index) + * Description + * Check whether the probe is being run is the context of a given + * subset of the cgroup2 hierarchy. The cgroup2 to test is held by + * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. + * Return + * The return value depends on the result of the test, and can be: + * + * * 0, if the *skb* task belongs to the cgroup2. + * * 1, if the *skb* task does not belong to the cgroup2. + * * A negative error code, if an error occurred. + * + * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) + * Description + * Resize (trim or grow) the packet associated to *skb* to the + * new *len*. The *flags* are reserved for future usage, and must + * be left at zero. + * + * The basic idea is that the helper performs the needed work to + * change the size of the packet, then the eBPF program rewrites + * the rest via helpers like **bpf_skb_store_bytes**\ (), + * **bpf_l3_csum_replace**\ (), **bpf_l3_csum_replace**\ () + * and others. This helper is a slow path utility intended for + * replies with control messages. And because it is targeted for + * slow path, the helper itself can afford to be slow: it + * implicitly linearizes, unclones and drops offloads from the + * *skb*. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. 
Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_pull_data(struct sk_buff *skb, u32 len) + * Description + * Pull in non-linear data in case the *skb* is non-linear and not + * all of *len* are part of the linear section. Make *len* bytes + * from *skb* readable and writable. If a zero value is passed for + * *len*, then the whole length of the *skb* is pulled. + * + * This helper is only needed for reading and writing with direct + * packet access. + * + * For direct packet access, testing that offsets to access + * are within packet boundaries (test on *skb*\ **->data_end**) is + * susceptible to fail if offsets are invalid, or if the requested + * data is in non-linear parts of the *skb*. On failure the + * program can just bail out, or in the case of a non-linear + * buffer, use a helper to make the data available. The + * **bpf_skb_load_bytes**\ () helper is a first solution to access + * the data. Another one consists in using **bpf_skb_pull_data** + * to pull in once the non-linear parts, then retesting and + * eventually access the data. + * + * At the same time, this also makes sure the *skb* is uncloned, + * which is a necessary condition for direct write. As this needs + * to be an invariant for the write part only, the verifier + * detects writes and adds a prologue that is calling + * **bpf_skb_pull_data()** to effectively unclone the *skb* from + * the very beginning in case it is indeed cloned. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * s64 bpf_csum_update(struct sk_buff *skb, __wsum csum) + * Description + * Add the checksum *csum* into *skb*\ **->csum** in case the + * driver has supplied a checksum for the entire packet into that + * field. Return an error otherwise. This helper is intended to be + * used in combination with **bpf_csum_diff**\ (), in particular + * when the checksum needs to be updated after data has been + * written into the packet through direct packet access. + * Return + * The checksum on success, or a negative error code in case of + * failure. + * + * void bpf_set_hash_invalid(struct sk_buff *skb) + * Description + * Invalidate the current *skb*\ **->hash**. It can be used after + * mangling on headers through direct packet access, in order to + * indicate that the hash is outdated and to trigger a + * recalculation the next time the kernel tries to access this + * hash or when the **bpf_get_hash_recalc**\ () helper is called. + * + * int bpf_get_numa_node_id(void) + * Description + * Return the id of the current NUMA node. The primary use case + * for this helper is the selection of sockets for the local NUMA + * node, when the program is attached to sockets using the + * **SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**), + * but the helper is also available to other eBPF program types, + * similarly to **bpf_get_smp_processor_id**\ (). + * Return + * The id of current NUMA node. 
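The pull-then-retest sequence that the bpf_skb_pull_data() description above walks through is the part that is easy to get wrong, so a minimal sketch of it for a clsact/TC classifier is included here. The section name, the header set, the parse_headers() function and the locally declared skb_pull_data wrapper are illustrative assumptions (the wrapper is not among the helpers declared in this repo's bpf_helpers.h); this is a sketch of the documented pattern, not code shipped by this patch.

    #include <linux/bpf.h>
    #include <linux/pkt_cls.h>
    #include <linux/if_ether.h>
    #include <linux/ip.h>
    #include <linux/udp.h>
    #include "bpf_helpers.h"

    /* Local wrapper, assumed for this sketch; the id comes from the
     * BPF_FUNC_* enum generated by __BPF_FUNC_MAPPER in bpf.h. */
    static int (*skb_pull_data)(void *skb, int len) =
        (void *) BPF_FUNC_skb_pull_data;

    SEC("classifier")
    int parse_headers(struct __sk_buff *skb)
    {
      const int need = sizeof(struct ethhdr) + sizeof(struct iphdr) +
                       sizeof(struct udphdr);
      void *data = (void *)(long)skb->data;
      void *data_end = (void *)(long)skb->data_end;

      if (data + need > data_end) {
        /* The headers may sit in a non-linear part of the skb: pull them
         * into the linear section, then re-derive and re-check the
         * pointers, since the helper call invalidates the old checks. */
        if (skb_pull_data(skb, need))
          return TC_ACT_SHOT;
        data = (void *)(long)skb->data;
        data_end = (void *)(long)skb->data_end;
        if (data + need > data_end)
          return TC_ACT_SHOT;
      }
      /* Direct packet access to the first 'need' bytes is now valid. */
      return TC_ACT_OK;
    }

The same two-step (bail out or pull, then retest) applies after any of the helpers in this header whose descriptions note that they may change the underlying packet buffer.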
+ * + * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags) + * Description + * Grows headroom of packet associated to *skb* and adjusts the + * offset of the MAC header accordingly, adding *len* bytes of + * space. It automatically extends and reallocates memory as + * required. + * + * This helper can be used on a layer 3 *skb* to push a MAC header + * for redirection into a layer 2 device. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta) + * Description + * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that + * it is possible to use a negative value for *delta*. This helper + * can be used to prepare the packet for pushing or popping + * headers. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. * * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr) - * Copy a NUL terminated string from unsafe address. In case the string - * length is smaller than size, the target is not padded with further NUL - * bytes. In case the string length is larger than size, just count-1 - * bytes are copied and the last byte is set to NUL. - * @dst: destination address - * @size: maximum number of bytes to copy, including the trailing NUL - * @unsafe_ptr: unsafe address - * Return: - * > 0 length of the string including the trailing NUL on success - * < 0 error - * - * u64 bpf_get_socket_cookie(skb) - * Get the cookie for the socket stored inside sk_buff. - * @skb: pointer to skb - * Return: 8 Bytes non-decreasing number on success or 0 if the socket - * field is missing inside sk_buff - * - * u32 bpf_get_socket_uid(skb) - * Get the owner uid of the socket stored inside sk_buff. - * @skb: pointer to skb - * Return: uid of the socket owner on success or overflowuid if failed. - * - * u32 bpf_set_hash(skb, hash) - * Set full skb->hash. - * @skb: pointer to skb - * @hash: hash to set - * - * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen) - * Calls setsockopt. Not all opts are available, only those with - * integer optvals plus TCP_CONGESTION. - * Supported levels: SOL_SOCKET and IPPROTO_TCP - * @bpf_socket: pointer to bpf_socket - * @level: SOL_SOCKET or IPPROTO_TCP - * @optname: option name - * @optval: pointer to option value - * @optlen: length of optval in bytes - * Return: 0 or negative error - * - * int bpf_getsockopt(bpf_socket, level, optname, optval, optlen) - * Calls getsockopt. Not all opts are available. - * Supported levels: IPPROTO_TCP - * @bpf_socket: pointer to bpf_socket - * @level: IPPROTO_TCP - * @optname: option name - * @optval: pointer to option value - * @optlen: length of optval in bytes - * Return: 0 or negative error - * - * int bpf_skb_adjust_room(skb, len_diff, mode, flags) - * Grow or shrink room in sk_buff. 
- * @skb: pointer to skb - * @len_diff: (signed) amount of room to grow/shrink - * @mode: operation mode (enum bpf_adj_room_mode) - * @flags: reserved for future use - * Return: 0 on success or negative error code - * - * int bpf_sk_redirect_map(map, key, flags) - * Redirect skb to a sock in map using key as a lookup key for the - * sock in map. - * @map: pointer to sockmap - * @key: key to lookup sock in map - * @flags: reserved for future use - * Return: SK_PASS - * - * int bpf_sock_map_update(skops, map, key, flags) - * @skops: pointer to bpf_sock_ops - * @map: pointer to sockmap to update - * @key: key to insert/update sock in map - * @flags: same flags as map update elem - * - * int bpf_xdp_adjust_meta(xdp_md, delta) - * Adjust the xdp_md.data_meta by delta - * @xdp_md: pointer to xdp_md - * @delta: An positive/negative integer to be added to xdp_md.data_meta - * Return: 0 on success or negative on error - * - * int bpf_perf_event_read_value(map, flags, buf, buf_size) - * read perf event counter value and perf event enabled/running time - * @map: pointer to perf_event_array map - * @flags: index of event in the map or bitmask flags - * @buf: buf to fill - * @buf_size: size of the buf - * Return: 0 on success or negative error code - * - * int bpf_perf_prog_read_value(ctx, buf, buf_size) - * read perf prog attached perf event counter and enabled/running time - * @ctx: pointer to ctx - * @buf: buf to fill - * @buf_size: size of the buf - * Return : 0 on success or negative error code - * - * int bpf_override_return(pt_regs, rc) - * @pt_regs: pointer to struct pt_regs - * @rc: the return value to set + * Description + * Copy a NUL terminated string from an unsafe address + * *unsafe_ptr* to *dst*. The *size* should include the + * terminating NUL byte. In case the string length is smaller than + * *size*, the target is not padded with further NUL bytes. If the + * string length is larger than *size*, just *size*-1 bytes are + * copied and the last byte is set to NUL. + * + * On success, the length of the copied string is returned. This + * makes this helper useful in tracing programs for reading + * strings, and more importantly to get its length at runtime. See + * the following snippet: + * + * :: + * + * SEC("kprobe/sys_open") + * void bpf_sys_open(struct pt_regs *ctx) + * { + * char buf[PATHLEN]; // PATHLEN is defined to 256 + * int res = bpf_probe_read_str(buf, sizeof(buf), + * ctx->di); + * + * // Consume buf, for example push it to + * // userspace via bpf_perf_event_output(); we + * // can use res (the string length) as event + * // size, after checking its boundaries. + * } + * + * In comparison, using **bpf_probe_read()** helper here instead + * to read the string would require to estimate the length at + * compile time, and would often result in copying more memory + * than necessary. + * + * Another useful use case is when parsing individual process + * arguments or individual environment variables navigating + * *current*\ **->mm->arg_start** and *current*\ + * **->mm->env_start**: using this helper and the return value, + * one can quickly iterate at the right offset of the memory area. + * Return + * On success, the strictly positive length of the string, + * including the trailing NUL character. On error, a negative + * value. + * + * u64 bpf_get_socket_cookie(struct sk_buff *skb) + * Description + * If the **struct sk_buff** pointed by *skb* has a known socket, + * retrieve the cookie (generated by the kernel) of this socket. 
+ * If no cookie has been set yet, generate a new cookie. Once + * generated, the socket cookie remains stable for the life of the + * socket. This helper can be useful for monitoring per socket + * networking traffic statistics as it provides a unique socket + * identifier per namespace. + * Return + * A 8-byte long non-decreasing number on success, or 0 if the + * socket field is missing inside *skb*. + * + * u32 bpf_get_socket_uid(struct sk_buff *skb) + * Return + * The owner UID of the socket associated to *skb*. If the socket + * is **NULL**, or if it is not a full socket (i.e. if it is a + * time-wait or a request socket instead), **overflowuid** value + * is returned (note that **overflowuid** might also be the actual + * UID value for the socket). + * + * u32 bpf_set_hash(struct sk_buff *skb, u32 hash) + * Description + * Set the full hash for *skb* (set the field *skb*\ **->hash**) + * to value *hash*. + * Return + * 0 + * + * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen) + * Description + * Emulate a call to **setsockopt()** on the socket associated to + * *bpf_socket*, which must be a full socket. The *level* at + * which the option resides and the name *optname* of the option + * must be specified, see **setsockopt(2)** for more information. + * The option value of length *optlen* is pointed by *optval*. + * + * This helper actually implements a subset of **setsockopt()**. + * It supports the following *level*\ s: + * + * * **SOL_SOCKET**, which supports the following *optname*\ s: + * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, + * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**. + * * **IPPROTO_TCP**, which supports the following *optname*\ s: + * **TCP_CONGESTION**, **TCP_BPF_IW**, + * **TCP_BPF_SNDCWND_CLAMP**. + * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. + * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags) + * Description + * Grow or shrink the room for data in the packet associated to + * *skb* by *len_diff*, and according to the selected *mode*. + * + * There is a single supported mode at this time: + * + * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer + * (room space is added or removed below the layer 3 header). + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) + * Description + * Redirect the packet to the endpoint referenced by *map* at + * index *key*. Depending on its type, this *map* can contain + * references to net devices (for forwarding packets through other + * ports), or to CPUs (for redirecting XDP frames to another CPU; + * but this is only implemented for native XDP (with driver + * support) as of this writing). + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * When used to redirect packets to net devices, this helper + * provides a high performance increase over **bpf_redirect**\ (). 
+ * This is due to various implementation details of the underlying + * mechanisms, one of which is the fact that **bpf_redirect_map**\ + * () tries to send packet as a "bulk" to the device. + * Return + * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error. + * + * int bpf_sk_redirect_map(struct bpf_map *map, u32 key, u64 flags) + * Description + * Redirect the packet to the socket referenced by *map* (of type + * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and + * egress interfaces can be used for redirection. The + * **BPF_F_INGRESS** value in *flags* is used to make the + * distinction (ingress path is selected if the flag is present, + * egress path otherwise). This is the only flag supported for now. + * Return + * **SK_PASS** on success, or **SK_DROP** on error. + * + * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) + * Description + * Add an entry to, or update a *map* referencing sockets. The + * *skops* is used as a new value for the entry associated to + * *key*. *flags* is one of: + * + * **BPF_NOEXIST** + * The entry for *key* must not exist in the map. + * **BPF_EXIST** + * The entry for *key* must already exist in the map. + * **BPF_ANY** + * No condition on the existence of the entry for *key*. + * + * If the *map* has eBPF programs (parser and verdict), those will + * be inherited by the socket being added. If the socket is + * already attached to eBPF programs, this results in an error. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta) + * Description + * Adjust the address pointed by *xdp_md*\ **->data_meta** by + * *delta* (which can be positive or negative). Note that this + * operation modifies the address stored in *xdp_md*\ **->data**, + * so the latter must be loaded only after the helper has been + * called. + * + * The use of *xdp_md*\ **->data_meta** is optional and programs + * are not required to use it. The rationale is that when the + * packet is processed with XDP (e.g. as DoS filter), it is + * possible to push further meta data along with it before passing + * to the stack, and to give the guarantee that an ingress eBPF + * program attached as a TC classifier on the same device can pick + * this up for further post-processing. Since TC works with socket + * buffers, it remains possible to set from XDP the **mark** or + * **priority** pointers, or other pointers for the socket buffer. + * Having this scratch space generic and programmable allows for + * more flexibility as the user is free to store whatever meta + * data they need. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size) + * Description + * Read the value of a perf event counter, and store it into *buf* + * of size *buf_size*. This helper relies on a *map* of type + * **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event + * counter is selected when *map* is updated with perf event file + * descriptors. 
The *map* is an array whose size is the number of + * available CPUs, and each cell contains a value relative to one + * CPU. The value to retrieve is indicated by *flags*, that + * contains the index of the CPU to look up, masked with + * **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to + * **BPF_F_CURRENT_CPU** to indicate that the value for the + * current CPU should be retrieved. + * + * This helper behaves in a way close to + * **bpf_perf_event_read**\ () helper, save that instead of + * just returning the value observed, it fills the *buf* + * structure. This allows for additional data to be retrieved: in + * particular, the enabled and running times (in *buf*\ + * **->enabled** and *buf*\ **->running**, respectively) are + * copied. In general, **bpf_perf_event_read_value**\ () is + * recommended over **bpf_perf_event_read**\ (), which has some + * ABI issues and provides fewer functionalities. + * + * These values are interesting, because hardware PMU (Performance + * Monitoring Unit) counters are limited resources. When there are + * more PMU based perf events opened than available counters, + * kernel will multiplex these events so each event gets certain + * percentage (but not all) of the PMU time. In case that + * multiplexing happens, the number of samples or counter value + * will not reflect the case compared to when no multiplexing + * occurs. This makes comparison between different runs difficult. + * Typically, the counter value should be normalized before + * comparing to other experiments. The usual normalization is done + * as follows. + * + * :: + * + * normalized_counter = counter * t_enabled / t_running + * + * Where t_enabled is the time enabled for event and t_running is + * the time running for event since last normalization. The + * enabled and running times are accumulated since the perf event + * open. To achieve scaling factor between two invocations of an + * eBPF program, users can can use CPU id as the key (which is + * typical for perf array usage model) to remember the previous + * value and do the calculation inside the eBPF program. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) + * Description + * For en eBPF program attached to a perf event, retrieve the + * value of the event counter associated to *ctx* and store it in + * the structure pointed by *buf* and of size *buf_size*. Enabled + * and running times are also stored in the structure (see + * description of helper **bpf_perf_event_read_value**\ () for + * more details). + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen) + * Description + * Emulate a call to **getsockopt()** on the socket associated to + * *bpf_socket*, which must be a full socket. The *level* at + * which the option resides and the name *optname* of the option + * must be specified, see **getsockopt(2)** for more information. + * The retrieved value is stored in the structure pointed by + * *opval* and of length *optlen*. + * + * This helper actually implements a subset of **getsockopt()**. + * It supports the following *level*\ s: + * + * * **IPPROTO_TCP**, which supports *optname* + * **TCP_CONGESTION**. + * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. + * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. 
+ * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_override_return(struct pt_reg *regs, u64 rc) + * Description + * Used for error injection, this helper uses kprobes to override + * the return value of the probed function, and to set it to *rc*. + * The first argument is the context *regs* on which the kprobe + * works. + * + * This helper works by setting setting the PC (program counter) + * to an override function which is run in place of the original + * probed function. This means the probed function is not run at + * all. The replacement function just returns with the required + * value. + * + * This helper has security implications, and thus is subject to + * restrictions. It is only available if the kernel was compiled + * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration + * option, and in this case it only works on functions tagged with + * **ALLOW_ERROR_INJECTION** in the kernel code. + * + * Also, the helper is only available for the architectures having + * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing, + * x86 architecture is the only one to support this feature. + * Return + * 0 + * + * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) + * Description + * Attempt to set the value of the **bpf_sock_ops_cb_flags** field + * for the full TCP socket associated to *bpf_sock_ops* to + * *argval*. + * + * The primary use of this field is to determine if there should + * be calls to eBPF programs of type + * **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP + * code. A program of the same type can change its value, per + * connection and as necessary, when the connection is + * established. This field is directly accessible for reading, but + * this helper must be used for updates in order to return an + * error if an eBPF program tries to set a callback that is not + * supported in the current kernel. + * + * The supported callback values that *argval* can combine are: + * + * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out) + * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission) + * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change) + * + * Here are some examples of where one could call such eBPF + * program: + * + * * When RTO fires. + * * When a packet is retransmitted. + * * When the connection terminates. + * * When a packet is sent. + * * When a packet is received. + * Return + * Code **-EINVAL** if the socket is not a full TCP socket; + * otherwise, a positive number containing the bits that could not + * be set is returned (which comes down to 0 if all bits were set + * as required). + * + * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags) + * Description + * This helper is used in programs implementing policies at the + * socket level. If the message *msg* is allowed to pass (i.e. if + * the verdict eBPF program returns **SK_PASS**), redirect it to + * the socket referenced by *map* (of type + * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and + * egress interfaces can be used for redirection. The + * **BPF_F_INGRESS** value in *flags* is used to make the + * distinction (ingress path is selected if the flag is present, + * egress path otherwise). This is the only flag supported for now. + * Return + * **SK_PASS** on success, or **SK_DROP** on error. 
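Because bpf_sock_ops_cb_flags_set() is the only way to turn these per-connection callbacks on (the field is readable directly, but only the helper reports back which requested bits the running kernel cannot honour), a short sketch of a BPF_PROG_TYPE_SOCK_OPS program opting in is given below. It assumes the BPF_SOCK_OPS_*_ESTABLISHED_CB operations from the unchanged part of this enum, the SEC() macro from bpf_helpers.h, and a hypothetical function name; it is an illustration, not code added by this patch.

    #include <linux/bpf.h>
    #include "bpf_helpers.h"

    SEC("sockops")
    int enable_tcp_callbacks(struct bpf_sock_ops *skops)
    {
      switch (skops->op) {
      /* Assumed present in the synced header, though outside this hunk. */
      case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
      case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
        /* Opt this connection in to RTO and retransmission callbacks.
         * A positive return value would carry the bits that could not
         * be set on this kernel. */
        bpf_sock_ops_cb_flags_set(skops,
                                  BPF_SOCK_OPS_RTO_CB_FLAG |
                                  BPF_SOCK_OPS_RETRANS_CB_FLAG);
        break;
      case BPF_SOCK_OPS_RTO_CB:
        /* args[0] = icsk_retransmits, args[1] = icsk_rto,
         * args[2] = whether the RTO has expired (see the operator list
         * further down in this header). */
        break;
      default:
        break;
      }
      return 1;
    }

    char _license[] SEC("license") = "GPL";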
+ * + * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes) + * Description + * For socket policies, apply the verdict of the eBPF program to + * the next *bytes* (number of bytes) of message *msg*. + * + * For example, this helper can be used in the following cases: + * + * * A single **sendmsg**\ () or **sendfile**\ () system call + * contains multiple logical messages that the eBPF program is + * supposed to read and for which it should apply a verdict. + * * An eBPF program only cares to read the first *bytes* of a + * *msg*. If the message has a large payload, then setting up + * and calling the eBPF program repeatedly for all bytes, even + * though the verdict is already known, would create unnecessary + * overhead. + * + * When called from within an eBPF program, the helper sets a + * counter internal to the BPF infrastructure, that is used to + * apply the last verdict to the next *bytes*. If *bytes* is + * smaller than the current data being processed from a + * **sendmsg**\ () or **sendfile**\ () system call, the first + * *bytes* will be sent and the eBPF program will be re-run with + * the pointer for start of data pointing to byte number *bytes* + * **+ 1**. If *bytes* is larger than the current data being + * processed, then the eBPF verdict will be applied to multiple + * **sendmsg**\ () or **sendfile**\ () calls until *bytes* are + * consumed. + * + * Note that if a socket closes with the internal counter holding + * a non-zero value, this is not a problem because data is not + * being buffered for *bytes* and is sent as it is received. + * Return + * 0 + * + * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes) + * Description + * For socket policies, prevent the execution of the verdict eBPF + * program for message *msg* until *bytes* (byte number) have been + * accumulated. + * + * This can be used when one needs a specific number of bytes + * before a verdict can be assigned, even if the data spans + * multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme + * case would be a user calling **sendmsg**\ () repeatedly with + * 1-byte long message segments. Obviously, this is bad for + * performance, but it is still valid. If the eBPF program needs + * *bytes* bytes to validate a header, this helper can be used to + * prevent the eBPF program to be called again until *bytes* have + * been accumulated. + * Return + * 0 + * + * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags) + * Description + * For socket policies, pull in non-linear data from user space + * for *msg* and set pointers *msg*\ **->data** and *msg*\ + * **->data_end** to *start* and *end* bytes offsets into *msg*, + * respectively. + * + * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a + * *msg* it can only parse data that the (**data**, **data_end**) + * pointers have already consumed. For **sendmsg**\ () hooks this + * is likely the first scatterlist element. But for calls relying + * on the **sendpage** handler (e.g. **sendfile**\ ()) this will + * be the range (**0**, **0**) because the data is shared with + * user space and by default the objective is to avoid allowing + * user space to modify data while (or after) eBPF verdict is + * being decided. This helper can be used to pull in data and to + * set the start and end pointer to given values. Data will be + * copied if necessary (i.e. if data was not linear and if start + * and end pointers do not point to the same chunk). 
+ * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) + * Description + * Bind the socket associated to *ctx* to the address pointed by + * *addr*, of length *addr_len*. This allows for making outgoing + * connection from the desired IP address, which can be useful for + * example when all processes inside a cgroup should use one + * single IP address on a host that has multiple IP configured. + * + * This helper works for IPv4 and IPv6, TCP and UDP sockets. The + * domain (*addr*\ **->sa_family**) must be **AF_INET** (or + * **AF_INET6**). Looking for a free port to bind to can be + * expensive, therefore binding to port is not permitted by the + * helper: *addr*\ **->sin_port** (or **sin6_port**, respectively) + * must be set to zero. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta) + * Description + * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is + * only possible to shrink the packet as of this writing, + * therefore *delta* must be a negative integer. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags) + * Description + * Retrieve the XFRM state (IP transform framework, see also + * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*. + * + * The retrieved value is stored in the **struct bpf_xfrm_state** + * pointed by *xfrm_state* and of length *size*. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_XFRM** configuration option. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags) + * Description + * Return a user or a kernel stack in bpf program provided buffer. + * To achieve this, the helper needs *ctx*, which is a pointer + * to the context on which the tracing program is executed. + * To store the stacktrace, the bpf program provides *buf* with + * a nonnegative *size*. + * + * The last argument, *flags*, holds the number of stack frames to + * skip (from 0 to 255), masked with + * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set + * the following flags: + * + * **BPF_F_USER_STACK** + * Collect a user space stack instead of a kernel stack. + * **BPF_F_USER_BUILD_ID** + * Collect buildid+offset instead of ips for user stack, + * only valid if **BPF_F_USER_STACK** is also specified. + * + * **bpf_get_stack**\ () can collect up to + * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject + * to sufficient large buffer size. 
Note that + * this limit can be controlled with the **sysctl** program, and + * that it should be manually increased in order to profile long + * user stacks (such as stacks for Java programs). To do so, use: + * + * :: + * + * # sysctl kernel.perf_event_max_stack= + * + * Return + * a non-negative value equal to or less than size on success, or + * a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -744,7 +1860,16 @@ union bpf_attr { FN(perf_event_read_value), \ FN(perf_prog_read_value), \ FN(getsockopt), \ - FN(override_return), + FN(override_return), \ + FN(sock_ops_cb_flags_set), \ + FN(msg_redirect_map), \ + FN(msg_apply_bytes), \ + FN(msg_cork_bytes), \ + FN(msg_pull_data), \ + FN(bind), \ + FN(xdp_adjust_tail), \ + FN(skb_get_xfrm_state), \ + FN(get_stack), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -778,15 +1903,19 @@ enum bpf_func_id { /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ #define BPF_F_TUNINFO_IPV6 (1ULL << 0) -/* BPF_FUNC_get_stackid flags. */ +/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */ #define BPF_F_SKIP_FIELD_MASK 0xffULL #define BPF_F_USER_STACK (1ULL << 8) +/* flags used by BPF_FUNC_get_stackid only. */ #define BPF_F_FAST_STACK_CMP (1ULL << 9) #define BPF_F_REUSE_STACKID (1ULL << 10) +/* flags used by BPF_FUNC_get_stack only. */ +#define BPF_F_USER_BUILD_ID (1ULL << 11) /* BPF_FUNC_skb_set_tunnel_key flags. */ #define BPF_F_ZERO_CSUM_TX (1ULL << 1) #define BPF_F_DONT_FRAGMENT (1ULL << 2) +#define BPF_F_SEQ_NUMBER (1ULL << 3) /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and * BPF_FUNC_perf_event_read_value flags. @@ -849,6 +1978,19 @@ struct bpf_tunnel_key { __u32 tunnel_label; }; +/* user accessible mirror of in-kernel xfrm_state. + * new fields can only be added to the end of this structure + */ +struct bpf_xfrm_state { + __u32 reqid; + __u32 spi; /* Stored in network byte order */ + __u16 family; + union { + __u32 remote_ipv4; /* Stored in network byte order */ + __u32 remote_ipv6[4]; /* Stored in network byte order */ + }; +}; + /* Generic BPF return codes which all BPF program types may support. * The values are binary compatible with their TC_ACT_* counter-part to * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT @@ -872,6 +2014,15 @@ struct bpf_sock { __u32 protocol; __u32 mark; __u32 priority; + __u32 src_ip4; /* Allows 1,2,4-byte read. + * Stored in network byte order. + */ + __u32 src_ip6[4]; /* Allows 1,2,4-byte read. + * Stored in network byte order. + */ + __u32 src_port; /* Allows 4-byte read. 
+ * Stored in host byte order + */ }; #define XDP_PACKET_HEADROOM 256 @@ -896,6 +2047,9 @@ struct xdp_md { __u32 data; __u32 data_end; __u32 data_meta; + /* Below access go through struct xdp_rxq_info */ + __u32 ingress_ifindex; /* rxq->dev->ifindex */ + __u32 rx_queue_index; /* rxq->queue_index */ }; enum sk_action { @@ -918,6 +2072,10 @@ struct bpf_prog_info { __u32 nr_map_ids; __aligned_u64 map_ids; char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u32 gpl_compatible:1; + __u64 netns_dev; + __u64 netns_ino; } __attribute__((aligned(8))); struct bpf_map_info { @@ -928,8 +2086,31 @@ struct bpf_map_info { __u32 max_entries; __u32 map_flags; char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u64 netns_dev; + __u64 netns_ino; } __attribute__((aligned(8))); +/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed + * by user and intended to be used by socket (e.g. to bind to, depends on + * attach attach type). + */ +struct bpf_sock_addr { + __u32 user_family; /* Allows 4-byte read, but no write. */ + __u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write. + * Stored in network byte order. + */ + __u32 user_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write. + * Stored in network byte order. + */ + __u32 user_port; /* Allows 4-byte read and write. + * Stored in network byte order + */ + __u32 family; /* Allows 4-byte read, but no write */ + __u32 type; /* Allows 4-byte read, but no write */ + __u32 protocol; /* Allows 4-byte read, but no write */ +}; + /* User bpf_sock_ops struct to access socket values and specify request ops * and their replies. * Some of this fields are in network (bigendian) byte order and may need @@ -939,8 +2120,9 @@ struct bpf_map_info { struct bpf_sock_ops { __u32 op; union { - __u32 reply; - __u32 replylong[4]; + __u32 args[4]; /* Optionally passed to bpf program */ + __u32 reply; /* Returned by bpf program */ + __u32 replylong[4]; /* Optionally returned by bpf prog */ }; __u32 family; __u32 remote_ip4; /* Stored in network byte order */ @@ -955,8 +2137,39 @@ struct bpf_sock_ops { */ __u32 snd_cwnd; __u32 srtt_us; /* Averaged RTT << 3 in usecs */ + __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */ + __u32 state; + __u32 rtt_min; + __u32 snd_ssthresh; + __u32 rcv_nxt; + __u32 snd_nxt; + __u32 snd_una; + __u32 mss_cache; + __u32 ecn_flags; + __u32 rate_delivered; + __u32 rate_interval_us; + __u32 packets_out; + __u32 retrans_out; + __u32 total_retrans; + __u32 segs_in; + __u32 data_segs_in; + __u32 segs_out; + __u32 data_segs_out; + __u32 lost_out; + __u32 sacked_out; + __u32 sk_txhash; + __u64 bytes_received; + __u64 bytes_acked; }; +/* Definitions for bpf_sock_ops_cb_flags */ +#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) +#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) +#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) +#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently + * supported cb flags + */ + /* List of known BPF sock_ops operators. * New entries can only be added at the end */ @@ -990,6 +2203,43 @@ enum { * a congestion threshold. RTTs above * this indicate congestion */ + BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered. + * Arg1: value of icsk_retransmits + * Arg2: value of icsk_rto + * Arg3: whether RTO has expired + */ + BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted. + * Arg1: sequence number of 1st byte + * Arg2: # segments + * Arg3: return value of + * tcp_transmit_skb (0 => success) + */ + BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state. 
+ * Arg1: old_state + * Arg2: new_state + */ +}; + +/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect + * changes between the TCP and BPF versions. Ideally this should never happen. + * If it does, we need to add code to convert them before calling + * the BPF sock_ops function. + */ +enum { + BPF_TCP_ESTABLISHED = 1, + BPF_TCP_SYN_SENT, + BPF_TCP_SYN_RECV, + BPF_TCP_FIN_WAIT1, + BPF_TCP_FIN_WAIT2, + BPF_TCP_TIME_WAIT, + BPF_TCP_CLOSE, + BPF_TCP_CLOSE_WAIT, + BPF_TCP_LAST_ACK, + BPF_TCP_LISTEN, + BPF_TCP_CLOSING, /* Now a valid state */ + BPF_TCP_NEW_SYN_RECV, + + BPF_TCP_MAX_STATES /* Leave at the end! */ }; #define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ diff --git a/katran/lib/linux_includes/bpf_common.h b/katran/lib/linux_includes/bpf_common.h index e3667e441..899349896 100644 --- a/katran/lib/linux_includes/bpf_common.h +++ b/katran/lib/linux_includes/bpf_common.h @@ -3,51 +3,51 @@ /* Instruction classes */ #define BPF_CLASS(code) ((code) & 0x07) -// @lint-ignore TXT2 T25377293 Grandfathered in -#define BPF_LD 0x00 -#define BPF_LDX 0x01 -#define BPF_ST 0x02 -#define BPF_STX 0x03 -#define BPF_ALU 0x04 -#define BPF_JMP 0x05 -#define BPF_RET 0x06 -#define BPF_MISC 0x07 +#define BPF_LD 0x00 +#define BPF_LDX 0x01 +#define BPF_ST 0x02 +#define BPF_STX 0x03 +#define BPF_ALU 0x04 +#define BPF_JMP 0x05 +#define BPF_RET 0x06 +#define BPF_MISC 0x07 /* ld/ldx fields */ #define BPF_SIZE(code) ((code) & 0x18) -#define BPF_W 0x00 -#define BPF_H 0x08 -#define BPF_B 0x10 +#define BPF_W 0x00 /* 32-bit */ +#define BPF_H 0x08 /* 16-bit */ +#define BPF_B 0x10 /* 8-bit */ +/* eBPF BPF_DW 0x18 64-bit */ #define BPF_MODE(code) ((code) & 0xe0) -#define BPF_IMM 0x00 -#define BPF_ABS 0x20 -#define BPF_IND 0x40 -#define BPF_MEM 0x60 -#define BPF_LEN 0x80 -#define BPF_MSH 0xa0 +#define BPF_IMM 0x00 +#define BPF_ABS 0x20 +#define BPF_IND 0x40 +#define BPF_MEM 0x60 +#define BPF_LEN 0x80 +#define BPF_MSH 0xa0 /* alu/jmp fields */ #define BPF_OP(code) ((code) & 0xf0) -#define BPF_ADD 0x00 -#define BPF_SUB 0x10 -#define BPF_MUL 0x20 -#define BPF_DIV 0x30 -#define BPF_OR 0x40 -#define BPF_AND 0x50 -#define BPF_LSH 0x60 -#define BPF_RSH 0x70 -#define BPF_NEG 0x80 -#define BPF_MOD 0x90 -#define BPF_XOR 0xa0 +#define BPF_ADD 0x00 +#define BPF_SUB 0x10 +#define BPF_MUL 0x20 +#define BPF_DIV 0x30 +#define BPF_OR 0x40 +#define BPF_AND 0x50 +#define BPF_LSH 0x60 +#define BPF_RSH 0x70 +#define BPF_NEG 0x80 +#define BPF_MOD 0x90 +#define BPF_XOR 0xa0 -#define BPF_JA 0x00 -#define BPF_JEQ 0x10 -#define BPF_JGT 0x20 -#define BPF_JGE 0x30 -#define BPF_JSET 0x40 +#define BPF_JA 0x00 +#define BPF_JEQ 0x10 +#define BPF_JGT 0x20 +#define BPF_JGE 0x30 +#define BPF_JSET 0x40 #define BPF_SRC(code) ((code) & 0x08) -#define BPF_K 0x00 -#define BPF_X 0x08 +#define BPF_K 0x00 +#define BPF_X 0x08 #ifndef BPF_MAXINSNS #define BPF_MAXINSNS 4096 diff --git a/katran/lib/linux_includes/bpf_helpers.h b/katran/lib/linux_includes/bpf_helpers.h index e25dbf603..609247e77 100644 --- a/katran/lib/linux_includes/bpf_helpers.h +++ b/katran/lib/linux_includes/bpf_helpers.h @@ -9,151 +9,169 @@ /* helper functions called from eBPF programs written in C */ static void *(*bpf_map_lookup_elem)(void *map, void *key) = - (void *) BPF_FUNC_map_lookup_elem; + (void *) BPF_FUNC_map_lookup_elem; static int (*bpf_map_update_elem)(void *map, void *key, void *value, - unsigned long long flags) = - (void *) BPF_FUNC_map_update_elem; + unsigned long long flags) = + (void *) BPF_FUNC_map_update_elem; static int 
(*bpf_map_delete_elem)(void *map, void *key) = - (void *) BPF_FUNC_map_delete_elem; + (void *) BPF_FUNC_map_delete_elem; static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) = - (void *) BPF_FUNC_probe_read; + (void *) BPF_FUNC_probe_read; static unsigned long long (*bpf_ktime_get_ns)(void) = - (void *) BPF_FUNC_ktime_get_ns; + (void *) BPF_FUNC_ktime_get_ns; static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) = - (void *) BPF_FUNC_trace_printk; + (void *) BPF_FUNC_trace_printk; static void (*bpf_tail_call)(void *ctx, void *map, int index) = - (void *) BPF_FUNC_tail_call; + (void *) BPF_FUNC_tail_call; static unsigned long long (*bpf_get_smp_processor_id)(void) = - (void *) BPF_FUNC_get_smp_processor_id; + (void *) BPF_FUNC_get_smp_processor_id; static unsigned long long (*bpf_get_current_pid_tgid)(void) = - (void *) BPF_FUNC_get_current_pid_tgid; + (void *) BPF_FUNC_get_current_pid_tgid; static unsigned long long (*bpf_get_current_uid_gid)(void) = - (void *) BPF_FUNC_get_current_uid_gid; + (void *) BPF_FUNC_get_current_uid_gid; static int (*bpf_get_current_comm)(void *buf, int buf_size) = - (void *) BPF_FUNC_get_current_comm; + (void *) BPF_FUNC_get_current_comm; static unsigned long long (*bpf_perf_event_read)(void *map, - unsigned long long flags) = - (void *) BPF_FUNC_perf_event_read; + unsigned long long flags) = + (void *) BPF_FUNC_perf_event_read; static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) = - (void *) BPF_FUNC_clone_redirect; + (void *) BPF_FUNC_clone_redirect; static int (*bpf_redirect)(int ifindex, int flags) = - (void *) BPF_FUNC_redirect; + (void *) BPF_FUNC_redirect; static int (*bpf_redirect_map)(void *map, int key, int flags) = - (void *) BPF_FUNC_redirect_map; + (void *) BPF_FUNC_redirect_map; static int (*bpf_perf_event_output)(void *ctx, void *map, - unsigned long long flags, void *data, - int size) = - (void *) BPF_FUNC_perf_event_output; + unsigned long long flags, void *data, + int size) = + (void *) BPF_FUNC_perf_event_output; static int (*bpf_get_stackid)(void *ctx, void *map, int flags) = - (void *) BPF_FUNC_get_stackid; + (void *) BPF_FUNC_get_stackid; static int (*bpf_probe_write_user)(void *dst, void *src, int size) = - (void *) BPF_FUNC_probe_write_user; + (void *) BPF_FUNC_probe_write_user; static int (*bpf_current_task_under_cgroup)(void *map, int index) = - (void *) BPF_FUNC_current_task_under_cgroup; + (void *) BPF_FUNC_current_task_under_cgroup; static int (*bpf_skb_get_tunnel_key)(void *ctx, void *key, int size, int flags) = - (void *) BPF_FUNC_skb_get_tunnel_key; + (void *) BPF_FUNC_skb_get_tunnel_key; static int (*bpf_skb_set_tunnel_key)(void *ctx, void *key, int size, int flags) = - (void *) BPF_FUNC_skb_set_tunnel_key; + (void *) BPF_FUNC_skb_set_tunnel_key; static int (*bpf_skb_get_tunnel_opt)(void *ctx, void *md, int size) = - (void *) BPF_FUNC_skb_get_tunnel_opt; + (void *) BPF_FUNC_skb_get_tunnel_opt; static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) = - (void *) BPF_FUNC_skb_set_tunnel_opt; + (void *) BPF_FUNC_skb_set_tunnel_opt; static unsigned long long (*bpf_get_prandom_u32)(void) = - (void *) BPF_FUNC_get_prandom_u32; + (void *) BPF_FUNC_get_prandom_u32; static int (*bpf_xdp_adjust_head)(void *ctx, int offset) = - (void *) BPF_FUNC_xdp_adjust_head; + (void *) BPF_FUNC_xdp_adjust_head; static int (*bpf_xdp_adjust_meta)(void *ctx, int offset) = - (void *) BPF_FUNC_xdp_adjust_meta; + (void *) BPF_FUNC_xdp_adjust_meta; static int (*bpf_setsockopt)(void *ctx, int level, int 
optname, void *optval, - int optlen) = - (void *) BPF_FUNC_setsockopt; + int optlen) = + (void *) BPF_FUNC_setsockopt; static int (*bpf_sk_redirect_map)(void *map, int key, int flags) = - (void *) BPF_FUNC_sk_redirect_map; + (void *) BPF_FUNC_sk_redirect_map; +static int (*bpf_getsockopt)(void *ctx, int level, int optname, void *optval, + int optlen) = + (void *) BPF_FUNC_getsockopt; +static int (*bpf_sock_ops_cb_flags_set)(void *ctx, int flags) = + (void *) BPF_FUNC_sock_ops_cb_flags_set; static int (*bpf_sock_map_update)(void *map, void *key, void *value, - unsigned long long flags) = - (void *) BPF_FUNC_sock_map_update; + unsigned long long flags) = + (void *) BPF_FUNC_sock_map_update; static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags, - void *buf, unsigned int buf_size) = - (void *) BPF_FUNC_perf_event_read_value; + void *buf, unsigned int buf_size) = + (void *) BPF_FUNC_perf_event_read_value; static int (*bpf_perf_prog_read_value)(void *ctx, void *buf, - unsigned int buf_size) = - (void *) BPF_FUNC_perf_prog_read_value; - - + unsigned int buf_size) = + (void *) BPF_FUNC_perf_prog_read_value; +static int (*bpf_override_return)(void *ctx, unsigned long rc) = + (void *) BPF_FUNC_override_return; +#ifdef KERNEL_417_PLUS +static int (*bpf_bind)(void *ctx, void *addr, int addr_len) = + (void *) BPF_FUNC_bind; +static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) = + (void *) BPF_FUNC_xdp_adjust_tail; +static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state, + int size, int flags) = + (void *) BPF_FUNC_skb_get_xfrm_state; +static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) = + (void *) BPF_FUNC_get_stack; +#endif /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions */ struct sk_buff; unsigned long long load_byte(void *skb, - unsigned long long off) asm("llvm.bpf.load.byte"); + unsigned long long off) asm("llvm.bpf.load.byte"); unsigned long long load_half(void *skb, - unsigned long long off) asm("llvm.bpf.load.half"); + unsigned long long off) asm("llvm.bpf.load.half"); unsigned long long load_word(void *skb, - unsigned long long off) asm("llvm.bpf.load.word"); + unsigned long long off) asm("llvm.bpf.load.word"); /* a helper structure used by eBPF C program * to describe map attributes to elf_bpf loader */ struct bpf_map_def { - unsigned int type; - unsigned int key_size; - unsigned int value_size; - unsigned int max_entries; - unsigned int map_flags; - unsigned int inner_map_idx; - unsigned int numa_node; + unsigned int type; + unsigned int key_size; + unsigned int value_size; + unsigned int max_entries; + unsigned int map_flags; + unsigned int inner_map_idx; + unsigned int numa_node; }; static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) = - (void *) BPF_FUNC_skb_load_bytes; + (void *) BPF_FUNC_skb_load_bytes; static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) = - (void *) BPF_FUNC_skb_store_bytes; + (void *) BPF_FUNC_skb_store_bytes; static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) = - (void *) BPF_FUNC_l3_csum_replace; + (void *) BPF_FUNC_l3_csum_replace; static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) = - (void *) BPF_FUNC_l4_csum_replace; + (void *) BPF_FUNC_l4_csum_replace; +static int (*bpf_csum_diff)(void *from, int from_size, void *to, int to_size, int seed) = + (void *) BPF_FUNC_csum_diff; static int (*bpf_skb_under_cgroup)(void *ctx, void *map, 
int index) = - (void *) BPF_FUNC_skb_under_cgroup; + (void *) BPF_FUNC_skb_under_cgroup; static int (*bpf_skb_change_head)(void *, int len, int flags) = - (void *) BPF_FUNC_skb_change_head; + (void *) BPF_FUNC_skb_change_head; /* Scan the ARCH passed in from ARCH env variable (see Makefile) */ #if defined(__TARGET_ARCH_x86) - #define bpf_target_x86 - #define bpf_target_defined + #define bpf_target_x86 + #define bpf_target_defined #elif defined(__TARGET_ARCH_s930x) - #define bpf_target_s930x - #define bpf_target_defined + #define bpf_target_s930x + #define bpf_target_defined #elif defined(__TARGET_ARCH_arm64) - #define bpf_target_arm64 - #define bpf_target_defined + #define bpf_target_arm64 + #define bpf_target_defined #elif defined(__TARGET_ARCH_mips) - #define bpf_target_mips - #define bpf_target_defined + #define bpf_target_mips + #define bpf_target_defined #elif defined(__TARGET_ARCH_powerpc) - #define bpf_target_powerpc - #define bpf_target_defined + #define bpf_target_powerpc + #define bpf_target_defined #elif defined(__TARGET_ARCH_sparc) - #define bpf_target_sparc - #define bpf_target_defined + #define bpf_target_sparc + #define bpf_target_defined #else - #undef bpf_target_defined + #undef bpf_target_defined #endif /* Fall back to what the compiler says */ #ifndef bpf_target_defined #if defined(__x86_64__) - #define bpf_target_x86 + #define bpf_target_x86 #elif defined(__s390x__) - #define bpf_target_s930x + #define bpf_target_s930x #elif defined(__aarch64__) - #define bpf_target_arm64 + #define bpf_target_arm64 #elif defined(__mips__) - #define bpf_target_mips + #define bpf_target_mips #elif defined(__powerpc__) - #define bpf_target_powerpc + #define bpf_target_powerpc #elif defined(__sparc__) - #define bpf_target_sparc + #define bpf_target_sparc #endif #endif @@ -241,17 +259,17 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) = #endif #ifdef bpf_target_powerpc -#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) -#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP +#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) +#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP #elif bpf_target_sparc -#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); }) -#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP +#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); }) +#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP #else -#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ \ - bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) -#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ \ - bpf_probe_read(&(ip), sizeof(ip), \ - (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) +#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ \ + bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) +#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ \ + bpf_probe_read(&(ip), sizeof(ip), \ + (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) #endif #endif diff --git a/katran/lib/linux_includes/libbpf.c b/katran/lib/linux_includes/libbpf.c index c907686f3..e44e70889 100644 --- a/katran/lib/linux_includes/libbpf.c +++ b/katran/lib/linux_includes/libbpf.c @@ -20,12 +20,12 @@ static __u64 ptr_to_u64(const void *ptr) } static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, - unsigned int size) + unsigned int size) { - return syscall(__NR_bpf, cmd, attr, size); + return syscall(__NR_bpf, cmd, attr, size); } -int bpf_create_map_node(enum bpf_map_type map_type, const char *name, +int ebpf_create_map_node(enum bpf_map_type map_type, 
const char *name, int key_size, int value_size, int max_entries, __u32 map_flags, int node) { @@ -50,25 +50,25 @@ int bpf_create_map_node(enum bpf_map_type map_type, const char *name, return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); } -int bpf_create_map_name(enum bpf_map_type map_type, const char *name, - int key_size, int value_size, int max_entries, - __u32 map_flags) +int ebpf_create_map_name(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, int max_entries, + __u32 map_flags) { - return bpf_create_map_node(map_type, name, key_size, value_size, - max_entries, map_flags, -1); + return ebpf_create_map_node(map_type, name, key_size, value_size, + max_entries, map_flags, -1); } -int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, - int max_entries, __u32 map_flags) +int ebpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, + int max_entries, __u32 map_flags) { - return bpf_create_map_node(map_type, NULL, key_size, value_size, - max_entries, map_flags, -1); + return ebpf_create_map_node(map_type, NULL, key_size, value_size, + max_entries, map_flags, -1); } -int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, - int key_size, int inner_map_fd, int max_entries, - __u32 map_flags, int node) +int ebpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int inner_map_fd, int max_entries, + __u32 map_flags, int node) { __u32 name_len = name ? strlen(name) : 0; union bpf_attr attr; @@ -93,14 +93,14 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, -int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size, +int ebpf_create_map_in_map(enum bpf_map_type map_type, int key_size, int inner_map_fd, int max_entries, __u32 map_flags) { - return bpf_create_map_in_map_node(map_type, NULL, key_size, - inner_map_fd, max_entries, map_flags, -1); + return ebpf_create_map_in_map_node(map_type, NULL, key_size, + inner_map_fd, max_entries, map_flags, -1); } -int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags) +int ebpf_update_elem(int fd, void *key, void *value, unsigned long long flags) { union bpf_attr attr; @@ -113,7 +113,7 @@ int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags) return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); } -int bpf_lookup_elem(int fd, void *key, void *value) +int ebpf_lookup_elem(int fd, void *key, void *value) { union bpf_attr attr; @@ -125,7 +125,7 @@ int bpf_lookup_elem(int fd, void *key, void *value) return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); } -int bpf_delete_elem(int fd, void *key) +int ebpf_delete_elem(int fd, void *key) { union bpf_attr attr; @@ -136,7 +136,7 @@ int bpf_delete_elem(int fd, void *key) return sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr)); } -int bpf_get_next_key(int fd, void *key, void *next_key) +int ebpf_get_next_key(int fd, void *key, void *next_key) { union bpf_attr attr; @@ -150,49 +150,49 @@ int bpf_get_next_key(int fd, void *key, void *next_key) #define ROUND_UP(x, n) (((x) + (n) - 1u) & ~((n) - 1u)) -int bpf_prog_load_name(enum bpf_prog_type prog_type, const char *name, - const struct bpf_insn *insns, int prog_len, - const char *license, __u32 kern_version, - char *buf, int buf_size) +int ebpf_prog_load_name(enum bpf_prog_type prog_type, const char *name, + const struct bpf_insn *insns, int prog_len, + const char *license, __u32 kern_version, + char *buf, int buf_size) { - int fd; - union bpf_attr attr; + int 
fd; + union bpf_attr attr; - bzero(&attr, sizeof(attr)); - attr.prog_type = prog_type; - attr.insns = ptr_to_u64(insns); + bzero(&attr, sizeof(attr)); + attr.prog_type = prog_type; + attr.insns = ptr_to_u64(insns); attr.insn_cnt = prog_len / sizeof(struct bpf_insn), - attr.license = ptr_to_u64(license); - attr.log_buf = ptr_to_u64(NULL); - attr.log_size = 0; - attr.log_level = 0; - attr.kern_version = kern_version; + attr.license = ptr_to_u64(license); + attr.log_buf = ptr_to_u64(NULL); + attr.log_size = 0; + attr.log_level = 0; + attr.kern_version = kern_version; if (name) { memcpy(attr.prog_name, name, min(strlen(name), BPF_OBJ_NAME_LEN - 1)); } fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); - if (fd >= 0 || !buf || !buf_size) - return fd; + if (fd >= 0 || !buf || !buf_size) + return fd; - /* Try again with log */ - attr.log_buf = ptr_to_u64(buf); - attr.log_size = buf_size; - attr.log_level = 1; + /* Try again with log */ + attr.log_buf = ptr_to_u64(buf); + attr.log_size = buf_size; + attr.log_level = 1; return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); } -int bpf_prog_load(enum bpf_prog_type prog_type, - const struct bpf_insn *insns, int prog_len, - const char *license, __u32 kern_version, - char *buf, int buf_size) +int ebpf_prog_load(enum bpf_prog_type prog_type, + const struct bpf_insn *insns, int prog_len, + const char *license, __u32 kern_version, + char *buf, int buf_size) { - return bpf_prog_load_name(prog_type, NULL, insns, prog_len, license, - kern_version, buf, buf_size); + return ebpf_prog_load_name(prog_type, NULL, insns, prog_len, license, + kern_version, buf, buf_size); } -int bpf_obj_pin(int fd, const char *pathname) +int ebpf_obj_pin(int fd, const char *pathname) { union bpf_attr attr; @@ -203,7 +203,7 @@ int bpf_obj_pin(int fd, const char *pathname) return sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr)); } -int bpf_obj_get(const char *pathname) +int ebpf_obj_get(const char *pathname) { union bpf_attr attr; @@ -213,8 +213,8 @@ int bpf_obj_get(const char *pathname) return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); } -int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, - unsigned int flags) +int ebpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, + unsigned int flags) { union bpf_attr attr; @@ -227,7 +227,7 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); } -int bpf_prog_detach(int target_fd, enum bpf_attach_type type) +int ebpf_prog_detach(int target_fd, enum bpf_attach_type type) { union bpf_attr attr; @@ -238,7 +238,7 @@ int bpf_prog_detach(int target_fd, enum bpf_attach_type type) return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); } -int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type) +int ebpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type) { union bpf_attr attr; @@ -250,9 +250,9 @@ int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type) return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); } -int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, - void *data_out, __u32 *size_out, __u32 *retval, - __u32 *duration) +int ebpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, + void *data_out, __u32 *size_out, __u32 *retval, + __u32 *duration) { union bpf_attr attr; int ret; @@ -274,7 +274,7 @@ int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, return ret; } -int perf_event_open(struct perf_event_attr *attr, int pid, int cpu, +int 
ebpf_perf_event_open(struct perf_event_attr *attr, int pid, int cpu, int group_fd, unsigned long flags) { return syscall(__NR_perf_event_open, attr, pid, cpu, diff --git a/katran/lib/linux_includes/libbpf.h b/katran/lib/linux_includes/libbpf.h index c84d6dba2..81053361f 100644 --- a/katran/lib/linux_includes/libbpf.h +++ b/katran/lib/linux_includes/libbpf.h @@ -16,42 +16,42 @@ struct bpf_insn; -int bpf_create_map_node(enum bpf_map_type map_type, const char *name, +int ebpf_create_map_node(enum bpf_map_type map_type, const char *name, int key_size, int value_size, int max_entries, __u32 map_flags, int node); -int bpf_create_map_name(enum bpf_map_type map_type, const char *name, +int ebpf_create_map_name(enum bpf_map_type map_type, const char *name, int key_size, int value_size, int max_entries, __u32 map_flags); -int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, +int ebpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries, __u32 map_flags); -int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, +int ebpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, int key_size, int inner_map_fd, int max_entries, __u32 map_flags, int node); -int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size, +int ebpf_create_map_in_map(enum bpf_map_type map_type, int key_size, int inner_map_fd, int max_entries, __u32 map_flags); -int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags); -int bpf_lookup_elem(int fd, void *key, void *value); -int bpf_delete_elem(int fd, void *key); -int bpf_get_next_key(int fd, void *key, void *next_key); +int ebpf_update_elem(int fd, void *key, void *value, unsigned long long flags); +int ebpf_lookup_elem(int fd, void *key, void *value); +int ebpf_delete_elem(int fd, void *key); +int ebpf_get_next_key(int fd, void *key, void *next_key); -int bpf_prog_load(enum bpf_prog_type prog_type, +int ebpf_prog_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, int insn_len, const char *license, __u32 kern_version, char *buf, int buf_size); -int bpf_prog_load_name(enum bpf_prog_type prog_type, const char *name, +int ebpf_prog_load_name(enum bpf_prog_type prog_type, const char *name, const struct bpf_insn *insns, int insn_len, const char *license, __u32 kern_version, char *buf, int buf_size); -int bpf_obj_pin(int fd, const char *pathname); -int bpf_obj_get(const char *pathname); -int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, +int ebpf_obj_pin(int fd, const char *pathname); +int ebpf_obj_get(const char *pathname); +int ebpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, unsigned int flags); -int bpf_prog_detach(int target_fd, enum bpf_attach_type type); -int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type); -int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, +int ebpf_prog_detach(int target_fd, enum bpf_attach_type type); +int ebpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type); +int ebpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, void *data_out, __u32 *size_out, __u32 *retval, __u32 *duration); @@ -236,6 +236,6 @@ int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, .imm = 0 }) struct perf_event_attr; -int perf_event_open(struct perf_event_attr *attr, int pid, int cpu, +int ebpf_perf_event_open(struct perf_event_attr *attr, int pid, int cpu, int group_fd, unsigned long flags); #endif diff --git 
a/katran/lib/testing/CMakeLists.txt b/katran/lib/testing/CMakeLists.txt index 940837586..a8b6a5ad3 100644 --- a/katran/lib/testing/CMakeLists.txt +++ b/katran/lib/testing/CMakeLists.txt @@ -32,6 +32,7 @@ add_library(xdptester STATIC XdpTester.h XdpTester.cpp KatranTestFixtures.h + KatranOptionalTestFixtures.h ) target_link_libraries(xdptester diff --git a/katran/lib/testing/KatranOptionalTestFixtures.h b/katran/lib/testing/KatranOptionalTestFixtures.h new file mode 100644 index 000000000..c9a123a79 --- /dev/null +++ b/katran/lib/testing/KatranOptionalTestFixtures.h @@ -0,0 +1,59 @@ +// @nolint + +/* Copyright (C) 2018-present, Facebook, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#pragma once +#include +#include +#include + +namespace katran { +namespace testing { +/** + * see KatranTestFixtures.h on how to generate input and output data + */ +using TestFixture = std::vector>; +const TestFixture inputOptionalTestFixtures = { + //1 + { + // Ether(src="0x1", dst="0x2")/IP(src="192.168.1.1", dst="10.200.1.1")/UDP(sport=31337, dport=80)/("katran test pkt"*100) + "AgAAAAAAAQAAAAAACABFAAX4AAEAAEARp4LAqAEBCsgBAXppAFAF5Og2a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHR
lc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0a2F0cmFuIHRlc3QgcGt0", + "ICMPv4 packet too big. ICMP_TOOBIG_GENERATION and 4.17+ kernel is required" + }, + //2 + { + //Ether(src="0x1", dst="0x2")/IPv6(src="fc00:2::1", dst="fc00:1::1")/TCP(sport=31337, dport=80,flags="A")/("katran test pkt"*100) + "AgAAAAAAAQAAAAAAht1gAAAABfAGQPwAAAIAAAAAAAAAAAAAAAH8AAABAAAAAAAAAAAAAAABemkAUAAAAAAAAAAAUBAgAFN1AABrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3Q=", + "ICMPv6 packet too big. 
ICMP_TOOBIG_GENERATION and 4.17+ kernel is required" + }, +}; + +const TestFixture outputOptionalTestFixtures = { + //1 + { + "AQAAAAAAAgAAAAAACABFAABwAAAAAEABrRsKyAEBwKgBAQMEboQAAAXcRQAF+AABAABAEaeCwKgBAQrIAQF6aQBQBeToNmthdHJhbiB0ZXN0IHBrdGthdHJhbiB0ZXN0IHBrdGthdHJhbiB0ZXN0IHBrdGthdHJhbiB0ZXN0", + "XDP_TX" + }, + //2 + { + "AQAAAAAAAgAAAAAAht1gAAAAAQA6QPwAAAEAAAAAAAAAAAAAAAH8AAACAAAAAAAAAAAAAAABAgD3sgAABdxgAAAABfAGQPwAAAIAAAAAAAAAAAAAAAH8AAABAAAAAAAAAAAAAAABemkAUAAAAAAAAAAAUBAgAFN1AABrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdGVzdCBwa3RrYXRyYW4gdA==", + "XDP_TX" + }, +}; + +} +} diff --git a/katran/lib/testing/TARGETS b/katran/lib/testing/TARGETS index d557b2799..7e4df7a07 100644 --- a/katran/lib/testing/TARGETS +++ b/katran/lib/testing/TARGETS @@ -60,6 +60,7 @@ cpp_binary( "katran_tester.cpp", ], headers = [ + "KatranOptionalTestFixtures.h", "KatranTestFixtures.h", ], deps = [ diff --git a/katran/lib/testing/XdpTester.cpp b/katran/lib/testing/XdpTester.cpp index 67a292a2a..0335f7b3c 100644 --- a/katran/lib/testing/XdpTester.cpp +++ b/katran/lib/testing/XdpTester.cpp @@ -58,6 +58,18 @@ void XdpTester::printPcktBase64() { } } +void XdpTester::writePcapOutput(std::unique_ptr&& buf) { + if (config_.outputFileName.empty()) { + VLOG(2) << "no output file specified"; + return; + } + auto success = parser_.writePacket(std::move(buf)); + if (!success) { + LOG(INFO) << "failed to write pckt into output " + << "pcap file: " << config_.outputFileName; + } +} + void XdpTester::testPcktsFromPcap() { if (config_.inputFileName.empty() || config_.bpfProgFd < 0) { LOG(INFO) << "can't run pcap based tests. 
input pcap file or bpf prog fd " @@ -96,13 +108,7 @@ void XdpTester::testPcktsFromPcap() { } // adjust IOBuf so data data_end will acount for writen data buf->append(output_pckt_size); - if (!config_.outputFileName.empty()) { - auto success = parser_.writePacket(buf->cloneOne()); - if (!success) { - LOG(INFO) << "failed to write pckt #" << pckt_num << " into output " - << "pcap file: " << config_.outputFileName; - } - } + writePcapOutput(buf->cloneOne()); ++pckt_num; } } @@ -122,6 +128,7 @@ void XdpTester::testFromFixture() { for (int i = 0; i < config_.inputData.size(); i++) { auto buf = folly::IOBuf::create(kMaxXdpPcktSize); auto input_pckt = parser_.getPacketFromBase64(config_.inputData[i].first); + writePcapOutput(input_pckt->cloneOne()); auto res = adapter_.testXdpProg( config_.bpfProgFd, kTestRepeatCount, @@ -142,6 +149,7 @@ void XdpTester::testFromFixture() { } // adjust IOBuf so data data_end will acount for writen data buf->append(output_pckt_size); + writePcapOutput(buf->cloneOne()); if (ret_val_str != config_.outputData[i].second) { VLOG(2) << "value from test: " << ret_val_str << " expected: " << config_.outputData[i].second; @@ -161,6 +169,14 @@ void XdpTester::testFromFixture() { } } +void XdpTester::resetTestFixtures( + const std::vector<std::pair<std::string, std::string>>& inputData, + const std::vector<std::pair<std::string, std::string>>& outputData) { + // + config_.inputData = inputData; + config_.outputData = outputData; +} + void XdpTester::testPerfFromFixture(uint32_t repeat, const int position) { // for inputData format is int first_index{0}, last_index{0}; diff --git a/katran/lib/testing/XdpTester.h b/katran/lib/testing/XdpTester.h index 7ff6aba70..022c25e57 100644 --- a/katran/lib/testing/XdpTester.h +++ b/katran/lib/testing/XdpTester.h @@ -98,6 +98,15 @@ class XdpTester { */ void testFromFixture(); + /** + * @param vector new input fixtures + * @param vector new output fixtures + * helper function which sets test fixtures to new values + */ + void resetTestFixtures( + const std::vector<std::pair<std::string, std::string>>& inputData, + const std::vector<std::pair<std::string, std::string>>& outputData); + /** * @param int repeat how many time should we repeat the test * @param int position of the packet if fixtures vector. @@ -106,6 +115,13 @@ class XdpTester { */ void testPerfFromFixture(uint32_t repeat, const int position = -1); + /** + * @param IOBuf with packet data to write.
+ * + * helper function to write packet in pcap format to specified outputFileName + */ + void writePcapOutput(std::unique_ptr<folly::IOBuf>&& buf); + private: TesterConfig config_; PcapParser parser_; diff --git a/katran/lib/testing/katran_tester.cpp b/katran/lib/testing/katran_tester.cpp index 72130c14f..984d18e3f 100644 --- a/katran/lib/testing/katran_tester.cpp +++ b/katran/lib/testing/katran_tester.cpp @@ -22,6 +22,7 @@ #include #include "KatranTestFixtures.h" +#include "KatranOptionalTestFixtures.h" #include "XdpTester.h" #include "katran/lib/KatranLb.h" #include "katran/lib/KatranLbStructs.h" @@ -33,6 +34,7 @@ DEFINE_string(healtchecking_prog, "", "path to healthchecking bpf prog"); DEFINE_bool(print_base64, false, "print packets in base64 from pcap file"); DEFINE_bool(test_from_fixtures, false, "run tests on predefined dataset"); DEFINE_bool(perf_testing, false, "run perf tests on predefined dataset"); +DEFINE_bool(optional_tests, false, "run optional (kernel specific) tests"); DEFINE_int32(repeat, 1000000, "perf test runs for single packet"); DEFINE_int32(position, -1, "perf test runs for single packet"); @@ -143,6 +145,19 @@ void prepareLbData(katran::KatranLb& lb) { addReals(lb, vip, reals6); } +void prepareOptionalLbData(katran::KatranLb& /* unused */) { +} + +void testOptionalLbCounters(katran::KatranLb& lb) { + LOG(INFO) << "Testing optional counters' sanity"; + auto stats = lb.getIcmpTooBigStats(); + if (stats.v1 != 1 || stats.v2 != 1) { + VLOG(2) << "icmpV4 hits: " << stats.v1 << " icmpv6 hits: " << stats.v2; + LOG(INFO) << "icmp packet too big counter is incorrect"; + } + LOG(INFO) << "Testing of optional counters is complete"; +} + void testLbCounters(katran::KatranLb& lb) { katran::VipKey vip; vip.address = "10.200.1.1"; @@ -211,6 +226,15 @@ int main(int argc, char** argv) { } else if (FLAGS_test_from_fixtures) { tester.testFromFixture(); testLbCounters(lb); + if (FLAGS_optional_tests) { + LOG(INFO) << "Running optional tests. They could fail if requirements " + << "are not satisfied"; + tester.resetTestFixtures( + katran::testing::inputOptionalTestFixtures, + katran::testing::outputOptionalTestFixtures); + tester.testFromFixture(); + testOptionalLbCounters(lb); + } return 0; } else if (FLAGS_perf_testing) { tester.testPerfFromFixture(FLAGS_repeat, FLAGS_position);