diff --git a/intel_pytorch_extension_py/launch.py b/intel_pytorch_extension_py/launch.py
index c61802152..da10907bf 100644
--- a/intel_pytorch_extension_py/launch.py
+++ b/intel_pytorch_extension_py/launch.py
@@ -8,6 +8,7 @@
 import glob
 import numpy as np
 from argparse import ArgumentParser, REMAINDER
+THP_LOC = "/sys/kernel/mm/transparent_hugepage/enabled"
 
 r"""
 This is a script for launching PyTorch training and inference on Intel Xeon CPU with optimal configurations.
@@ -91,6 +92,7 @@
 *** Memory allocator  ***
 
 "--enable_tcmalloc" and "--enable_jemalloc" can be used to enable different memory allcator. 
+"--set_thp" can be used to set the Transparent HugePages mode ("always", "madvise" or "never").
 
 """
 
@@ -212,15 +214,16 @@ def add_lib_preload(lib_type=None):
                      "/usr/local/lib64/", "/usr/lib/", "/usr/lib64/"]
     lib_find = False
     for lib_path in library_paths:
-        library_file = lib_path + "lib" + lib_type + ".so"
-        matches = glob.glob(library_file)
-        if len(matches) > 0:
-            if "LD_PRELOAD" in os.environ:
-                os.environ["LD_PRELOAD"] = matches[0] + ":" + os.environ["LD_PRELOAD"]
-            else:
-                os.environ["LD_PRELOAD"] = matches[0]
-            lib_find = True
-            break
+        for each_lib_type in lib_type.split(","):
+            library_file = lib_path + "lib" + each_lib_type + ".so"
+            matches = glob.glob(library_file)
+            if len(matches) > 0:
+                if "LD_PRELOAD" in os.environ:
+                    os.environ["LD_PRELOAD"] = matches[0] + ":" + os.environ["LD_PRELOAD"]
+                else:
+                    os.environ["LD_PRELOAD"] = matches[0]
+                lib_find = True
+                break
 
     if not lib_find:
         # Unable to find the TCMalloc library file
@@ -228,6 +231,52 @@ def add_lib_preload(lib_type=None):
                " or /usr/local/lib/ or /usr/local/lib64/ or /usr/lib or /usr/lib64 or "
                "~/.local/lib/ so the LD_PRELOAD environment variable will not be set."
                .format(lib_type, lib_type, expanduser("~")))
+
+def get_thp():
+    """Return the active Transparent HugePages (THP) mode, or "" if unknown.
+
+    THP_LOC is expected to list the modes with the active one in square
+    brackets, e.g. "always [madvise] never".
+
+    Returns:
+        'always', 'madvise' or 'never'; an empty string when THP_LOC cannot
+        be read or holds no recognised bracketed mode. The result is always
+        a str (never None) because callers store it in os.environ, which
+        rejects None; "" is still falsy, so set_thp("") remains a no-op.
+    """
+    try:
+        with open(THP_LOC) as f:
+            content = f.read()
+    except OSError:
+        return ""
+    # partition() cannot raise, unlike split("[")[1] on bracket-less content.
+    val = content.partition("[")[2].partition("]")[0]
+    return val if val in ('always', 'never', 'madvise') else ""
+
+def set_thp(arg):
+    """Switch Transparent HugePages to ``arg`` by writing it to THP_LOC.
+
+    Args:
+        arg: 'always', 'madvise' or 'never'; a falsy value is a no-op.
+
+    Best effort: if THP_LOC is missing or the write fails (e.g. for lack of
+    privileges) a warning is printed and nothing is raised.
+    """
+    if not arg:
+        return
+    if not os.path.exists(THP_LOC):
+        print("Warning: Unable to enable Transparent HugePages.")
+        return
+    # NOTE(review): presumably drops the page cache so that memory is free for
+    # huge pages before the mode changes -- confirm with the author.
+    os.system("sync;echo 3 > /proc/sys/vm/drop_caches")
+    try:
+        # Write the file directly rather than through a shell "echo": no
+        # string-built shell command, and a failed write is reported.
+        with open(THP_LOC, "w") as f:
+            f.write(arg + "\n")
+    except OSError as err:
+        print("Warning: Unable to set Transparent HugePages to '{}': {}".format(arg, err))
     
 def set_multi_thread_and_allcator(args):
 
@@ -254,7 +273,10 @@ def set_multi_thread_and_allcator(args):
     if args.enable_jemalloc:
         add_lib_preload(lib_type="jemalloc")
     if args.enable_iomp:
-        add_lib_preload(lib_type="iomp")
+        add_lib_preload(lib_type="iomp,iomp5")
+    if args.set_thp:
+        set_thp(args.set_thp)
+        os.environ["LAUNCH_THP_SET"] = get_thp()
  
 def launch(args):
     '''
@@ -288,34 +310,38 @@ def launch(args):
         if args.ncore_per_instance == -1:
             args.ncore_per_instance = len(cores) // args.ninstances
 
+    os.environ["LAUNCH_THP_OLD"] = get_thp()
     set_multi_thread_and_allcator(args)
 
+    os.environ["LAUNCH_CMD"] = "#"
     for i in range(args.ninstances):
        cmd = []
        cur_process_cores = ""
-       if args.disable_numactl: 
-           cmd.append(args.program)
-           cmd.extend(args.program_args)
-       else:
+       if not args.disable_numactl:
            cmd = ["numactl"]
            for core in cores[i * args.ncore_per_instance:(i + 1) * args.ncore_per_instance]:
                cur_process_cores = cur_process_cores + str(core) + ","
            numa_params = "-C {} ".format(cur_process_cores[:-1])
            cmd.extend(numa_params.split())
-           with_python = not args.no_python
-           if with_python:
-               cmd.append(sys.executable)
-           if args.module:
-               cmd.append("-m")
-           cmd.append(args.program)
-           cmd.extend(args.program_args) 
+       with_python = not args.no_python
+       if with_python:
+           cmd.append(sys.executable)
+       if args.module:
+           cmd.append("-m")
+       cmd.append(args.program)
+       cmd.extend(args.program_args)
+       os.environ["LAUNCH_CMD"] += " ".join(cmd) + ",#"
        process = subprocess.Popen(cmd, env=os.environ)
        processes.append(process)
+    os.environ["LAUNCH_CMD"] = os.environ["LAUNCH_CMD"][:-2]
     for process in processes:
         process.wait()
         if process.returncode != 0:
             raise subprocess.CalledProcessError(returncode=process.returncode,
                                                 cmd=cmd) 
+    if args.set_thp:
+        # reset to existing val
+        set_thp(os.environ["LAUNCH_THP_OLD"])
     
 def mpi_dist_launch(args):
     '''
@@ -417,6 +443,8 @@ def add_memory_allocator_params(parser):
                         help="Enable tcmalloc allocator")
     group.add_argument("--enable_jemalloc", action='store_true', default=False,
                         help="Enable jemalloc allocator")
+    group.add_argument("--set_thp", default=None, choices=['always','madvise','never'],
+                        help="Set Transparent HugePages setting")
         
 def add_multi_instance_params(parser):
 
@@ -463,6 +491,10 @@ def parse_args():
     parser.add_argument("--no_python", default=False, action="store_true",
                         help="Do not prepend the --program script with \"python\" - just exec "
                              "it directly. Useful when the script is not a Python script.")
+    parser.add_argument("--print_env", default=False, action="store_true",
+                        help="print modified env configuration")
+    parser.add_argument("--dnnverbose", default=False, action="store_true",
+                        help="enable verbose log for OneDNN and OneMKL")
     add_memory_allocator_params(parser)
     add_kmp_iomp_params(parser)
      
@@ -481,14 +513,36 @@ def parse_args():
 
 def main():
+    """Validate the platform, parse arguments and run the selected launcher.
+
+    With --dnnverbose, DNNL_VERBOSE, MKLDNN_VERBOSE and MKL_VERBOSE are set
+    to "1" before launching. With --print_env, every variable that was added
+    to or changed in this process's environment is printed afterwards.
+
+    Raises:
+        RuntimeError: when run on Windows.
+    """
 
+    # Snapshot values, not only names, so that variables which already
+    # existed but were modified (e.g. LD_PRELOAD) are reported as well.
+    env_before = dict(os.environ)
     if platform.system() == "Windows":
         raise RuntimeError("Windows platform is not supported!!!")
 
     args = parse_args()
+    if args.dnnverbose:
+        os.environ["DNNL_VERBOSE"] = "1"
+        os.environ["MKLDNN_VERBOSE"] = "1"
+        os.environ["MKL_VERBOSE"] = "1"
     if args.distributed:
         mpi_dist_launch(args)
     else:
         launch(args)
+
+    if args.print_env:
+        print('Launch settings: ')
+        for x in sorted(os.environ):
+            if env_before.get(x) != os.environ[x]:
+                print(f'{x}={os.environ[x]}')
  
 if __name__ == "__main__":
     main()