==22725== NVPROF is profiling process 22725, command: ./cachebench-tex-loads ==22725== Some kernel(s) will be replayed on device 0 in order to collect all events/metrics. ==22725== Profiling application: ./cachebench-tex-loads ==22725== Profiling result: ==22725== Metric result: "Device","Kernel","Invocations","Metric Name","Metric Description","Min","Max","Avg" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",10178.000000,10178.000000,10178.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.743000%,98.743000%,98.743000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000574,0.000574,0.000574 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",2278894,2278894,2278894 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167772204,167772204,167772204 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",296409,296409,296409 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",16878935,16878935,16878935 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",1172.842779GB/s,1172.842779GB/s,1172.842779GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",1172.842779GB/s,1172.842779GB/s,1172.842779GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",14.645273%,14.645273%,14.645273% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",93.926436%,93.926436%,93.926436% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",90.618386%,90.618386%,90.618386% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",2.072103GB/s,2.072103GB/s,2.072103GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",117.995364GB/s,117.995364GB/s,117.995364GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",439.959211GB/s,439.959211GB/s,439.959211GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",15.935860GB/s,15.935860GB/s,15.935860GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1172.842779GB/s,1172.842779GB/s,1172.842779GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",15.931036GB/s,15.931036GB/s,15.931036GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",1172.843086GB/s,1172.843086GB/s,1172.843086GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",36.650391KB/s,36.650391KB/s,36.649414KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",31462400,31462400,31462400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",34467840,34467840,34467840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",34487640,34487640,34487640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.007421%,0.007421%,0.007421% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.050562%,0.050562%,0.050562% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",3.893985%,3.893985%,3.893985% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",90.011391%,90.011391%,90.011391% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.013501%,0.013501%,0.013501% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.008171%,0.008171%,0.008171% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000459%,0.000459%,0.000459% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",284426240,284426240,284426240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",796917760,796917760,796917760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",10813440,10813440,10813440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",34487640,34487640,34487640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",24913920,24913920,24913920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",24913920,24913920,24913920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",2279584,2279584,2279584 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",5.771783%,5.771783%,5.771783% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.242728%,0.242728%,0.242728% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.117969,0.117969,0.117969 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.085983,0.085983,0.085983 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.149586%,2.149586%,2.149586% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",95.215705%,95.215705%,95.215705% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.983491,0.983491,0.983491 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.238719,0.238719,0.238719 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",19777.000000,19777.000000,19777.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.726109%,98.726109%,98.726109% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000727,0.000727,0.000727 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",112797818,112797818,112797818 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167772196,167772196,167772196 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",109652186,109652186,109652186 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",122911026,122911026,122911026 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",438.910582GB/s,438.910582GB/s,438.910582GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",438.910582GB/s,438.910582GB/s,438.910582GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",7.512788%,7.512788%,7.512788% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",51.006013%,51.006013%,51.006013% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",26.981254%,26.981254%,26.981254% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",286.862282GB/s,286.862282GB/s,286.862282GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",321.548879GB/s,321.548879GB/s,321.548879GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",713.283273GB/s,713.283273GB/s,713.283273GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",296.848705GB/s,296.848705GB/s,296.848705GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",438.910582GB/s,438.910582GB/s,438.910582GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",295.091605GB/s,295.091605GB/s,295.091605GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",438.910676GB/s,438.910676GB/s,438.910676GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",13.715820KB/s,13.715820KB/s,13.714844KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",136320000,136320000,136320000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",66247680,66247680,66247680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",66295865,66295865,66295865 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","High (8)","High (8)","High (8)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.008794%,0.008794%,0.008794% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.077565%,0.077565%,0.077565% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",11.170857%,11.170857%,11.170857% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",87.913040%,87.913040%,87.913040% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.005547%,0.005547%,0.005547% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.003964%,0.003964%,0.003964% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000243%,0.000243%,0.000243% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",714178560,714178560,714178560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1216348160,1216348160,1216348160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",168099840,168099840,168099840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",66295865,66295865,66295865 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",38021120,38021120,38021120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",38021120,38021120,38021120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",113469464,113469464,113469464 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.441771%,0.441771%,0.441771% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.378220%,0.378220%,0.378220% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.097521,0.097521,0.097521 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.072438,0.072438,0.072438 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",1.810960%,1.810960%,1.810960% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",80.323335%,80.323335%,80.323335% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.741931,0.741931,0.741931 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.252332,0.252332,0.252332 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",39103.000000,39103.000000,39103.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.028367%,99.028367%,99.028367% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000130,0.000130,0.000130 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",633421162,633421162,633421162 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",55,55,55 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",624371642,624371642,624371642 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",96431,96431,96431 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",5.604925%,5.604925%,5.604925% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",1.561274%,1.561274%,1.561274% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",802.713329GB/s,802.713329GB/s,802.713329GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",126.950358MB/s,126.950358MB/s,126.950357MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",862.800613GB/s,862.800613GB/s,862.800613GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",814.416436GB/s,814.416436GB/s,814.416436GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",814.347698GB/s,814.347698GB/s,814.347698GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",74.144531KB/s,74.144531KB/s,74.143555KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",6.740234KB/s,6.740234KB/s,6.739258KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",200207360,200207360,200207360 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",165218246,165218246,165218246 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003481%,0.003481%,0.003481% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.375665%,0.375665%,0.375665% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.617431%,99.617431%,99.617431% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.001689%,0.001689%,0.001689% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.000468%,0.000468%,0.000468% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.000794%,0.000794%,0.000794% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000121%,0.000121%,0.000121% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",3901521920,3901521920,3901521920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",165218246,165218246,165218246 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",633474628,633474628,633474628 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000010%,0.000010%,0.000010% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000340%,0.000340%,0.000340% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.075128,0.075128,0.075128 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.075189,0.075189,0.075189 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.879729%,1.879729%,1.879729% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",94.636696%,94.636696%,94.636696% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999447,0.999447,0.999447 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.075406,0.075406,0.075406 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",10818.000000,10818.000000,10818.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.817365%,98.817365%,98.817365% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000900,0.000900,0.000900 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",70820102,70820102,70820102 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335544367,335544367,335544367 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",66862230,66862230,66862230 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",88589196,88589196,88589196 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",821.228629GB/s,821.228629GB/s,821.228629GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",821.228629GB/s,821.228629GB/s,821.228629GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",821.228629GB/s,821.228629GB/s,821.228629GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",5.423936%,5.423936%,5.423936% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",5.299778%,5.299778%,5.299778% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",73.851328%,73.851328%,73.851328% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",163.642101GB/s,163.642101GB/s,163.642101GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",216.817808GB/s,216.817808GB/s,216.817808GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",231.020676GB/s,231.020676GB/s,231.020676GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",173.899945GB/s,173.899945GB/s,173.899945GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",821.228629GB/s,821.228629GB/s,821.228629GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",173.328803GB/s,173.328803GB/s,173.328803GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",821.228744GB/s,821.228744GB/s,821.228744GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",12.831055KB/s,12.831055KB/s,12.830078KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",47191040,47191040,47191040 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",37744640,37744640,37744640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",37778598,37778598,37778598 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.005301%,0.005301%,0.005301% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.042419%,0.042419%,0.042419% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",9.933820%,9.933820%,9.933820% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",84.890897%,84.890897%,84.890897% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.004187%,0.004187%,0.004187% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.004053%,0.004053%,0.004053% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000562%,0.000562%,0.000562% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",336855040,336855040,336855040 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",859832320,859832320,859832320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",37778598,37778598,37778598 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",26880000,26880000,26880000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",26880000,26880000,26880000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",71053464,71053464,71053464 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",4.960198%,4.960198%,4.960198% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.158562%,0.158562%,0.158562% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.064460,0.064460,0.064460 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.045655,0.045655,0.045655 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.141376%,1.141376%,1.141376% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",67.708931%,67.708931%,67.708931% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.732526,0.732526,0.732526 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.120069,0.120069,0.120069 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",32195.000000,32195.000000,32195.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.819887%,98.819887%,98.819887% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000089,0.000089,0.000089 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",10342,10342,10342 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",16,16,16 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",522,522,522 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",400,400,400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.998474%,99.998474%,99.998474% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",95.000000%,95.000000%,95.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",2.376434MB/s,2.376434MB/s,2.376433MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",1.821023MB/s,1.821023MB/s,1.821022MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",2983.655193GB/s,2983.655193GB/s,2983.655193GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",46.618189MB/s,46.618189MB/s,46.618188MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",47.082550MB/s,47.082550MB/s,47.082549MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",74.588867KB/s,74.588867KB/s,74.587891KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",23.308594KB/s,23.308594KB/s,23.307617KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",129827840,129827840,129827840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",129839401,129839401,129839401 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.008818%,0.008818%,0.008818% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.984957%,0.984957%,0.984957% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",98.996994%,98.996994%,98.996994% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.004590%,0.004590%,0.004590% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.001566%,0.001566%,0.001566% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002235%,0.002235%,0.002235% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000216%,0.000216%,0.000216% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",2769715200,2769715200,2769715200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",129839401,129839401,129839401 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",10240,10240,10240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000033%,0.000033%,0.000033% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000590%,0.000590%,0.000590% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.245046,0.245046,0.245046 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.193437,0.193437,0.193437 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",4.835914%,4.835914%,4.835914% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",99.905746%,99.905746%,99.905746% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999458,0.999458,0.999458 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.193813,0.193813,0.193813 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",23875.000000,23875.000000,23875.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.408639%,98.408639%,98.408639% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000294,0.000294,0.000294 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",5846,5846,5846 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",16,16,16 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",1034,1034,1034 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",729,729,729 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.998474%,99.998474%,99.998474% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",90.000000%,90.000000%,90.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",9.392458MB/s,9.392458MB/s,9.392457MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",6.621955MB/s,6.621955MB/s,6.621954MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",5953.219939GB/s,5953.219939GB/s,5953.219939GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",46.508111MB/s,46.508111MB/s,46.508110MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",53.102816MB/s,53.102816MB/s,53.102815MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",148.825195KB/s,148.825195KB/s,148.824219KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.507812KB/s,46.507812KB/s,46.506836KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",122240000,122240000,122240000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",87255075,87255075,87255075 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.018644%,0.018644%,0.018644% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.490626%,1.490626%,1.490626% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",56.106313%,56.106313%,56.106313% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",41.080216%,41.080216%,41.080216% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.003350%,0.003350%,0.003350% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.025200%,0.025200%,0.025200% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001037%,0.001037%,0.001037% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",1406566400,1406566400,1406566400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",87255075,87255075,87255075 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",5120,5120,5120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000070%,0.000070%,0.000070% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.274545%,1.274545%,1.274545% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.259643,0.259643,0.259643 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.259710,0.259710,0.259710 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",6.492757%,6.492757%,6.492757% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",99.749459%,99.749459%,99.749459% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.989959,0.989959,0.989959 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",1.021391,1.021391,1.021391 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",16130.000000,16130.000000,16130.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.206835%,99.206835%,99.206835% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.001219,0.001219,0.001219 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",270726566,270726566,270726566 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335568544,335568544,335568544 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",267369126,267369126,267369126 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",270490695,270490695,270490695 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",463.888571GB/s,463.888571GB/s,463.888571GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",463.888571GB/s,463.888571GB/s,463.888571GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",463.888571GB/s,463.888571GB/s,463.888571GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.673508%,0.673508%,0.673508% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.320279%,0.320279%,0.320279% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",19.129583%,19.129583%,19.129583% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",369.636660GB/s,369.636660GB/s,369.636660GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",373.952216GB/s,373.952216GB/s,373.952216GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",376.937777GB/s,376.937777GB/s,376.937777GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",371.246623GB/s,371.246623GB/s,371.246623GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",463.888571GB/s,463.888571GB/s,463.888571GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",374.278306GB/s,374.278306GB/s,374.278306GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",463.922061GB/s,463.922061GB/s,463.922061GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",7.248047KB/s,7.248047KB/s,7.247070KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",136320000,136320000,136320000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",64942080,64942080,64942080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",65021231,65021231,65021231 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003734%,0.003734%,0.003734% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.062092%,0.062092%,0.062092% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",17.366320%,17.366320%,17.366320% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",80.860583%,80.860583%,80.860583% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.005381%,0.005381%,0.005381% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.004346%,0.004346%,0.004346% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000991%,0.000991%,0.000991% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",693370880,693370880,693370880 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1216348160,1216348160,1216348160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",157614080,157614080,157614080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",65021231,65021231,65021231 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",38021120,38021120,38021120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",38021120,38021120,38021120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",268533660,268533660,268533660 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",1.535278%,1.535278%,1.535278% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.161273%,0.161273%,0.161273% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.042456,0.042456,0.042456 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.034593,0.034593,0.034593 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",0.864824%,0.864824%,0.864824% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",87.867771%,87.867771%,87.867771% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.583146,0.583146,0.583146 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.095375,0.095375,0.095375 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",31039.000000,31039.000000,31039.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.775935%,98.775935%,98.775935% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000081,0.000081,0.000081 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",234401998,234401998,234401998 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",23191,23191,23191 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",160863494,160863494,160863494 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",95679,95679,95679 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",30.139312%,30.139312%,30.139312% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",65.607583%,65.607583%,65.607583% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",768.550976GB/s,768.550976GB/s,768.550976GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",468.092588MB/s,468.092588MB/s,468.092587MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",3206.330763GB/s,3206.330763GB/s,3206.330763GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",1119.948194GB/s,1119.948194GB/s,1119.948194GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",1119.892898GB/s,1119.892898GB/s,1119.892898GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",113.457867MB/s,113.457867MB/s,113.457866MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",25.047852KB/s,25.047852KB/s,25.046875KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",158919680,158919680,158919680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",123919967,123919967,123919967 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.012868%,0.012868%,0.012868% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.049107%,1.049107%,1.049107% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",98.720751%,98.720751%,98.720751% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.209492%,0.209492%,0.209492% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.001685%,0.001685%,0.001685% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002696%,0.002696%,0.002696% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000034%,0.000034%,0.000034% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",2580316160,2580316160,2580316160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",123919967,123919967,123919967 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",234413572,234413572,234413572 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000036%,0.000036%,0.000036% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.003331%,0.003331%,0.003331% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.202413,0.202413,0.202413 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.202430,0.202430,0.202430 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",5.060748%,5.060748%,5.060748% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",97.875639%,97.875639%,97.875639% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.999392,0.999392,0.999392 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.204561,0.204561,0.204561 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (6)","Mid (6)","Mid (6)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",8902.000000,8902.000000,8902.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.562823%,98.562823%,98.562823% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000244,0.000244,0.000244 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",10630,10630,10630 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335544336,335544336,335544336 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",2122,2122,2122 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",2897,2897,2897 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",1013.187958GB/s,1013.187958GB/s,1013.187958GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",1013.187958GB/s,1013.187958GB/s,1013.187958GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",1013.187958GB/s,1013.187958GB/s,1013.187958GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",3.027344%,3.027344%,3.027344% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",90.000000%,90.000000%,90.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",99.999695%,99.999695%,99.999695% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",6.561233MB/s,6.561233MB/s,6.561232MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",8.957536MB/s,8.957536MB/s,8.957535MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",31.723964GB/s,31.723964GB/s,31.723964GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",31.662124MB/s,31.662124MB/s,31.662123MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1013.187958GB/s,1013.187958GB/s,1013.187958GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",32.868005MB/s,32.868005MB/s,32.868004MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",1013.188007GB/s,1013.188007GB/s,1013.188007GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",15.831055KB/s,15.831055KB/s,15.830078KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",5248000,5248000,5248000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",45578240,45578240,45578240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",27941198,27941198,27941198 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.002921%,0.002921%,0.002921% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.019376%,0.019376%,0.019376% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.493042%,0.493042%,0.493042% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.052034%,0.052034%,0.052034% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.000903%,0.000903%,0.000903% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001265%,0.001265%,0.001265% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000099%,0.000099%,0.000099% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",180224000,180224000,180224000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",692060160,692060160,692060160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",10813440,10813440,10813440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",27941198,27941198,27941198 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",10240,10240,10240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",98.914137%,98.914137%,98.914137% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.516223%,0.516223%,0.516223% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.029338,0.029338,0.029338 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.029345,0.029345,0.029345 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",0.733625%,0.733625%,0.733625% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",96.330384%,96.330384%,96.330384% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.982080,0.982080,0.982080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.350749,0.350749,0.350749 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",13826.000000,13826.000000,13826.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.074660%,99.074660%,99.074660% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000652,0.000652,0.000652 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",31569998,31569998,31569998 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83886121,83886121,83886121 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",31459094,31459094,31459094 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",38413052,38413052,38413052 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",809.452069GB/s,809.452069GB/s,809.452069GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",809.452069GB/s,809.452069GB/s,809.452069GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",38.766988%,38.766988%,38.766988% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.483587%,50.483587%,50.483587% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",54.661565%,54.661565%,54.661565% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",303.562030GB/s,303.562030GB/s,303.562030GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",370.663696GB/s,370.663696GB/s,370.663696GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",2023.827792GB/s,2023.827792GB/s,2023.827792GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",305.764665GB/s,305.764665GB/s,305.764665GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",809.452069GB/s,809.452069GB/s,809.452069GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",304.632189GB/s,304.632189GB/s,304.632189GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",809.452465GB/s,809.452465GB/s,809.452465GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",50.589844KB/s,50.589844KB/s,50.588867KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",104862720,104862720,104862720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",70789120,70789120,70789120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",53173678,53173678,53173678 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.010613%,0.010613%,0.010613% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.135530%,0.135530%,0.135530% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",5.056257%,5.056257%,5.056257% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",92.625714%,92.625714%,92.625714% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.005897%,0.005897%,0.005897% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.009883%,0.009883%,0.009883% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001193%,0.001193%,0.001193% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",536084480,536084480,536084480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1090519040,1090519040,1090519040 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",63242240,63242240,63242240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",53173678,53173678,53173678 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",34088960,34088960,34088960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",34088960,34088960,34088960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",31687360,31687360,31687360 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",1.165526%,1.165526%,1.165526% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.989387%,0.989387%,0.989387% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.176196,0.176196,0.176196 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.176311,0.176311,0.176311 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",4.407768%,4.407768%,4.407768% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",96.597710%,96.597710%,96.597710% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.989779,0.989779,0.989779 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.800926,0.800926,0.800926 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (4)","Mid (4)","Mid (4)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",31295.000000,31295.000000,31295.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.785948%,98.785948%,98.785948% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000191,0.000191,0.000191 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",29120418,29120418,29120418 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",31,31,31 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",249986,249986,249986 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",107357,107357,107357 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",90.804217%,90.804217%,90.804217% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",97.543305%,97.543305%,97.543305% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",2.211174GB/s,2.211174GB/s,2.211174GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",972.383380MB/s,972.383380MB/s,972.383379MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",5936.088193GB/s,5936.088193GB/s,5936.088193GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",272.926570GB/s,272.926570GB/s,272.926570GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",257.575652GB/s,257.575652GB/s,257.575652GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",287.519531KB/s,287.519531KB/s,287.518555KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.374023KB/s,46.374023KB/s,46.373047KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",160230400,160230400,160230400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",125243738,125243738,125243738 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.023538%,0.023538%,0.023538% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.938245%,1.938245%,1.938245% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",73.237809%,73.237809%,73.237809% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",24.070984%,24.070984%,24.070984% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.003129%,0.003129%,0.003129% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.020857%,0.020857%,0.020857% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000886%,0.000886%,0.000886% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",2622259200,2622259200,2622259200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",125243738,125243738,125243738 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",30855928,30855928,30855928 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000066%,0.000066%,0.000066% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.704485%,0.704485%,0.704485% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.371750,0.371750,0.371750 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.371821,0.371821,0.371821 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",9.295532%,9.295532%,9.295532% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",99.690419%,99.690419%,99.690419% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.998222,0.998222,0.998222 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.818043,0.818043,0.818043 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",15170.000000,15170.000000,15170.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.156641%,99.156641%,99.156641% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000563,0.000563,0.000563 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",41880942,41880942,41880942 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83910279,83910279,83910279 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",42307454,42307454,42307454 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",58153359,58153359,58153359 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",578.094563GB/s,578.094563GB/s,578.094563GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",578.094563GB/s,578.094563GB/s,578.094563GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",39.919851%,39.919851%,39.919851% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",49.817397%,49.817397%,49.817397% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",31.483288%,31.483288%,31.483288% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",291.558613GB/s,291.558613GB/s,291.558613GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",400.759467GB/s,400.759467GB/s,400.759467GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",1734.424826GB/s,1734.424826GB/s,1734.424826GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",290.205620GB/s,290.205620GB/s,290.205620GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",578.094563GB/s,578.094563GB/s,578.094563GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",288.619338GB/s,288.619338GB/s,288.619338GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",578.261329GB/s,578.261329GB/s,578.261329GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",36.130859KB/s,36.130859KB/s,36.129883KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",125834240,125834240,125834240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",77670400,77670400,77670400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",60059625,60059625,60059625 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.008956%,0.008956%,0.008956% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.114549%,0.114549%,0.114549% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",2.812121%,2.812121%,2.812121% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",95.068399%,95.068399%,95.068399% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.004551%,0.004551%,0.004551% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.010911%,0.010911%,0.010911% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000621%,0.000621%,0.000621% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",640942080,640942080,640942080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1174405120,1174405120,1174405120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",94699520,94699520,94699520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",60059625,60059625,60059625 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",36710400,36710400,36710400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",36710400,36710400,36710400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",42111124,42111124,42111124 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",1.124134%,1.124134%,1.124134% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.855758%,0.855758%,0.855758% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.142548,0.142548,0.142548 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.142628,0.142628,0.142628 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",3.565710%,3.565710%,3.565710% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",98.271872%,98.271872%,98.271872% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.991496,0.991496,0.991496 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.685254,0.685254,0.685254 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",23875.000000,23875.000000,23875.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.408639%,98.408639%,98.408639% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000296,0.000296,0.000296 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",3286,3286,3286 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",16,16,16 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",190,190,190 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",65,65,65 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.999237%,99.999237%,99.999237% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",98.750000%,98.750000%,98.750000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",1.725614MB/s,1.725614MB/s,1.725613MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",604.509766KB/s,604.509766KB/s,604.508789KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",5952.279163GB/s,5952.279163GB/s,5952.279163GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",23.250381MB/s,23.250381MB/s,23.250380MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",29.844043MB/s,29.844043MB/s,29.844042MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",148.801758KB/s,148.801758KB/s,148.800781KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.500000KB/s,46.500000KB/s,46.499023KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",87229440,87229440,87229440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",87255301,87255301,87255301 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.018622%,0.018622%,0.018622% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.374637%,1.374637%,1.374637% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",54.302100%,54.302100%,54.302100% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",43.130992%,43.130992%,43.130992% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.003058%,0.003058%,0.003058% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.023618%,0.023618%,0.023618% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000906%,0.000906%,0.000906% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",1406566400,1406566400,1406566400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",87255301,87255301,87255301 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",2560,2560,2560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000065%,0.000065%,0.000065% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.146003%,1.146003%,1.146003% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.347148,0.347148,0.347148 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.259694,0.259694,0.259694 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",6.492349%,6.492349%,6.492349% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",99.877518%,99.877518%,99.877518% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.990256,0.990256,0.990256 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",1.002286,1.002286,1.002286 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",31423.000000,31423.000000,31423.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.790894%,98.790894%,98.790894% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000103,0.000103,0.000103 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",71045906,71045906,71045906 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",23427,23427,23427 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",2035570,2035570,2035570 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",96307,96307,96307 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",78.778342%,78.778342%,78.778342% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",98.900644%,98.900644%,98.900644% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",17.726195GB/s,17.726195GB/s,17.726195GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",858.790644MB/s,858.790644MB/s,858.790643MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",5844.167217GB/s,5844.167217GB/s,5844.167217GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",620.095666GB/s,620.095666GB/s,620.095666GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",618.683523GB/s,618.683523GB/s,618.683523GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",208.903697MB/s,208.903697MB/s,208.903696MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",45.655273KB/s,45.655273KB/s,45.654297KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",160885760,160885760,160885760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",125891730,125891730,125891730 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.023760%,0.023760%,0.023760% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.939860%,1.939860%,1.939860% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",95.568813%,95.568813%,95.568813% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",2.372091%,2.372091%,2.372091% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.003091%,0.003091%,0.003091% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.015195%,0.015195%,0.015195% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000200%,0.000200%,0.000200% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",2643230720,2643230720,2643230720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",125891730,125891730,125891730 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",71208068,71208068,71208068 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000066%,0.000066%,0.000066% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.076924%,0.076924%,0.076924% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.371755,0.371755,0.371755 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.371793,0.371793,0.371793 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",9.294830%,9.294830%,9.294830% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",98.732216%,98.732216%,98.732216% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.998845,0.998845,0.998845 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.420184,0.420184,0.420184 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",8838.000000,8838.000000,8838.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.552416%,98.552416%,98.552416% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000284,0.000284,0.000284 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",10890,10890,10890 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167772180,167772180,167772180 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",2058,2058,2058 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",9565,9565,9565 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",1064.056182GB/s,1064.056182GB/s,1064.056182GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",1064.056182GB/s,1064.056182GB/s,1064.056182GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",3.024384%,3.024384%,3.024384% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",90.000000%,90.000000%,90.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",99.998779%,99.998779%,99.998779% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",13.365646MB/s,13.365646MB/s,13.365645MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",62.119735MB/s,62.119735MB/s,62.119734MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",66.633401GB/s,66.633401GB/s,66.633401GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",66.503510MB/s,66.503510MB/s,66.503510MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1064.056182GB/s,1064.056182GB/s,1064.056182GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",70.724925MB/s,70.724925MB/s,70.724924MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",1064.056309GB/s,1064.056309GB/s,1064.056309GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",33.250977KB/s,33.250977KB/s,33.250000KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",5248000,5248000,5248000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",27607040,27607040,27607040 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",27614893,27614893,27614893 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.008461%,0.008461%,0.008461% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.046880%,0.046880%,0.046880% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.597733%,0.597733%,0.597733% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.044421%,0.044421%,0.044421% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.002831%,0.002831%,0.002831% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.007298%,0.007298%,0.007298% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000093%,0.000093%,0.000093% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",169738240,169738240,169738240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",692060160,692060160,692060160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",10813440,10813440,10813440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",27614893,27614893,27614893 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",10240,10240,10240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",97.985485%,97.985485%,97.985485% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.306797%,1.306797%,1.306797% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.105824,0.105824,0.105824 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.072207,0.072207,0.072207 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",1.805179%,1.805179%,1.805179% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",81.269214%,81.269214%,81.269214% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.979428,0.979428,0.979428 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.887691,0.887691,0.887691 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",16258.000000,16258.000000,16258.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.213080%,99.213080%,99.213080% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000756,0.000756,0.000756 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",57369470,57369470,57369470 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83886120,83886120,83886120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",57342398,57342398,57342398 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",70013432,70013432,70013432 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",456.966713GB/s,456.966713GB/s,456.966713GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",456.966713GB/s,456.966713GB/s,456.966713GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",38.699424%,38.699424%,38.699424% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.251294%,50.251294%,50.251294% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",16.547766%,16.547766%,16.547766% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",312.370862GB/s,312.370862GB/s,312.370862GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",381.395911GB/s,381.395911GB/s,381.395911GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",1599.495061GB/s,1599.495061GB/s,1599.495061GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",313.372161GB/s,313.372161GB/s,313.372161GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",456.966713GB/s,456.966713GB/s,456.966713GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",312.518336GB/s,312.518336GB/s,312.518336GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",456.966931GB/s,456.966931GB/s,456.966931GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",28.559570KB/s,28.559570KB/s,28.558594KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",146805760,146805760,146805760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",65597440,65597440,65597440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",65637652,65637652,65637652 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006384%,0.006384%,0.006384% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.101934%,0.101934%,0.101934% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",2.888007%,2.888007%,2.888007% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",95.332134%,95.332134%,95.332134% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.003936%,0.003936%,0.003936% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.008510%,0.008510%,0.008510% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000850%,0.000850%,0.000850% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",724828160,724828160,724828160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1258291200,1258291200,1258291200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",105185280,105185280,105185280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",65637652,65637652,65637652 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",39331840,39331840,39331840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",39331840,39331840,39331840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",57526208,57526208,57526208 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.880309%,0.880309%,0.880309% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.777938%,0.777938%,0.777938% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.136105,0.136105,0.136105 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.121662,0.121662,0.121662 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",3.041560%,3.041560%,3.041560% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",96.973446%,96.973446%,96.973446% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.991979,0.991979,0.991979 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.617352,0.617352,0.617352 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",31551.000000,31551.000000,31551.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.795799%,98.795799%,98.795799% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000130,0.000130,0.000130 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",317790098,317790098,317790098 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",37,37,37 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",314138802,314138802,314138802 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",96223,96223,96223 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",5.301149%,5.301149%,5.301149% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.576790%,50.576790%,50.576790% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",823.072330GB/s,823.072330GB/s,823.072330GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",258.163754MB/s,258.163754MB/s,258.163754MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",1758.367141GB/s,1758.367141GB/s,1758.367141GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",832.551333GB/s,832.551333GB/s,832.551333GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",832.639059GB/s,832.639059GB/s,832.639059GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",101.652344KB/s,101.652344KB/s,101.651367KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",13.736328KB/s,13.736328KB/s,13.735352KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",161541120,161541120,161541120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",126546985,126546985,126546985 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006571%,0.006571%,0.006571% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.580746%,0.580746%,0.580746% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.309533%,99.309533%,99.309533% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.099777%,0.099777%,0.099777% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.000916%,0.000916%,0.000916% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001322%,0.001322%,0.001322% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000008%,0.000008%,0.000008% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",2664202240,2664202240,2664202240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",126546985,126546985,126546985 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",317756616,317756616,317756616 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000020%,0.000020%,0.000020% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.001108%,0.001108%,0.001108% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.112491,0.112491,0.112491 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.112539,0.112539,0.112539 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.813477%,2.813477%,2.813477% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",98.497461%,98.497461%,98.497461% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.999249,0.999249,0.999249 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.113247,0.113247,0.113247 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",8902.000000,8902.000000,8902.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.562823%,98.562823%,98.562823% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000331,0.000331,0.000331 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",10494,10494,10494 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335544340,335544340,335544340 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",2598,2598,2598 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",12473,12473,12473 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",1341.911548GB/s,1341.911548GB/s,1341.911548GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",1341.911548GB/s,1341.911548GB/s,1341.911548GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",1341.911548GB/s,1341.911548GB/s,1341.911548GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",3.027344%,3.027344%,3.027344% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",80.000000%,80.000000%,80.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",99.999390%,99.999390%,99.999390% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",10.639300MB/s,10.639300MB/s,10.639299MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",51.079292MB/s,51.079292MB/s,51.079291MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",42.016640GB/s,42.016640GB/s,42.016640GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",41.934735MB/s,41.934735MB/s,41.934734MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1341.911548GB/s,1341.911548GB/s,1341.911548GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",42.974914MB/s,42.974914MB/s,42.974913MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",1341.911628GB/s,1341.911628GB/s,1341.911628GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",20.966797KB/s,20.966797KB/s,20.965820KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",5248000,5248000,5248000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",45578240,45578240,45578240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",27940637,27940637,27940637 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.004423%,0.004423%,0.004423% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.025438%,0.025438%,0.025438% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.507777%,0.507777%,0.507777% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.043365%,0.043365%,0.043365% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.001053%,0.001053%,0.001053% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001582%,0.001582%,0.001582% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000132%,0.000132%,0.000132% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",180224000,180224000,180224000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",692060160,692060160,692060160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",10813440,10813440,10813440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",27940637,27940637,27940637 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",10240,10240,10240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",98.743864%,98.743864%,98.743864% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.672366%,0.672366%,0.672366% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.038430,0.038430,0.038430 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.038443,0.038443,0.038443 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",0.961076%,0.961076%,0.961076% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",97.380505%,97.380505%,97.380505% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.984959,0.984959,0.984959 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.457151,0.457151,0.457151 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",39871.000000,39871.000000,39871.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.047083%,99.047083%,99.047083% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000104,0.000104,0.000104 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",653655246,653655246,653655246 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",52,52,52 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",651416707,651416707,651416707 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",109262,109262,109262 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",2.604110%,2.604110%,2.604110% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.355780%,0.355780%,0.355780% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",822.237409GB/s,822.237409GB/s,822.237409GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",141.223653MB/s,141.223653MB/s,141.223652MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",847.093755GB/s,847.093755GB/s,847.093755GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",825.009322GB/s,825.009322GB/s,825.009322GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",825.062959GB/s,825.062959GB/s,825.062959GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",68.824219KB/s,68.824219KB/s,68.823242KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",6.617188KB/s,6.617188KB/s,6.616211KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",169128960,169128960,169128960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",169146631,169146631,169146631 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003330%,0.003330%,0.003330% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.368040%,0.368040%,0.368040% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.625136%,99.625136%,99.625136% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.001148%,0.001148%,0.001148% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.000449%,0.000449%,0.000449% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001449%,0.001449%,0.001449% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000127%,0.000127%,0.000127% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",4027351040,4027351040,4027351040 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",169146631,169146631,169146631 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",653612752,653612752,653612752 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000010%,0.000010%,0.000010% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000312%,0.000312%,0.000312% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.079814,0.079814,0.079814 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.073721,0.073721,0.073721 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.843034%,1.843034%,1.843034% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",95.710306%,95.710306%,95.710306% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999123,0.999123,0.999123 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.073921,0.073921,0.073921 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",10946.000000,10946.000000,10946.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.831194%,98.831194%,98.831194% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000664,0.000664,0.000664 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",23421658,23421658,23421658 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167772196,167772196,167772196 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",22698358,22698358,22698358 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",52013674,52013674,52013674 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",1111.391676GB/s,1111.391676GB/s,1111.391676GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",1111.391676GB/s,1111.391676GB/s,1111.391676GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",13.199955%,13.199955%,13.199955% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",51.514487%,51.514487%,51.514487% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",69.537528%,69.537528%,69.537528% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",150.363243GB/s,150.363243GB/s,150.363243GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",344.559934GB/s,344.559934GB/s,344.559934GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",694.755465GB/s,694.755465GB/s,694.755465GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",154.761945GB/s,154.761945GB/s,154.761945GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1111.391676GB/s,1111.391676GB/s,1111.391676GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",155.154680GB/s,155.154680GB/s,155.154680GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",1111.391914GB/s,1111.391914GB/s,1111.391914GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",34.730469KB/s,34.730469KB/s,34.729492KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",52433920,52433920,52433920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",38400000,38400000,38400000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",38425495,38425495,38425495 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Mid (6)","Mid (6)","Mid (6)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.007069%,0.007069%,0.007069% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.066848%,0.066848%,0.066848% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",3.192161%,3.192161%,3.192161% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",90.224009%,90.224009%,90.224009% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.007482%,0.007482%,0.007482% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.003489%,0.003489%,0.003489% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000187%,0.000187%,0.000187% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",315883520,315883520,315883520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",880803840,880803840,880803840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",21299200,21299200,21299200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",38425495,38425495,38425495 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",27535360,27535360,27535360 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",27535360,27535360,27535360 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",23362372,23362372,23362372 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",6.123094%,6.123094%,6.123094% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.375660%,0.375660%,0.375660% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.119354,0.119354,0.119354 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.090018,0.090018,0.090018 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.250451%,2.250451%,2.250451% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",94.796667%,94.796667%,94.796667% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.985927,0.985927,0.985927 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.326972,0.326972,0.326972 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",11394.000000,11394.000000,11394.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.877150%,98.877150%,98.877150% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000957,0.000957,0.000957 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",90558762,90558762,90558762 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335544371,335544371,335544371 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",88313742,88313742,88313742 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",100009001,100009001,100009001 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",802.141461GB/s,802.141461GB/s,802.141461GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",802.141461GB/s,802.141461GB/s,802.141461GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",802.141461GB/s,802.141461GB/s,802.141461GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",3.397081%,3.397081%,3.397081% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",2.162155%,2.162155%,2.162155% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",70.450671%,70.450671%,70.450671% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",211.119992GB/s,211.119992GB/s,211.119992GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",239.078302GB/s,239.078302GB/s,239.078302GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",250.718165GB/s,250.718165GB/s,250.718165GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",214.904380GB/s,214.904380GB/s,214.904380GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",802.141461GB/s,802.141461GB/s,802.141461GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",216.486864GB/s,216.486864GB/s,216.486864GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",802.141583GB/s,802.141583GB/s,802.141583GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",12.533203KB/s,12.533203KB/s,12.532227KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",52433920,52433920,52433920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",58337280,58337280,58337280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",40730207,40730207,40730207 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Mid (6)","Mid (6)","Mid (6)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.005213%,0.005213%,0.005213% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.051840%,0.051840%,0.051840% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",10.985908%,10.985908%,10.985908% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",86.199176%,86.199176%,86.199176% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.004796%,0.004796%,0.004796% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.003094%,0.003094%,0.003094% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001083%,0.001083%,0.001083% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",357826560,357826560,357826560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",880803840,880803840,880803840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",52756480,52756480,52756480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",40730207,40730207,40730207 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",27535360,27535360,27535360 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",27535360,27535360,27535360 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",89896792,89896792,89896792 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",2.584921%,2.584921%,2.584921% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.163968%,0.163968%,0.163968% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.048299,0.048299,0.048299 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.048345,0.048345,0.048345 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.208629%,1.208629%,1.208629% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",67.423700%,67.423700%,67.423700% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.736778,0.736778,0.736778 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.125861,0.125861,0.125861 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",31679.000000,31679.000000,31679.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.800664%,98.800664%,98.800664% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000104,0.000104,0.000104 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",334746594,334746594,334746594 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",43,43,43 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",334518638,334518638,334518638 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",96263,96263,96263 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.240117%,0.240117%,0.240117% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.022055%,50.022055%,50.022055% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",829.808717GB/s,829.808717GB/s,829.808717GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",244.521482MB/s,244.521482MB/s,244.521482MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",1664.756862GB/s,1664.756862GB/s,1664.756862GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",830.354406GB/s,830.354406GB/s,830.354406GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",830.374186GB/s,830.374186GB/s,830.374186GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",111.846680KB/s,111.846680KB/s,111.845703KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",13.004883KB/s,13.004883KB/s,13.003906KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",127185920,127185920,127185920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",127199162,127199162,127199162 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006193%,0.006193%,0.006193% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.549718%,0.549718%,0.549718% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.353104%,99.353104%,99.353104% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.086594%,0.086594%,0.086594% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.000863%,0.000863%,0.000863% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002590%,0.002590%,0.002590% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000008%,0.000008%,0.000008% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",2685173760,2685173760,2685173760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",127199162,127199162,127199162 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",334738620,334738620,334738620 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000018%,0.000018%,0.000018% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000913%,0.000913%,0.000913% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.113620,0.113620,0.113620 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.106559,0.106559,0.106559 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.663972%,2.663972%,2.663972% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",99.091734%,99.091734%,99.091734% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.999325,0.999325,0.999325 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.107142,0.107142,0.107142 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",12674.000000,12674.000000,12674.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.990552%,98.990552%,98.990552% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000789,0.000789,0.000789 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",58891714,58891714,58891714 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167772197,167772197,167772197 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",58716842,58716842,58716842 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",70263300,70263300,70263300 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",927.157156GB/s,927.157156GB/s,927.157156GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",927.157156GB/s,927.157156GB/s,927.157156GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",6.007037%,6.007037%,6.007037% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.078893%,50.078893%,50.078893% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",58.362734%,58.362734%,58.362734% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",324.486138GB/s,324.486138GB/s,324.486138GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",388.295182GB/s,388.295182GB/s,388.295182GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",811.375690GB/s,811.375690GB/s,811.375690GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",325.570163GB/s,325.570163GB/s,325.570163GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",927.157156GB/s,927.157156GB/s,927.157156GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",325.452531GB/s,325.452531GB/s,325.452531GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",927.157361GB/s,927.157361GB/s,927.157361GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",28.973633KB/s,28.973633KB/s,28.972656KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",73405440,73405440,73405440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",47247360,47247360,47247360 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",47281272,47281272,47281272 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006038%,0.006038%,0.006038% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.061332%,0.061332%,0.061332% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",8.398498%,8.398498%,8.398498% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",89.591526%,89.591526%,89.591526% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.005526%,0.005526%,0.005526% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.008151%,0.008151%,0.008151% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000202%,0.000202%,0.000202% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",483655680,483655680,483655680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",964689920,964689920,964689920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",52756480,52756480,52756480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",47281272,47281272,47281272 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",30156800,30156800,30156800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",30156800,30156800,30156800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",58913000,58913000,58913000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",1.539856%,1.539856%,1.539856% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.388872%,0.388872%,0.388872% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.110666,0.110666,0.110666 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.090766,0.090766,0.090766 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.269140%,2.269140%,2.269140% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",96.767387%,96.767387%,96.767387% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.990533,0.990533,0.990533 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.337275,0.337275,0.337275 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",39999.000000,39999.000000,39999.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.050133%,99.050133%,99.050133% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000107,0.000107,0.000107 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",671088958,671088958,671088958 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",23699,23699,23699 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",670443710,670443710,670443710 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",96635,96635,96635 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.078215%,0.078215%,0.078215% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",827.777110GB/s,827.777110GB/s,827.777110GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",122.175886MB/s,122.175886MB/s,122.175885MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",828.598672GB/s,828.598672GB/s,828.598672GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",828.573386GB/s,828.573386GB/s,828.573386GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",828.573779GB/s,828.573779GB/s,828.573779GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",29.962708MB/s,29.962708MB/s,29.962708MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",6.472656KB/s,6.472656KB/s,6.471680KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",169784320,169784320,169784320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",169802561,169802561,169802561 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003448%,0.003448%,0.003448% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.359127%,0.359127%,0.359127% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.634525%,99.634525%,99.634525% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.001307%,0.001307%,0.001307% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.000443%,0.000443%,0.000443% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.000747%,0.000747%,0.000747% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000116%,0.000116%,0.000116% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",4048322560,4048322560,4048322560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",169802561,169802561,169802561 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",671088640,671088640,671088640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000009%,0.000009%,0.000009% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000278%,0.000278%,0.000278% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.079057,0.079057,0.079057 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.071938,0.071938,0.071938 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.798462%,1.798462%,1.798462% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",97.538112%,97.538112%,97.538112% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999086,0.999086,0.999086 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.072120,0.072120,0.072120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",12738.000000,12738.000000,12738.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.995623%,98.995623%,98.995623% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.001173,0.001173,0.001173 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",125852078,125852078,125852078 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335544371,335544371,335544371 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",122184570,122184570,122184570 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",144999801,144999801,144999801 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",821.998724GB/s,821.998724GB/s,821.998724GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",821.998724GB/s,821.998724GB/s,821.998724GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",821.998724GB/s,821.998724GB/s,821.998724GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",4.411976%,4.411976%,4.411976% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.861844%,0.861844%,0.861844% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",57.361560%,57.361560%,57.361560% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",299.321296GB/s,299.321296GB/s,299.321296GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",355.212842GB/s,355.212842GB/s,355.212842GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",359.674613GB/s,359.674613GB/s,359.674613GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",307.491511GB/s,307.491511GB/s,307.491511GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",821.998724GB/s,821.998724GB/s,821.998724GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",308.305763GB/s,308.305763GB/s,308.305763GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",821.998849GB/s,821.998849GB/s,821.998849GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",12.842773KB/s,12.842773KB/s,12.841797KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",73405440,73405440,73405440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",65218560,65218560,65218560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",47632815,47632815,47632815 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","High (8)","High (8)","High (8)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.005528%,0.005528%,0.005528% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.080900%,0.080900%,0.080900% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",9.727329%,9.727329%,9.727329% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",87.068165%,87.068165%,87.068165% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.008423%,0.008423%,0.008423% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.006584%,0.006584%,0.006584% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.002406%,0.002406%,0.002406% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",441712640,441712640,441712640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",964689920,964689920,964689920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",105185280,105185280,105185280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",47632815,47632815,47632815 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",30156800,30156800,30156800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",30156800,30156800,30156800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",125519696,125519696,125519696 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",2.914846%,2.914846%,2.914846% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.185819%,0.185819%,0.185819% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.047349,0.047349,0.047349 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.047404,0.047404,0.047404 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.185109%,1.185109%,1.185109% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",83.401289%,83.401289%,83.401289% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.602609,0.602609,0.602609 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.116895,0.116895,0.116895 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",31679.000000,31679.000000,31679.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.800664%,98.800664%,98.800664% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000100,0.000100,0.000100 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",321421062,321421062,321421062 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",36,36,36 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",320572353,320572353,320572353 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",96366,96366,96366 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",4.221183%,4.221183%,4.221183% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.137781%,50.137781%,50.137781% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",823.100558GB/s,823.100558GB/s,823.100558GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",253.367332MB/s,253.367332MB/s,253.367332MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",1723.137651GB/s,1723.137651GB/s,1723.137651GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",825.175246GB/s,825.175246GB/s,825.175246GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",825.279701GB/s,825.279701GB/s,825.279701GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",96.922852KB/s,96.922852KB/s,96.921875KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",13.460938KB/s,13.460938KB/s,13.459961KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",127185920,127185920,127185920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",127201498,127201498,127201498 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006797%,0.006797%,0.006797% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.570677%,0.570677%,0.570677% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.319284%,99.319284%,99.319284% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.099908%,0.099908%,0.099908% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.000910%,0.000910%,0.000910% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001271%,0.001271%,0.001271% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000009%,0.000009%,0.000009% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",2685173760,2685173760,2685173760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",127201498,127201498,127201498 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",321380380,321380380,321380380 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000019%,0.000019%,0.000019% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.001125%,0.001125%,0.001125% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.115161,0.115161,0.115161 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.110707,0.110707,0.110707 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.767681%,2.767681%,2.767681% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",98.873958%,98.873958%,98.873958% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.999297,0.999297,0.999297 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.111414,0.111414,0.111414 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",8838.000000,8838.000000,8838.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.552416%,98.552416%,98.552416% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000344,0.000344,0.000344 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",5614,5614,5614 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167772176,167772176,167772176 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",330,330,330 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",312,312,312 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",268.558233GB/s,268.558233GB/s,268.558233GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",268.558233GB/s,268.558233GB/s,268.558233GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",3.027344%,3.027344%,3.027344% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",98.750000%,98.750000%,98.750000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",99.999924%,99.999924%,99.999924% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",553.900391KB/s,553.900391KB/s,553.899414KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",523.688477KB/s,523.688477KB/s,523.687500KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",16.817673GB/s,16.817673GB/s,16.817673GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",8.392445MB/s,8.392445MB/s,8.392444MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",268.558233GB/s,268.558233GB/s,268.558233GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",9.202184MB/s,9.202184MB/s,9.202183MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",268.558259GB/s,268.558259GB/s,268.558259GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",8.391602KB/s,8.391602KB/s,8.390625KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",5248000,5248000,5248000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",27607040,27607040,27607040 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",27616537,27616537,27616537 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.001911%,0.001911%,0.001911% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.010632%,0.010632%,0.010632% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.498310%,0.498310%,0.498310% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.074677%,0.074677%,0.074677% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.001700%,0.001700%,0.001700% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001150%,0.001150%,0.001150% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000018%,0.000018%,0.000018% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",169738240,169738240,169738240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",692060160,692060160,692060160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",10813440,10813440,10813440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",27616537,27616537,27616537 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",5120,5120,5120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",99.114475%,99.114475%,99.114475% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.297125%,0.297125%,0.297125% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.026431,0.026431,0.026431 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.016502,0.016502,0.016502 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",0.412550%,0.412550%,0.412550% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",89.724708%,89.724708%,89.724708% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.987078,0.987078,0.987078 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.203300,0.203300,0.203300 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",8902.000000,8902.000000,8902.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.562823%,98.562823%,98.562823% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000235,0.000235,0.000235 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",11126,11126,11126 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335544336,335544336,335544336 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",522,522,522 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",2846,2846,2846 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",1013.376879GB/s,1013.376879GB/s,1013.376879GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",1013.376879GB/s,1013.376879GB/s,1013.376879GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",1013.376879GB/s,1013.376879GB/s,1013.376879GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",3.027344%,3.027344%,3.027344% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",95.000000%,95.000000%,95.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",99.999847%,99.999847%,99.999847% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",1.614326MB/s,1.614326MB/s,1.614326MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",8.801484MB/s,8.801484MB/s,8.801483MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",31.729879GB/s,31.729879GB/s,31.729879GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",31.668027MB/s,31.668027MB/s,31.668026MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1013.376879GB/s,1013.376879GB/s,1013.376879GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",34.408053MB/s,34.408053MB/s,34.408052MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",1013.376927GB/s,1013.376927GB/s,1013.376927GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",15.833984KB/s,15.833984KB/s,15.833008KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",5248000,5248000,5248000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",45578240,45578240,45578240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",27941272,27941272,27941272 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.002924%,0.002924%,0.002924% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.019450%,0.019450%,0.019450% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.494749%,0.494749%,0.494749% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.052559%,0.052559%,0.052559% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.000879%,0.000879%,0.000879% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001268%,0.001268%,0.001268% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000014%,0.000014%,0.000014% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",180224000,180224000,180224000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",692060160,692060160,692060160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",10813440,10813440,10813440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",27941272,27941272,27941272 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",10240,10240,10240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",98.911362%,98.911362%,98.911362% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.516796%,0.516796%,0.516796% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.029341,0.029341,0.029341 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.029359,0.029359,0.029359 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",0.733980%,0.733980%,0.733980% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",96.336875%,96.336875%,96.336875% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.982443,0.982443,0.982443 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.350800,0.350800,0.350800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",39999.000000,39999.000000,39999.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.050133%,99.050133%,99.050133% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000128,0.000128,0.000128 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",669524830,669524830,669524830 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",42,42,42 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",668275094,668275094,668275094 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",96478,96478,96478 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.233757%,0.233757%,0.233757% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.187902%,0.187902%,0.187902% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",818.090970GB/s,818.090970GB/s,818.090970GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",120.941282MB/s,120.941282MB/s,120.941281MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",821.560337GB/s,821.560337GB/s,821.560337GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",819.614870GB/s,819.614870GB/s,819.614870GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",819.620876GB/s,819.620876GB/s,819.620876GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",53.913086KB/s,53.913086KB/s,53.912109KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",6.417969KB/s,6.417969KB/s,6.416992KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",204794880,204794880,204794880 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",169806008,169806008,169806008 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003251%,0.003251%,0.003251% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.359332%,0.359332%,0.359332% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.634667%,99.634667%,99.634667% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.001124%,0.001124%,0.001124% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.000437%,0.000437%,0.000437% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.000743%,0.000743%,0.000743% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000117%,0.000117%,0.000117% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",4048322560,4048322560,4048322560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",169806008,169806008,169806008 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",669519924,669519924,669519924 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000009%,0.000009%,0.000009% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000320%,0.000320%,0.000320% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.072052,0.072052,0.072052 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.072062,0.072062,0.072062 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.801542%,1.801542%,1.801542% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",96.727031%,96.727031%,96.727031% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999051,0.999051,0.999051 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.072210,0.072210,0.072210 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",13826.000000,13826.000000,13826.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.074660%,99.074660%,99.074660% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000643,0.000643,0.000643 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",48385678,48385678,48385678 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167796320,167796320,167796320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",45735910,45735910,45735910 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",78452443,78452443,78452443 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",670.158460GB/s,670.158460GB/s,670.158460GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",670.158460GB/s,670.158460GB/s,670.158460GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",17.629443%,17.629443%,17.629443% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",52.527163%,52.527163%,52.527163% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",54.402792%,54.402792%,54.402792% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",182.690066GB/s,182.690066GB/s,182.690066GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",313.374808GB/s,313.374808GB/s,313.374808GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",754.010074GB/s,754.010074GB/s,754.010074GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",192.362249GB/s,192.362249GB/s,192.362249GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",670.158460GB/s,670.158460GB/s,670.158460GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",193.274447GB/s,193.274447GB/s,193.274447GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",670.254966GB/s,670.254966GB/s,670.254966GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",20.942383KB/s,20.942383KB/s,20.941406KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",94376960,94376960,94376960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",53145600,53145600,53145600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",53179766,53179766,53179766 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Mid (6)","Mid (6)","Mid (6)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.008934%,0.008934%,0.008934% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.101941%,0.101941%,0.101941% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",8.913644%,8.913644%,8.913644% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",89.262393%,89.262393%,89.262393% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.011564%,0.011564%,0.011564% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.005616%,0.005616%,0.005616% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000905%,0.000905%,0.000905% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",557056000,557056000,557056000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1048576000,1048576000,1048576000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",84213760,84213760,84213760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",53179766,53179766,53179766 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",32778240,32778240,32778240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",32778240,32778240,32778240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",48157312,48157312,48157312 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",1.239261%,1.239261%,1.239261% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.455741%,0.455741%,0.455741% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.111103,0.111103,0.111103 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.097440,0.097440,0.097440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.435993%,2.435993%,2.435993% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",73.306439%,73.306439%,73.306439% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.737819,0.737819,0.737819 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.311823,0.311823,0.311823 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",8710.000000,8710.000000,8710.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.531142%,98.531142%,98.531142% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000140,0.000140,0.000140 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",5222,5222,5222 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83886096,83886096,83886096 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",1218,1218,1218 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",4651,4651,4651 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",1013.778877GB/s,1013.778877GB/s,1013.778877GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",1013.778877GB/s,1013.778877GB/s,1013.778877GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",5.876608%,5.876608%,5.876608% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",90.000000%,90.000000%,90.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",99.998779%,99.998779%,99.998779% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",15.073030MB/s,15.073030MB/s,15.073030MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",57.557197MB/s,57.557197MB/s,57.557196MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",126.969864GB/s,126.969864GB/s,126.969864GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",63.361179MB/s,63.361179MB/s,63.361178MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1013.778877GB/s,1013.778877GB/s,1013.778877GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",64.623453MB/s,64.623453MB/s,64.623452MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",1013.779070GB/s,1013.779070GB/s,1013.779070GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",63.360352KB/s,63.360352KB/s,63.359375KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",5248000,5248000,5248000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",44595200,44595200,44595200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",26955313,26955313,26955313 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.014251%,0.014251%,0.014251% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.068383%,0.068383%,0.068383% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.650780%,0.650780%,0.650780% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.045457%,0.045457%,0.045457% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.004833%,0.004833%,0.004833% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.004595%,0.004595%,0.004595% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000286%,0.000286%,0.000286% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",159088640,159088640,159088640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",692060160,692060160,692060160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",491520,491520,491520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",26955313,26955313,26955313 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",5120,5120,5120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",97.020896%,97.020896%,97.020896% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",2.190518%,2.190518%,2.190518% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.118775,0.118775,0.118775 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.118792,0.118792,0.118792 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",2.969796%,2.969796%,2.969796% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",91.773329%,91.773329%,91.773329% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.972157,0.972157,0.972157 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",1.471432,1.471432,1.471432 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",13506.000000,13506.000000,13506.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.052736%,99.052736%,99.052736% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000989,0.000989,0.000989 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",174218906,174218906,174218906 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335544375,335544375,335544375 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",173088873,173088873,173088873 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",189917403,189917403,189917403 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",662.906440GB/s,662.906440GB/s,662.906440GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",662.906440GB/s,662.906440GB/s,662.906440GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",662.906440GB/s,662.906440GB/s,662.906440GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",2.315872%,2.315872%,2.315872% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.559745%,0.559745%,0.559745% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",43.508689%,43.508689%,43.508689% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",341.956999GB/s,341.956999GB/s,341.956999GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",375.203698GB/s,375.203698GB/s,375.203698GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",372.925333GB/s,372.925333GB/s,372.925333GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",348.897270GB/s,348.897270GB/s,348.897270GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",662.906440GB/s,662.906440GB/s,662.906440GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",344.189509GB/s,344.189509GB/s,344.189509GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",662.906549GB/s,662.906549GB/s,662.906549GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",10.357422KB/s,10.357422KB/s,10.356445KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",94376960,94376960,94376960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",51507200,51507200,51507200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",51560189,51560189,51560189 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.004443%,0.004443%,0.004443% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.053100%,0.053100%,0.053100% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",16.570827%,16.570827%,16.570827% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",79.549602%,79.549602%,79.549602% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.005165%,0.005165%,0.005165% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.004267%,0.004267%,0.004267% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001545%,0.001545%,0.001545% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",525598720,525598720,525598720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1048576000,1048576000,1048576000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",63242240,63242240,63242240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",51560189,51560189,51560189 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",32778240,32778240,32778240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",32778240,32778240,32778240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",176601840,176601840,176601840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",3.645709%,3.645709%,3.645709% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.165341%,0.165341%,0.165341% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.051706,0.051706,0.051706 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.040339,0.040339,0.040339 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.008477%,1.008477%,1.008477% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",85.047992%,85.047992%,85.047992% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.596909,0.596909,0.596909 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.103141,0.103141,0.103141 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",8834.000000,8834.000000,8834.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.551760%,98.551760%,98.551760% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000389,0.000389,0.000389 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",41062,41062,41062 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83886096,83886096,83886096 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",40970,40970,40970 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",1881086,1881086,1881086 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",1187.602193GB/s,1187.602193GB/s,1187.602193GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",1187.602193GB/s,1187.602193GB/s,1187.602193GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",11.067708%,11.067708%,11.067708% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.000000%,50.000000%,50.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",99.870768%,99.870768%,99.870768% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",593.946067MB/s,593.946067MB/s,593.946066MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",26.631139GB/s,26.631139GB/s,26.631139GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",297.190490GB/s,297.190490GB/s,297.190490GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",593.801096MB/s,593.801096MB/s,593.801095MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1187.602193GB/s,1187.602193GB/s,1187.602193GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",595.279800MB/s,595.279800MB/s,595.279799MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",1187.602420GB/s,1187.602420GB/s,1187.602420GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",74.224609KB/s,74.224609KB/s,74.223633KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",10490880,10490880,10490880 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",45230080,45230080,45230080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",27597288,27597288,27597288 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.017637%,0.017637%,0.017637% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.091470%,0.091470%,0.091470% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.996208%,0.996208%,0.996208% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.318515%,0.318515%,0.318515% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.008316%,0.008316%,0.008316% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.006715%,0.006715%,0.006715% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000260%,0.000260%,0.000260% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",158597120,158597120,158597120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",713031680,713031680,713031680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",27597288,27597288,27597288 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",22292480,22292480,22292480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",22292480,22292480,22292480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",40960,40960,40960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",95.812511%,95.812511%,95.812511% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",2.748367%,2.748367%,2.748367% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.147679,0.147679,0.147679 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.147895,0.147895,0.147895 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",3.697380%,3.697380%,3.697380% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",88.374841%,88.374841%,88.374841% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.955943,0.955943,0.955943 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",1.834141,1.834141,1.834141 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",31935.000000,31935.000000,31935.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.810279%,98.810279%,98.810279% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000123,0.000123,0.000123 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",167773094,167773094,167773094 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",36,36,36 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",167747854,167747854,167747854 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",97127,97127,97127 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",50.000000%,50.000000%,50.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.007399%,50.007399%,50.007399% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",749.061084GB/s,749.061084GB/s,749.061084GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",444.119859MB/s,444.119859MB/s,444.119858MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",2996.769933GB/s,2996.769933GB/s,2996.769933GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",749.169620GB/s,749.169620GB/s,749.169620GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",749.173791GB/s,749.173791GB/s,749.173791GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",168.562500KB/s,168.562500KB/s,168.561523KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",23.411133KB/s,23.411133KB/s,23.410156KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",128496640,128496640,128496640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",128511183,128511183,128511183 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.011284%,0.011284%,0.011284% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.010989%,1.010989%,1.010989% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",98.969214%,98.969214%,98.969214% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.003437%,0.003437%,0.003437% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.001583%,0.001583%,0.001583% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002401%,0.002401%,0.002401% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000407%,0.000407%,0.000407% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",2727116800,2727116800,2727116800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",128511183,128511183,128511183 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000033%,0.000033%,0.000033% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000651%,0.000651%,0.000651% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.184376,0.184376,0.184376 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.195186,0.195186,0.195186 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",4.879660%,4.879660%,4.879660% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",99.108985%,99.108985%,99.108985% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.999123,0.999123,0.999123 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.195411,0.195411,0.195411 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (6)","Mid (6)","Mid (6)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",30015.000000,30015.000000,30015.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.734175%,98.734175%,98.734175% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000245,0.000245,0.000245 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",82022,82022,82022 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",16,16,16 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",81930,81930,81930 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",31378,31378,31378 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.975586%,99.975586%,99.975586% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.000000%,50.000000%,50.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",743.471091MB/s,743.471091MB/s,743.471090MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",284.738629MB/s,284.738629MB/s,284.738628MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",5947.224263GB/s,5947.224263GB/s,5947.224263GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",743.380346MB/s,743.380346MB/s,743.380345MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",744.305942MB/s,744.305942MB/s,744.305941MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",148.675781KB/s,148.675781KB/s,148.674805KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.460938KB/s,46.460938KB/s,46.459961KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",153676800,153676800,153676800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",118695351,118695351,118695351 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.025150%,0.025150%,0.025150% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.961609%,1.961609%,1.961609% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",55.237961%,55.237961%,55.237961% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",41.486628%,41.486628%,41.486628% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.003282%,0.003282%,0.003282% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.025574%,0.025574%,0.025574% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001175%,0.001175%,0.001175% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",2412544000,2412544000,2412544000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",118695351,118695351,118695351 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",81920,81920,81920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000070%,0.000070%,0.000070% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.258550%,1.258550%,1.258550% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.352860,0.352860,0.352860 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.352976,0.352976,0.352976 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",8.824405%,8.824405%,8.824405% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",99.766748%,99.766748%,99.766748% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.980619,0.980619,0.980619 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",1.104482,1.104482,1.104482 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",31679.000000,31679.000000,31679.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.800664%,98.800664%,98.800664% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000083,0.000083,0.000083 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",141265902,141265902,141265902 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",23404,23404,23404 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",139806766,139806766,139806766 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",97199,97199,97199 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",57.904335%,57.904335%,57.904335% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.392736%,50.392736%,50.392736% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",685.770890GB/s,685.770890GB/s,685.770890GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",488.216676MB/s,488.216676MB/s,488.216675MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",3291.880011GB/s,3291.880011GB/s,3291.880011GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",692.848243GB/s,692.848243GB/s,692.848243GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",692.928147GB/s,692.928147GB/s,692.928147GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",117.554944MB/s,117.554944MB/s,117.554943MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",25.716797KB/s,25.716797KB/s,25.715820KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",127185920,127185920,127185920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",127196513,127196513,127196513 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.013478%,0.013478%,0.013478% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.112679%,1.112679%,1.112679% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",98.855252%,98.855252%,98.855252% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.010131%,0.010131%,0.010131% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.001781%,0.001781%,0.001781% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.005589%,0.005589%,0.005589% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000395%,0.000395%,0.000395% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",2685173760,2685173760,2685173760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",127196513,127196513,127196513 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",141249612,141249612,141249612 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000037%,0.000037%,0.000037% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000660%,0.000660%,0.000660% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.198713,0.198713,0.198713 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.215144,0.215144,0.215144 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",5.378597%,5.378597%,5.378597% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",96.777070%,96.777070%,96.777070% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.999184,0.999184,0.999184 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.215566,0.215566,0.215566 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (6)","Mid (6)","Mid (6)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",10114.000000,10114.000000,10114.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.735045%,98.735045%,98.735045% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000565,0.000565,0.000565 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",123358,123358,123358 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83886125,83886125,83886125 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",122974,122974,122974 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",3732296,3732296,3732296 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",1279.884667GB/s,1279.884667GB/s,1279.884667GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",1279.884667GB/s,1279.884667GB/s,1279.884667GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",27.166193%,27.166193%,27.166193% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.000000%,50.000000%,50.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",96.908164%,96.908164%,96.908164% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",1.876265GB/s,1.876265GB/s,1.876265GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",56.945186GB/s,56.945186GB/s,56.945186GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",960.225972GB/s,960.225972GB/s,960.225972GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",1.874831GB/s,1.874831GB/s,1.874831GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1279.884667GB/s,1279.884667GB/s,1279.884667GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",1.882124GB/s,1.882124GB/s,1.882124GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",1279.885354GB/s,1279.885354GB/s,1279.885354GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",79.992188KB/s,79.992188KB/s,79.991211KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",31462400,31462400,31462400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",51783680,51783680,51783680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",34155756,34155756,34155756 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.015597%,0.015597%,0.015597% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.168155%,0.168155%,0.168155% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.495684%,0.495684%,0.495684% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",93.574718%,93.574718%,93.574718% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.010428%,0.010428%,0.010428% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.014960%,0.014960%,0.014960% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000338%,0.000338%,0.000338% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",252968960,252968960,252968960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",796917760,796917760,796917760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",31784960,31784960,31784960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",34155756,34155756,34155756 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",24913920,24913920,24913920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",24913920,24913920,24913920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",122880,122880,122880 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",5.181950%,5.181950%,5.181950% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.538170%,0.538170%,0.538170% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.180122,0.180122,0.180122 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.180224,0.180224,0.180224 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",4.505603%,4.505603%,4.505603% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",96.825371%,96.825371%,96.825371% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.982980,0.982980,0.982980 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.512082,0.512082,0.512082 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",11778.000000,11778.000000,11778.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.913759%,98.913759%,98.913759% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000487,0.000487,0.000487 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",2759834,2759834,2759834 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83886117,83886117,83886117 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",305917,305917,305917 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",11393075,11393075,11393075 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",1239.882558GB/s,1239.882558GB/s,1239.882558GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",1239.882558GB/s,1239.882558GB/s,1239.882558GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",40.976445%,40.976445%,40.976445% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",93.292926%,93.292926%,93.292926% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",88.323140%,88.323140%,88.323140% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",4.521622GB/s,4.521622GB/s,4.521622GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",168.395936GB/s,168.395936GB/s,168.395936GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",1860.126543GB/s,1860.126543GB/s,1860.126543GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",40.807271GB/s,40.807271GB/s,40.807271GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1239.882558GB/s,1239.882558GB/s,1239.882558GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",40.791870GB/s,40.791870GB/s,40.791870GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",1239.883105GB/s,1239.883105GB/s,1239.883105GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",77.492188KB/s,77.492188KB/s,77.491211KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",62919680,62919680,62919680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",60303360,60303360,60303360 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",42676102,42676102,42676102 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.016468%,0.016468%,0.016468% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.195266%,0.195266%,0.195266% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",1.030966%,1.030966%,1.030966% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",95.424132%,95.424132%,95.424132% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.009023%,0.009023%,0.009023% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.026692%,0.026692%,0.026692% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001141%,0.001141%,0.001141% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",368312320,368312320,368312320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",922746880,922746880,922746880 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",63242240,63242240,63242240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",42676102,42676102,42676102 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",28846080,28846080,28846080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",28846080,28846080,28846080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",2760876,2760876,2760876 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",2.278625%,2.278625%,2.278625% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.017687%,1.017687%,1.017687% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.230037,0.230037,0.230037 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.230149,0.230149,0.230149 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",5.753722%,5.753722%,5.753722% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",91.645899%,91.645899%,91.645899% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.982270,0.982270,0.982270 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.865385,0.865385,0.865385 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (4)","Mid (4)","Mid (4)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",31039.000000,31039.000000,31039.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.775935%,98.775935%,98.775935% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000207,0.000207,0.000207 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",163942,163942,163942 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",1756,1756,1756 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",163850,163850,163850 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",69294,69294,69294 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.951172%,99.951172%,99.951172% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.000000%,50.000000%,50.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",1.450728GB/s,1.450728GB/s,1.450728GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",628.253719MB/s,628.253719MB/s,628.253718MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",5941.999639GB/s,5941.999639GB/s,5941.999639GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",1.450639GB/s,1.450639GB/s,1.450639GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",1.451542GB/s,1.451542GB/s,1.451542GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",15.920765MB/s,15.920765MB/s,15.920764MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.419922KB/s,46.419922KB/s,46.418945KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",123909120,123909120,123909120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",123934737,123934737,123934737 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.025544%,0.025544%,0.025544% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.952311%,1.952311%,1.952311% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",54.561929%,54.561929%,54.561929% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",42.163739%,42.163739%,42.163739% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.003182%,0.003182%,0.003182% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.024144%,0.024144%,0.024144% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001139%,0.001139%,0.001139% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",2580316160,2580316160,2580316160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",123934737,123934737,123934737 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",163840,163840,163840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000067%,0.000067%,0.000067% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.267946%,1.267946%,1.267946% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.405141,0.405141,0.405141 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.368329,0.368329,0.368329 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",9.208223%,9.208223%,9.208223% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",99.674227%,99.674227%,99.674227% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.997278,0.997278,0.997278 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",1.158333,1.158333,1.158333 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",12994.000000,12994.000000,12994.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.015411%,99.015411%,99.015411% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000519,0.000519,0.000519 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",15087730,15087730,15087730 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83886115,83886115,83886115 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",14625654,14625654,14625654 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",21588020,21588020,21588020 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",1116.473160GB/s,1116.473160GB/s,1116.473160GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",1116.473160GB/s,1116.473160GB/s,1116.473160GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",41.104379%,41.104379%,41.104379% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",51.255905%,51.255905%,51.255905% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",75.486951%,75.486951%,75.486951% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",194.658639GB/s,194.658639GB/s,194.658639GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",287.323533GB/s,287.323533GB/s,287.323533GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",2233.218896GB/s,2233.218896GB/s,2233.218896GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",198.634448GB/s,198.634448GB/s,198.634448GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1116.473160GB/s,1116.473160GB/s,1116.473160GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",200.808592GB/s,200.808592GB/s,200.808592GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",1116.473625GB/s,1116.473625GB/s,1116.473625GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",69.779297KB/s,69.779297KB/s,69.778320KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",83891200,83891200,83891200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",66529280,66529280,66529280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",48911120,48911120,48911120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Mid (6)","Mid (6)","Mid (6)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.017261%,0.017261%,0.017261% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.225538%,0.225538%,0.225538% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",2.996851%,2.996851%,2.996851% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",94.139301%,94.139301%,94.139301% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.007151%,0.007151%,0.007151% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.013881%,0.013881%,0.013881% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001129%,0.001129%,0.001129% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",462684160,462684160,462684160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1006632960,1006632960,1006632960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",84213760,84213760,84213760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",48911120,48911120,48911120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",31467520,31467520,31467520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",31467520,31467520,31467520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",14924376,14924376,14924376 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",1.451099%,1.451099%,1.451099% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.147788%,1.147788%,1.147788% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.230186,0.230186,0.230186 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.230773,0.230773,0.230773 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",5.769313%,5.769313%,5.769313% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",94.485985%,94.485985%,94.485985% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.984475,0.984475,0.984475 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.946529,0.946529,0.946529 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (4)","Mid (4)","Mid (4)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",36287.000000,36287.000000,36287.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.952965%,98.952965%,98.952965% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000100,0.000100,0.000100 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",163942,163942,163942 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",23,23,23 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",163996,163996,163996 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",44061,44061,44061 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.975586%,99.975586%,99.975586% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",745.792848MB/s,745.792848MB/s,745.792847MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",200.373050MB/s,200.373050MB/s,200.373049MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",2980.424625GB/s,2980.424625GB/s,2980.424625GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",745.083418MB/s,745.083418MB/s,745.083417MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",745.547276MB/s,745.547276MB/s,745.547275MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",107.105469KB/s,107.105469KB/s,107.104492KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",23.283203KB/s,23.283203KB/s,23.282227KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",185789440,185789440,185789440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",150790788,150790788,150790788 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.010973%,0.010973%,0.010973% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.131116%,1.131116%,1.131116% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",98.841205%,98.841205%,98.841205% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.012072%,0.012072%,0.012072% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.001539%,0.001539%,0.001539% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002332%,0.002332%,0.002332% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000261%,0.000261%,0.000261% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",3440148480,3440148480,3440148480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",150790788,150790788,150790788 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",163840,163840,163840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000033%,0.000033%,0.000033% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000469%,0.000469%,0.000469% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.224406,0.224406,0.224406 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.224428,0.224428,0.224428 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",5.610709%,5.610709%,5.610709% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",99.902275%,99.902275%,99.902275% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999353,0.999353,0.999353 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.224733,0.224733,0.224733 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",8834.000000,8834.000000,8834.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.551760%,98.551760%,98.551760% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000288,0.000288,0.000288 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",41062,41062,41062 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167772208,167772208,167772208 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",41674,41674,41674 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",2470929,2470929,2470929 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",1166.336360GB/s,1166.336360GB/s,1166.336360GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",1166.336360GB/s,1166.336360GB/s,1166.336360GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",3.006629%,3.006629%,3.006629% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.000000%,50.000000%,50.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",99.669268%,99.669268%,99.669268% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",296.666879MB/s,296.666879MB/s,296.666878MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",17.177667GB/s,17.177667GB/s,17.177667GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",73.038398GB/s,73.038398GB/s,73.038398GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",291.584089MB/s,291.584089MB/s,291.584088MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1166.336360GB/s,1166.336360GB/s,1166.336360GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",292.310203MB/s,292.310203MB/s,292.310202MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",1166.336694GB/s,1166.336694GB/s,1166.336694GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",36.447266KB/s,36.447266KB/s,36.446289KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",5248000,5248000,5248000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",27586560,27586560,27586560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",27594496,27594496,27594496 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.008700%,0.008700%,0.008700% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.041719%,0.041719%,0.041719% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.647920%,0.647920%,0.647920% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.085984%,0.085984%,0.085984% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.003032%,0.003032%,0.003032% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.003403%,0.003403%,0.003403% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000199%,0.000199%,0.000199% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",168919040,168919040,168919040 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",692060160,692060160,692060160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",10977280,10977280,10977280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",27594496,27594496,27594496 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",40960,40960,40960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",97.896878%,97.896878%,97.896878% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.312166%,1.312166%,1.312166% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.106380,0.106380,0.106380 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.072526,0.072526,0.072526 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",1.813148%,1.813148%,1.813148% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",88.977575%,88.977575%,88.977575% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.969011,0.969011,0.969011 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.880707,0.880707,0.880707 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",14914.000000,14914.000000,14914.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.142165%,99.142165%,99.142165% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000586,0.000586,0.000586 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",79281906,79281906,79281906 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167772204,167772204,167772204 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",76836254,76836254,76836254 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",97553670,97553670,97553670 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",543.174437GB/s,543.174437GB/s,543.174437GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",543.174437GB/s,543.174437GB/s,543.174437GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",11.310002%,11.310002%,11.310002% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",51.245892%,51.245892%,51.245892% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",42.379341%,42.379341%,42.379341% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",248.762900GB/s,248.762900GB/s,248.762900GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",315.837025GB/s,315.837025GB/s,315.837025GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",712.982755GB/s,712.982755GB/s,712.982755GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",254.709756GB/s,254.709756GB/s,254.709756GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",543.174437GB/s,543.174437GB/s,543.174437GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",256.680874GB/s,256.680874GB/s,256.680874GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",543.174580GB/s,543.174580GB/s,543.174580GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",16.973633KB/s,16.973633KB/s,16.972656KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",110105600,110105600,110105600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",58716160,58716160,58716160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",58753721,58753721,58753721 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","High (7)","High (7)","High (7)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006213%,0.006213%,0.006213% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.074012%,0.074012%,0.074012% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",11.614491%,11.614491%,11.614491% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",87.117418%,87.117418%,87.117418% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.006179%,0.006179%,0.006179% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.004797%,0.004797%,0.004797% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000353%,0.000353%,0.000353% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",651427840,651427840,651427840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1111490560,1111490560,1111490560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",105185280,105185280,105185280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",58753721,58753721,58753721 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",34744320,34744320,34744320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",34744320,34744320,34744320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",78673080,78673080,78673080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.781050%,0.781050%,0.781050% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.395486%,0.395486%,0.395486% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.097229,0.097229,0.097229 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.082860,0.082860,0.082860 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.071503%,2.071503%,2.071503% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",78.057656%,78.057656%,78.057656% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.740706,0.740706,0.740706 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.269792,0.269792,0.269792 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",9410.000000,9410.000000,9410.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.640409%,98.640409%,98.640409% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000820,0.000820,0.000820 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",3766154,3766154,3766154 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335544379,335544379,335544379 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",1976618,1976618,1976618 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",23961859,23961859,23961859 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",961.615206GB/s,961.615206GB/s,961.615206GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",961.615206GB/s,961.615206GB/s,961.615206GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",961.615206GB/s,961.615206GB/s,961.615206GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",10.291092%,10.291092%,10.291092% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",60.147028%,60.147028%,60.147028% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",93.308101%,93.308101%,93.308101% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",5.664664GB/s,5.664664GB/s,5.664664GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",68.670773GB/s,68.670773GB/s,68.670773GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",120.260593GB/s,120.260593GB/s,120.260593GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",8.871106GB/s,8.871106GB/s,8.871106GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",961.615206GB/s,961.615206GB/s,961.615206GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",10.793182GB/s,10.793182GB/s,10.793182GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",961.615375GB/s,961.615375GB/s,961.615375GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",15.024414KB/s,15.024414KB/s,15.023438KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",20976640,20976640,20976640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",48179200,48179200,48179200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",30559095,30559095,30559095 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.005903%,0.005903%,0.005903% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.042487%,0.042487%,0.042487% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",3.536245%,3.536245%,3.536245% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",52.176760%,52.176760%,52.176760% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.002584%,0.002584%,0.002584% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.006090%,0.006090%,0.006090% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000452%,0.000452%,0.000452% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",211025920,211025920,211025920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",754974720,754974720,754974720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",30559095,30559095,30559095 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",23603200,23603200,23603200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",23603200,23603200,23603200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",3095468,3095468,3095468 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",43.976013%,43.976013%,43.976013% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.253465%,0.253465%,0.253465% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.042069,0.042069,0.042069 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.042104,0.042104,0.042104 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.052596%,1.052596%,1.052596% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",69.584698%,69.584698%,69.584698% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.704211,0.704211,0.704211 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.155726,0.155726,0.155726 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",31295.000000,31295.000000,31295.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.785948%,98.785948%,98.785948% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000127,0.000127,0.000127 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",304586530,304586530,304586530 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",35,35,35 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",302275445,302275445,302275445 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",96298,96298,96298 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",9.238684%,9.238684%,9.238684% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.368774%,50.368774%,50.368774% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",815.535789GB/s,815.535789GB/s,815.535789GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",266.046394MB/s,266.046394MB/s,266.046393MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",1810.644941GB/s,1810.644941GB/s,1810.644941GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",821.657512GB/s,821.657512GB/s,821.657512GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",821.771070GB/s,821.771070GB/s,821.771070GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",99.016602KB/s,99.016602KB/s,99.015625KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",14.144531KB/s,14.144531KB/s,14.143555KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",160230400,160230400,160230400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",125235718,125235718,125235718 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006807%,0.006807%,0.006807% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.595979%,0.595979%,0.595979% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.272721%,99.272721%,99.272721% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.120771%,0.120771%,0.120771% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.000949%,0.000949%,0.000949% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001375%,0.001375%,0.001375% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000009%,0.000009%,0.000009% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",2622259200,2622259200,2622259200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",125235718,125235718,125235718 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",304544440,304544440,304544440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000020%,0.000020%,0.000020% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.001369%,0.001369%,0.001369% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.115300,0.115300,0.115300 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.115186,0.115186,0.115186 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.879652%,2.879652%,2.879652% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",98.270763%,98.270763%,98.270763% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.999301,0.999301,0.999301 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.116059,0.116059,0.116059 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (4)","Mid (4)","Mid (4)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",14466.000000,14466.000000,14466.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.115599%,99.115599%,99.115599% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.001215,0.001215,0.001215 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",212680394,212680394,212680394 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335544358,335544358,335544358 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",213231082,213231082,213231082 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",219210375,219210375,219210375 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",576.165641GB/s,576.165641GB/s,576.165641GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",576.165641GB/s,576.165641GB/s,576.165641GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",576.165641GB/s,576.165641GB/s,576.165641GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",1.350396%,1.350396%,1.350396% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.235100%,0.235100%,0.235100% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",35.013282%,35.013282%,35.013282% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",366.140673GB/s,366.140673GB/s,366.140673GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",376.407761GB/s,376.407761GB/s,376.407761GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",378.143868GB/s,378.143868GB/s,378.143868GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",365.222223GB/s,365.222223GB/s,365.222223GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",576.165641GB/s,576.165641GB/s,576.165641GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",365.195082GB/s,365.195082GB/s,365.195082GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",576.165706GB/s,576.165706GB/s,576.165706GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",9.001953KB/s,9.001953KB/s,9.000977KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",110105600,110105600,110105600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",56422400,56422400,56422400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",56490974,56490974,56490974 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.004063%,0.004063%,0.004063% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.061288%,0.061288%,0.061288% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",9.671759%,9.671759%,9.671759% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",89.303219%,89.303219%,89.303219% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.006854%,0.006854%,0.006854% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.003068%,0.003068%,0.003068% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000839%,0.000839%,0.000839% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",588513280,588513280,588513280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1111490560,1111490560,1111490560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",94699520,94699520,94699520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",56490974,56490974,56490974 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",34744320,34744320,34744320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",34744320,34744320,34744320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",212696200,212696200,212696200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.765619%,0.765619%,0.765619% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.183291%,0.183291%,0.183291% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.048033,0.048033,0.048033 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.037660,0.037660,0.037660 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",0.941497%,0.941497%,0.941497% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",82.938247%,82.938247%,82.938247% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.605546,0.605546,0.605546 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.107386,0.107386,0.107386 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",39615.000000,39615.000000,39615.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.040925%,99.040925%,99.040925% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000105,0.000105,0.000105 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",655996150,655996150,655996150 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",23311,23311,23311 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",651342318,651342318,651342318 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",109375,109375,109375 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",2.248783%,2.248783%,2.248783% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.693553%,0.693553%,0.693553% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",818.351229GB/s,818.351229GB/s,818.351229GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",140.717615MB/s,140.717615MB/s,140.717614MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",843.186383GB/s,843.186383GB/s,843.186383GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",824.199802GB/s,824.199802GB/s,824.199802GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",824.198337GB/s,824.198337GB/s,824.198337GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",29.991024MB/s,29.991024MB/s,29.991023MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",6.586914KB/s,6.586914KB/s,6.585938KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",167818240,167818240,167818240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",167835918,167835918,167835918 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003578%,0.003578%,0.003578% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.368061%,0.368061%,0.368061% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.625530%,99.625530%,99.625530% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.001182%,0.001182%,0.001182% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.000452%,0.000452%,0.000452% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.000768%,0.000768%,0.000768% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000125%,0.000125%,0.000125% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",3985408000,3985408000,3985408000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",167835918,167835918,167835918 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",655997316,655997316,655997316 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000010%,0.000010%,0.000010% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000293%,0.000293%,0.000293% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.080943,0.080943,0.080943 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.073637,0.073637,0.073637 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.840926%,1.840926%,1.840926% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",96.078808%,96.078808%,96.078808% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999261,0.999261,0.999261 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.073824,0.073824,0.073824 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",29247.000000,29247.000000,29247.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.700935%,98.700935%,98.700935% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000220,0.000220,0.000220 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",123326,123326,123326 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",16,16,16 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",122978,122978,122978 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",37347,37347,37347 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.963379%,99.963379%,99.963379% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.000000%,50.000000%,50.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",1.088889GB/s,1.088889GB/s,1.088889GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",338.619576MB/s,338.619576MB/s,338.619575MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",5942.230904GB/s,5942.230904GB/s,5942.230904GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",1.088022GB/s,1.088022GB/s,1.088022GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",1.091971GB/s,1.091971GB/s,1.091971GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",148.550781KB/s,148.550781KB/s,148.549805KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.421875KB/s,46.421875KB/s,46.420898KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",149744640,149744640,149744640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",114759872,114759872,114759872 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.024802%,0.024802%,0.024802% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.788243%,1.788243%,1.788243% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",54.283385%,54.283385%,54.283385% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",42.540995%,42.540995%,42.540995% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.003085%,0.003085%,0.003085% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.024935%,0.024935%,0.024935% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001034%,0.001034%,0.001034% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",2286714880,2286714880,2286714880 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",114759872,114759872,114759872 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",122880,122880,122880 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000066%,0.000066%,0.000066% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.333456%,1.333456%,1.333456% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.341001,0.341001,0.341001 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.341076,0.341076,0.341076 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",8.526905%,8.526905%,8.526905% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",99.705991%,99.705991%,99.705991% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.978246,0.978246,0.978246 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",1.186584,1.186584,1.186584 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",9666.000000,9666.000000,9666.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.676417%,98.676417%,98.676417% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000664,0.000664,0.000664 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",1703206,1703206,1703206 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167772204,167772204,167772204 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",216378,216378,216378 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",12586426,12586426,12586426 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",1184.455490GB/s,1184.455490GB/s,1184.455490GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",1184.455490GB/s,1184.455490GB/s,1184.455490GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",12.634422%,12.634422%,12.634422% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",93.145333%,93.145333%,93.145333% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",92.773976%,92.773976%,92.773976% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",1.527608GB/s,1.527608GB/s,1.527608GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",88.858970GB/s,88.858970GB/s,88.858970GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",370.286928GB/s,370.286928GB/s,370.286928GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",12.039397GB/s,12.039397GB/s,12.039397GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1184.455490GB/s,1184.455490GB/s,1184.455490GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",12.024472GB/s,12.024472GB/s,12.024472GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",1184.455801GB/s,1184.455801GB/s,1184.455801GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",37.013672KB/s,37.013672KB/s,37.012695KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",26219520,26219520,26219520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",31846400,31846400,31846400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",31867551,31867551,31867551 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.007496%,0.007496%,0.007496% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.070154%,0.070154%,0.070154% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.628439%,0.628439%,0.628439% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",91.902745%,91.902745%,91.902745% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.008994%,0.008994%,0.008994% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.003535%,0.003535%,0.003535% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000107%,0.000107%,0.000107% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",211025920,211025920,211025920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",775946240,775946240,775946240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",21299200,21299200,21299200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",31867551,31867551,31867551 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",24258560,24258560,24258560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",24258560,24258560,24258560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",1705320,1705320,1705320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",7.153044%,7.153044%,7.153044% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.225486%,0.225486%,0.225486% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.114195,0.114195,0.114195 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.079888,0.079888,0.079888 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",1.997189%,1.997189%,1.997189% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",94.248643%,94.248643%,94.248643% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.987923,0.987923,0.987923 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.222283,0.222283,0.222283 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",38975.000000,38975.000000,38975.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.025176%,99.025176%,99.025176% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000125,0.000125,0.000125 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",629435886,629435886,629435886 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",51,51,51 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",610264802,610264802,610264802 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",96991,96991,96991 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",6.198731%,6.198731%,6.198731% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",3.552743%,3.552743%,3.552743% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",792.159344GB/s,792.159344GB/s,792.159344GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",128.921580MB/s,128.921580MB/s,128.921579MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",871.138821GB/s,871.138821GB/s,871.138821GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",817.114332GB/s,817.114332GB/s,817.114332GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",817.044530GB/s,817.044530GB/s,817.044530GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",69.416016KB/s,69.416016KB/s,69.415039KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",6.804688KB/s,6.804688KB/s,6.803711KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",164541440,164541440,164541440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",164561997,164561997,164561997 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003569%,0.003569%,0.003569% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.384987%,0.384987%,0.384987% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.608477%,99.608477%,99.608477% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.001198%,0.001198%,0.001198% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.000482%,0.000482%,0.000482% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.000811%,0.000811%,0.000811% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000136%,0.000136%,0.000136% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",3880550400,3880550400,3880550400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",164561997,164561997,164561997 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",629489660,629489660,629489660 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000010%,0.000010%,0.000010% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000329%,0.000329%,0.000329% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.083188,0.083188,0.083188 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.077131,0.077131,0.077131 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.928283%,1.928283%,1.928283% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",92.916694%,92.916694%,92.916694% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999519,0.999519,0.999519 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.077342,0.077342,0.077342 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",31551.000000,31551.000000,31551.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.795799%,98.795799%,98.795799% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000099,0.000099,0.000099 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",335540586,335540586,335540586 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",36,36,36 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",335364962,335364962,335364962 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",108903,108903,108903 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.000898%,0.000898%,0.000898% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.046038%,50.046038%,50.046038% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",833.655064GB/s,833.655064GB/s,833.655064GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",277.209753MB/s,277.209753MB/s,277.209752MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",1668.252739GB/s,1668.252739GB/s,1668.252739GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",834.093427GB/s,834.093427GB/s,834.093427GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",834.091633GB/s,834.091633GB/s,834.091633GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",93.835938KB/s,93.835938KB/s,93.834961KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",13.032227KB/s,13.032227KB/s,13.031250KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",161541120,161541120,161541120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",126543250,126543250,126543250 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006180%,0.006180%,0.006180% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.546383%,0.546383%,0.546383% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.398489%,99.398489%,99.398489% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.045142%,0.045142%,0.045142% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.000861%,0.000861%,0.000861% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002468%,0.002468%,0.002468% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000008%,0.000008%,0.000008% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",2664202240,2664202240,2664202240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",126543250,126543250,126543250 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",335541308,335541308,335541308 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000018%,0.000018%,0.000018% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000450%,0.000450%,0.000450% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.105910,0.105910,0.105910 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.105921,0.105921,0.105921 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.648023%,2.648023%,2.648023% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",99.357815%,99.357815%,99.357815% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.999294,0.999294,0.999294 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.106148,0.106148,0.106148 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",10626.000000,10626.000000,10626.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.795996%,98.795996%,98.795996% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.001206,0.001206,0.001206 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",55342302,55342302,55342302 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335568486,335568486,335568486 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",51015430,51015430,51015430 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",79641740,79641740,79641740 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",840.391360GB/s,840.391360GB/s,840.391360GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",840.391360GB/s,840.391360GB/s,840.391360GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",840.391360GB/s,840.391360GB/s,840.391360GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",6.843094%,6.843094%,6.843094% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",7.614401%,7.614401%,7.614401% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",77.073293%,77.073293%,77.073293% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",127.771278GB/s,127.771278GB/s,127.771278GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",199.467630GB/s,199.467630GB/s,199.467630GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",210.149133GB/s,210.149133GB/s,210.149133GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",138.211878GB/s,138.211878GB/s,138.211878GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",840.391360GB/s,840.391360GB/s,840.391360GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",138.608195GB/s,138.608195GB/s,138.608195GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",840.451885GB/s,840.451885GB/s,840.451885GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",13.130859KB/s,13.130859KB/s,13.129883KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",41948160,41948160,41948160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",36761600,36761600,36761600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",36805237,36805237,36805237 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Mid (4)","Mid (4)","Mid (4)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006141%,0.006141%,0.006141% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.042497%,0.042497%,0.042497% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",8.287588%,8.287588%,8.287588% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",83.698403%,83.698403%,83.698403% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.006947%,0.006947%,0.006947% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.006024%,0.006024%,0.006024% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000614%,0.000614%,0.000614% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",315883520,315883520,315883520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",838860800,838860800,838860800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",10813440,10813440,10813440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",36805237,36805237,36805237 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",26224640,26224640,26224640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",26224640,26224640,26224640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",55184064,55184064,55184064 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",7.795535%,7.795535%,7.795535% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.156251%,0.156251%,0.156251% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.064599,0.064599,0.064599 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.045431,0.045431,0.045431 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.135786%,1.135786%,1.135786% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",67.983461%,67.983461%,67.983461% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.730807,0.730807,0.730807 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.118688,0.118688,0.118688 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",30783.000000,30783.000000,30783.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.765755%,98.765755%,98.765755% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000199,0.000199,0.000199 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",124433282,124433282,124433282 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",36,36,36 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",457654,457654,457654 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",96231,96231,96231 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",62.777412%,62.777412%,62.777412% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",99.632826%,99.632826%,99.632826% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",3.079974GB/s,3.079974GB/s,3.079974GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",663.169879MB/s,663.169879MB/s,663.169878MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",4516.510017GB/s,4516.510017GB/s,4516.510017GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",840.555307GB/s,840.555307GB/s,840.555307GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",837.425909GB/s,837.425909GB/s,837.425909GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",254.045898KB/s,254.045898KB/s,254.044922KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",35.283203KB/s,35.283203KB/s,35.282227KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",157608960,157608960,157608960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",122623337,122623337,122623337 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.022855%,0.022855%,0.022855% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.849706%,1.849706%,1.849706% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",80.236492%,80.236492%,80.236492% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",17.396839%,17.396839%,17.396839% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.003059%,0.003059%,0.003059% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.018156%,0.018156%,0.018156% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000787%,0.000787%,0.000787% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",2538373120,2538373120,2538373120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",122623337,122623337,122623337 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",124898280,124898280,124898280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000064%,0.000064%,0.000064% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.472041%,0.472041%,0.472041% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.350434,0.350434,0.350434 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.350504,0.350504,0.350504 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",8.762609%,8.762609%,8.762609% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",78.848596%,78.848596%,78.848596% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.997364,0.997364,0.997364 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.646446,0.646446,0.646446 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Mid (4)","Mid (4)","Mid (4)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",39871.000000,39871.000000,39871.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.047083%,99.047083%,99.047083% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000128,0.000128,0.000128 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",671088742,671088742,671088742 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",42,42,42 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",669866489,669866489,669866489 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",109370,109370,109370 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.164657%,0.164657%,0.164657% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",827.840421GB/s,827.840421GB/s,827.840421GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",138.406518MB/s,138.406518MB/s,138.406517MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",829.376101GB/s,829.376101GB/s,829.376101GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",829.350791GB/s,829.350791GB/s,829.350791GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",829.350917GB/s,829.350917GB/s,829.350917GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",54.425781KB/s,54.425781KB/s,54.424805KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",6.478516KB/s,6.478516KB/s,6.477539KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",169128960,169128960,169128960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",169150644,169150644,169150644 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003252%,0.003252%,0.003252% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.359025%,0.359025%,0.359025% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.634259%,99.634259%,99.634259% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.001115%,0.001115%,0.001115% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.000438%,0.000438%,0.000438% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001473%,0.001473%,0.001473% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000119%,0.000119%,0.000119% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",4027351040,4027351040,4027351040 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",169150644,169150644,169150644 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",671088640,671088640,671088640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000009%,0.000009%,0.000009% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000309%,0.000309%,0.000309% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.079226,0.079226,0.079226 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.071977,0.071977,0.071977 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.799436%,1.799436%,1.799436% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",95.671312%,95.671312%,95.671312% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999162,0.999162,0.999162 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.072175,0.072175,0.072175 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",10946.000000,10946.000000,10946.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.831194%,98.831194%,98.831194% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000589,0.000589,0.000589 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",17187118,17187118,17187118 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167796308,167796308,167796308 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",15276210,15276210,15276210 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",44501544,44501544,44501544 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",1133.087468GB/s,1133.087468GB/s,1133.087468GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",1133.087468GB/s,1133.087468GB/s,1133.087468GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",13.939077%,13.939077%,13.939077% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",55.621194%,55.621194%,55.621194% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",74.087830%,74.087830%,74.087830% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",103.171361GB/s,103.171361GB/s,103.171361GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",300.551306GB/s,300.551306GB/s,300.551306GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",637.500017GB/s,637.500017GB/s,637.500017GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",116.317749GB/s,116.317749GB/s,116.317749GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1133.087468GB/s,1133.087468GB/s,1133.087468GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",116.077113GB/s,116.077113GB/s,116.077113GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",1133.250557GB/s,1133.250557GB/s,1133.250557GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",35.408203KB/s,35.408203KB/s,35.407227KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",47191040,47191040,47191040 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",38400000,38400000,38400000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",38422614,38422614,38422614 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.008343%,0.008343%,0.008343% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.053265%,0.053265%,0.053265% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",3.817347%,3.817347%,3.817347% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",90.294300%,90.294300%,90.294300% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.007722%,0.007722%,0.007722% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.003487%,0.003487%,0.003487% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000283%,0.000283%,0.000283% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",336855040,336855040,336855040 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",859832320,859832320,859832320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",21299200,21299200,21299200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",38422614,38422614,38422614 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",26880000,26880000,26880000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",26880000,26880000,26880000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",17222748,17222748,17222748 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",5.473491%,5.473491%,5.473491% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.341761%,0.341761%,0.341761% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.120873,0.120873,0.120873 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.091544,0.091544,0.091544 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.288597%,2.288597%,2.288597% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",94.963502%,94.963502%,94.963502% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.985541,0.985541,0.985541 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.307034,0.307034,0.307034 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",31167.000000,31167.000000,31167.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.780962%,98.780962%,98.780962% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000224,0.000224,0.000224 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",8174734,8174734,8174734 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",28,28,28 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",208840,208840,208840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",85807,85807,85807 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",97.468553%,97.468553%,97.468553% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",97.434534%,97.434534%,97.434534% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",1.847904GB/s,1.847904GB/s,1.847904GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",777.478381MB/s,777.478381MB/s,777.478380MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",5938.254240GB/s,5938.254240GB/s,5938.254240GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",75.159598GB/s,75.159598GB/s,75.159598GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",72.333466GB/s,72.333466GB/s,72.333466GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",259.790039KB/s,259.790039KB/s,259.789062KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.390625KB/s,46.390625KB/s,46.389648KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",124564480,124564480,124564480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",124592392,124592392,124592392 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.023592%,0.023592%,0.023592% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.931408%,1.931408%,1.931408% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",67.018582%,67.018582%,67.018582% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",30.159776%,30.159776%,30.159776% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.003151%,0.003151%,0.003151% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.021828%,0.021828%,0.021828% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000862%,0.000862%,0.000862% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",2601287680,2601287680,2601287680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",124592392,124592392,124592392 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",8494128,8494128,8494128 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000066%,0.000066%,0.000066% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.840736%,0.840736%,0.840736% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.395020,0.395020,0.395020 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.370099,0.370099,0.370099 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",9.252474%,9.252474%,9.252474% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",98.723139%,98.723139%,98.723139% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.998116,0.998116,0.998116 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.902395,0.902395,0.902395 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"inst_per_warp","Instructions per warp",8710.000000,8710.000000,8710.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.531142%,98.531142%,98.531142% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"inst_replay_overhead","Instruction Replay Overhead",0.000732,0.000775,0.000753 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"gst_transactions","Global Store Transactions",16777216,16777216,16777216 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"l2_read_transactions","L2 Read Transactions",3094,3106,3100 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"l2_write_transactions","L2 Write Transactions",16777232,16777232,16777232 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"dram_read_transactions","Device Memory Read Transactions",42,114,78 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"dram_write_transactions","Device Memory Write Transactions",225,346,285 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"gst_requested_throughput","Requested Global Store Throughput",69.965953GB/s,70.955833GB/s,70.457417GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"gst_throughput","Global Store Throughput",69.965953GB/s,70.955833GB/s,70.457417GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"tex_cache_hit_rate","Unified Cache Hit Rate",5.867992%,5.867992%,5.867992% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",99.375000%,99.375000%,99.375000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",99.999809%,99.999809%,99.999809% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"dram_read_throughput","Device Memory Read Throughput",186.258789KB/s,498.506836KB/s,343.478516KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"dram_write_throughput","Device Memory Write Throughput",997.815430KB/s,1.477552MB/s,1.227757MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"tex_cache_throughput","Unified cache to SM throughput",8.762826GB/s,8.886802GB/s,8.824379GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",10.932179MB/s,11.086848MB/s,11.008970MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",69.965953GB/s,70.955833GB/s,70.457417GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"l2_read_throughput","L2 Throughput (Reads)",13.212564MB/s,13.451466MB/s,13.331176MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"l2_write_throughput","L2 Throughput (Writes)",69.966020GB/s,70.955901GB/s,70.457484GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"sysmem_write_throughput","System Memory Write Throughput",21.864258KB/s,22.172852KB/s,22.016602KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"tex_cache_transactions","Unified cache to SM transactions",1049600,1049600,1049600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"inst_executed","Instructions Executed",5390336,8919040,7154688 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"inst_issued","Instructions Issued",5394513,5394704,5394608 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.007364%,0.007487%,0.007426% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.024642%,0.024899%,0.024771% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"stall_memory_dependency","Issue Stall Reasons (Data Request)",2.169560%,2.174952%,2.172256% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"stall_texture","Issue Stall Reasons (Texture)",0.000981%,0.000982%,0.000981% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"stall_other","Issue Stall Reasons (Other)",0.002091%,0.002101%,0.002096% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.006862%,0.007420%,0.007141% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000045%,0.000046%,0.000046% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"inst_integer","Integer Instructions",31817728,31817728,31817728 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"inst_control","Control-Flow Instructions",2097152,2097152,2097152 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"inst_compute_ld_st","Load/Store Instructions",138412032,138412032,138412032 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"inst_misc","Misc Instructions",98304,98304,98304 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"issue_slots","Issue Slots",5394513,5394704,5394608 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"cf_issued","Issued Control-Flow Instructions",67584,67584,67584 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"cf_executed","Executed Control-Flow Instructions",67584,67584,67584 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"ldst_issued","Issued Load/Store Instructions",4327424,4327424,4327424 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"ldst_executed","Executed Load/Store Instructions",4327424,4327424,4327424 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",2560,2560,2560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",97.651284%,97.659430%,97.655357% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.129014%,0.130839%,0.129927% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",16777216,16777216,16777216 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"ipc","Executed IPC",0.009410,0.015274,0.012342 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"issued_ipc","Issued IPC",0.009304,0.009416,0.009360 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"issue_slot_utilization","Issue Slot Utilization",0.232589%,0.235411%,0.234000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"sm_efficiency","Multiprocessor Activity",81.089622%,81.100038%,81.094830% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"achieved_occupancy","Achieved Occupancy",0.210554,0.210573,0.210564 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.026614,0.026983,0.026798 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",2,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",31807.000000,31807.000000,31807.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.805491%,98.805491%,98.805491% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000114,0.000114,0.000114 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",150226118,150226118,150226118 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",23492,23492,23492 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",147212430,147212430,147212430 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",108923,108923,108923 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",55.232425%,55.232425%,55.232425% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",51.005096%,51.005096%,51.005096% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",710.354502GB/s,710.354502GB/s,710.354502GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",538.208072MB/s,538.208072MB/s,538.208071MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",3238.350084GB/s,3238.350084GB/s,3238.350084GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",724.843285GB/s,724.843285GB/s,724.843285GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",724.896663GB/s,724.896663GB/s,724.896663GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",116.078183MB/s,116.078183MB/s,116.078182MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",25.298828KB/s,25.298828KB/s,25.297852KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",127841280,127841280,127841280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",127856309,127856309,127856309 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.013077%,0.013077%,0.013077% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.090938%,1.090938%,1.090938% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",98.885235%,98.885235%,98.885235% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.005149%,0.005149%,0.005149% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.001725%,0.001725%,0.001725% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002797%,0.002797%,0.002797% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000352%,0.000352%,0.000352% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",2706145280,2706145280,2706145280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",127856309,127856309,127856309 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",150215056,150215056,150215056 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000036%,0.000036%,0.000036% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000691%,0.000691%,0.000691% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.196899,0.196899,0.196899 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.210711,0.210711,0.210711 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",5.267785%,5.267785%,5.267785% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",99.052725%,99.052725%,99.052725% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.998957,0.998957,0.998957 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.211143,0.211143,0.211143 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (6)","Mid (6)","Mid (6)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",31423.000000,31423.000000,31423.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.790894%,98.790894%,98.790894% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000142,0.000142,0.000142 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",63801890,63801890,63801890 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",37,37,37 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",979822,979822,979822 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",97211,97211,97211 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",81.472398%,81.472398%,81.472398% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",100.882906%,100.882906%,100.882906% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",8.596316GB/s,8.596316GB/s,8.596316GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",873.334346MB/s,873.334346MB/s,873.334345MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",5887.871370GB/s,5887.871370GB/s,5887.871370GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",545.424041GB/s,545.424041GB/s,545.424041GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",559.755948GB/s,559.755948GB/s,559.755948GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",340.381836KB/s,340.381836KB/s,340.380859KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",45.997070KB/s,45.997070KB/s,45.996094KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",125875200,125875200,125875200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",125893065,125893065,125893065 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.022886%,0.022886%,0.022886% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.948150%,1.948150%,1.948150% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",85.433051%,85.433051%,85.433051% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",12.244077%,12.244077%,12.244077% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.003128%,0.003128%,0.003128% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.012481%,0.012481%,0.012481% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000662%,0.000662%,0.000662% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",2643230720,2643230720,2643230720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",125893065,125893065,125893065 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",62168316,62168316,62168316 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000066%,0.000066%,0.000066% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.335499%,0.335499%,0.335499% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.315145,0.315145,0.315145 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.372526,0.372526,0.372526 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",9.313153%,9.313153%,9.313153% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",99.363452%,99.363452%,99.363452% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.998401,0.998401,0.998401 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.584339,0.584339,0.584339 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",31551.000000,31551.000000,31551.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.795799%,98.795799%,98.795799% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000113,0.000113,0.000113 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",100194410,100194410,100194410 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",23607,23607,23607 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",75144894,75144894,75144894 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",109356,109356,109356 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",70.118128%,70.118128%,70.118128% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",62.592775%,62.592775%,62.592775% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",501.703008GB/s,501.703008GB/s,501.703008GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",747.635305MB/s,747.635305MB/s,747.635304MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",4480.643282GB/s,4480.643282GB/s,4480.643282GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",669.429614GB/s,669.429614GB/s,669.429614GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",668.945477GB/s,668.945477GB/s,668.945477GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",161.394222MB/s,161.394222MB/s,161.394221MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",35.003906KB/s,35.003906KB/s,35.002930KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",126530560,126530560,126530560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",126544678,126544678,126544678 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","High (7)","High (7)","High (7)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.019797%,0.019797%,0.019797% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.523565%,1.523565%,1.523565% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",98.409943%,98.409943%,98.409943% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.037048%,0.037048%,0.037048% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.002431%,0.002431%,0.002431% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.005325%,0.005325%,0.005325% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000476%,0.000476%,0.000476% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",2664202240,2664202240,2664202240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",126544678,126544678,126544678 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",100266924,100266924,100266924 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000051%,0.000051%,0.000051% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.001364%,0.001364%,0.001364% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.234985,0.234985,0.234985 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.292836,0.292836,0.292836 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",7.320910%,7.320910%,7.320910% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",98.049666%,98.049666%,98.049666% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.999063,0.999063,0.999063 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.293731,0.293731,0.293731 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Mid (4)","Mid (4)","Mid (4)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","High (8)","High (8)","High (8)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",20929.000000,20929.000000,20929.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.796228%,98.796228%,98.796228% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000876,0.000876,0.000876 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",151943934,151943934,151943934 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167796353,167796353,167796353 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",149829066,149829066,149829066 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",155357734,155357734,155357734 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",355.074371GB/s,355.074371GB/s,355.074371GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",355.074371GB/s,355.074371GB/s,355.074371GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",4.659828%,4.659828%,4.659828% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.541528%,50.541528%,50.541528% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",7.910695%,7.910695%,7.910695% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",317.099461GB/s,317.099461GB/s,317.099461GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",328.800379GB/s,328.800379GB/s,328.800379GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",710.192087GB/s,710.192087GB/s,710.192087GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",321.982662GB/s,321.982662GB/s,321.982662GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",355.074371GB/s,355.074371GB/s,355.074371GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",321.575384GB/s,321.575384GB/s,321.575384GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",355.125574GB/s,355.125574GB/s,355.125574GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",11.095703KB/s,11.095703KB/s,11.094727KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",167777280,167777280,167777280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",72145920,72145920,72145920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",72209105,72209105,72209105 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","High (8)","High (8)","High (8)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.007787%,0.007787%,0.007787% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.094023%,0.094023%,0.094023% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",7.279692%,7.279692%,7.279692% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",91.822705%,91.822705%,91.822705% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.005495%,0.005495%,0.005495% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.005873%,0.005873%,0.005873% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001494%,0.001494%,0.001494% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",798064640,798064640,798064640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",147128320,147128320,147128320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",72209105,72209105,72209105 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",41953280,41953280,41953280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",41953280,41953280,41953280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",152136372,152136372,152136372 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.395419%,0.395419%,0.395419% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.387512%,0.387512%,0.387512% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.085014,0.085014,0.085014 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.062642,0.062642,0.062642 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",1.566062%,1.566062%,1.566062% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",81.657954%,81.657954%,81.657954% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.742337,0.742337,0.742337 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.246661,0.246661,0.246661 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",17794.000000,17794.000000,17794.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.281008%,99.281008%,99.281008% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.001111,0.001111,0.001111 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",333685646,333685646,333685646 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335544370,335544370,335544370 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",334073058,334073058,334073058 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",335197847,335197847,335197847 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",374.500346GB/s,374.500346GB/s,374.500346GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",374.500346GB/s,374.500346GB/s,374.500346GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",374.500346GB/s,374.500346GB/s,374.500346GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.362986%,0.362986%,0.362986% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.042024%,0.042024%,0.042024% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.111960%,0.111960%,0.111960% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",372.858274GB/s,372.858274GB/s,372.858274GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",374.113648GB/s,374.113648GB/s,374.113648GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",374.523204GB/s,374.523204GB/s,374.523204GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",371.781576GB/s,371.781576GB/s,371.781576GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",374.500346GB/s,374.500346GB/s,374.500346GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",372.425884GB/s,372.425884GB/s,372.425884GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",374.500402GB/s,374.500402GB/s,374.500402GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",5.851562KB/s,5.851562KB/s,5.850586KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",167777280,167777280,167777280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",73461760,73461760,73461760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",73543356,73543356,73543356 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.002905%,0.002905%,0.002905% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.061459%,0.061459%,0.061459% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",16.449925%,16.449925%,16.449925% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",83.126354%,83.126354%,83.126354% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.003871%,0.003871%,0.003871% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.003784%,0.003784%,0.003784% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000742%,0.000742%,0.000742% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",819200000,819200000,819200000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",178585600,178585600,178585600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",73543356,73543356,73543356 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",41953280,41953280,41953280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",41953280,41953280,41953280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",333108360,333108360,333108360 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.188819%,0.188819%,0.188819% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.162141%,0.162141%,0.162141% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.038163,0.038163,0.038163 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.031360,0.031360,0.031360 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",0.784000%,0.784000%,0.784000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",88.367416%,88.367416%,88.367416% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.598904,0.598904,0.598904 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.092132,0.092132,0.092132 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",12226.000000,12226.000000,12226.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.953562%,98.953562%,98.953562% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000738,0.000738,0.000738 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",51180906,51180906,51180906 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167772196,167772196,167772196 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",51248678,51248678,51248678 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",69576882,69576882,69576882 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",966.996786GB/s,966.996786GB/s,966.996786GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",966.996786GB/s,966.996786GB/s,966.996786GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",7.024512%,7.024512%,7.024512% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.065058%,50.065058%,50.065058% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",58.959637%,58.959637%,58.959637% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",295.384568GB/s,295.384568GB/s,295.384568GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",401.023753GB/s,401.023753GB/s,401.023753GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",785.802931GB/s,785.802931GB/s,785.802931GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",297.320369GB/s,297.320369GB/s,297.320369GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",966.996786GB/s,966.996786GB/s,966.996786GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",294.993947GB/s,294.993947GB/s,294.993947GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",966.996994GB/s,966.996994GB/s,966.996994GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",30.217773KB/s,30.217773KB/s,30.216797KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",68162560,68162560,68162560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",44953600,44953600,44953600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",44986780,44986780,44986780 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006364%,0.006364%,0.006364% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.056136%,0.056136%,0.056136% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",7.837588%,7.837588%,7.837588% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",89.496092%,89.496092%,89.496092% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.010306%,0.010306%,0.010306% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.006544%,0.006544%,0.006544% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000848%,0.000848%,0.000848% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",462684160,462684160,462684160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",943718400,943718400,943718400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",21299200,21299200,21299200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",44986780,44986780,44986780 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",29501440,29501440,29501440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",29501440,29501440,29501440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",51584536,51584536,51584536 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",2.200526%,2.200526%,2.200526% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.385595%,0.385595%,0.385595% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.111166,0.111166,0.111166 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.090666,0.090666,0.090666 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.266640%,2.266640%,2.266640% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",95.835314%,95.835314%,95.835314% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.988593,0.988593,0.988593 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.334565,0.334565,0.334565 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",8902.000000,8902.000000,8902.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.562823%,98.562823%,98.562823% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000365,0.000365,0.000365 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",10486,10486,10486 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335544336,335544336,335544336 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",322,322,322 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",2224,2224,2224 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",530.828023GB/s,530.828023GB/s,530.828023GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",530.828023GB/s,530.828023GB/s,530.828023GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",530.828023GB/s,530.828023GB/s,530.828023GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",3.027344%,3.027344%,3.027344% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",97.500000%,97.500000%,97.500000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",99.999924%,99.999924%,99.999924% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",534.145508KB/s,534.145508KB/s,534.144531KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",3.602787MB/s,3.602787MB/s,3.602786MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",16.620775GB/s,16.620775GB/s,16.620775GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",16.588375MB/s,16.588375MB/s,16.588374MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",530.828023GB/s,530.828023GB/s,530.828023GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",16.986885MB/s,16.986885MB/s,16.986884MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",530.828049GB/s,530.828049GB/s,530.828049GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",8.293945KB/s,8.293945KB/s,8.292969KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",5248000,5248000,5248000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",27934720,27934720,27934720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",27944784,27944784,27944784 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.001769%,0.001769%,0.001769% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.010093%,0.010093%,0.010093% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.448331%,0.448331%,0.448331% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.057293%,0.057293%,0.057293% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.000613%,0.000613%,0.000613% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.000773%,0.000773%,0.000773% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000054%,0.000054%,0.000054% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",180224000,180224000,180224000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",692060160,692060160,692060160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",10813440,10813440,10813440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",27944784,27944784,27944784 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",10240,10240,10240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",99.210801%,99.210801%,99.210801% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.270274%,0.270274%,0.270274% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.024738,0.024738,0.024738 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.015353,0.015353,0.015353 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",0.383814%,0.383814%,0.383814% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",95.937674%,95.937674%,95.937674% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.984025,0.984025,0.984025 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.184196,0.184196,0.184196 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",12098.000000,12098.000000,12098.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.942490%,98.942490%,98.942490% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000869,0.000869,0.000869 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",125289990,125289990,125289990 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335544372,335544372,335544372 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",123040558,123040558,123040558 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",132075774,132075774,132075774 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",722.252364GB/s,722.252364GB/s,722.252364GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",722.252364GB/s,722.252364GB/s,722.252364GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",722.252364GB/s,722.252364GB/s,722.252364GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",2.429148%,2.429148%,2.429148% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",1.432363%,1.432363%,1.432363% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",60.967114%,60.967114%,60.967114% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",264.842313GB/s,264.842313GB/s,264.842313GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",284.290433GB/s,284.290433GB/s,284.290433GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",293.459106GB/s,293.459106GB/s,293.459106GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",268.742964GB/s,268.742964GB/s,268.742964GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",722.252364GB/s,722.252364GB/s,722.252364GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",269.684170GB/s,269.684170GB/s,269.684170GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",722.252476GB/s,722.252476GB/s,722.252476GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",11.285156KB/s,11.285156KB/s,11.284180KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",68162560,68162560,68162560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",44298240,44298240,44298240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",44337546,44337546,44337546 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","High (7)","High (7)","High (7)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.004514%,0.004514%,0.004514% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.050537%,0.050537%,0.050537% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",14.068879%,14.068879%,14.068879% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",79.046828%,79.046828%,79.046828% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.004892%,0.004892%,0.004892% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002817%,0.002817%,0.002817% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001355%,0.001355%,0.001355% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",420741120,420741120,420741120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",943718400,943718400,943718400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",42270720,42270720,42270720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",44337546,44337546,44337546 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",29501440,29501440,29501440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",29501440,29501440,29501440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",124852724,124852724,124852724 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",6.642699%,6.642699%,6.642699% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.177477%,0.177477%,0.177477% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.058909,0.058909,0.058909 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.044350,0.044350,0.044350 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.108756%,1.108756%,1.108756% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",72.781624%,72.781624%,72.781624% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.736558,0.736558,0.736558 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.127604,0.127604,0.127604 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",23875.000000,23875.000000,23875.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.408639%,98.408639%,98.408639% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000334,0.000334,0.000334 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",5222,5222,5222 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",16,16,16 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",1234,1234,1234 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",1016,1016,1016 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.998474%,99.998474%,99.998474% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",95.000000%,95.000000%,95.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",11.209998MB/s,11.209998MB/s,11.209997MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",9.229626MB/s,9.229626MB/s,9.229625MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",5953.654108GB/s,5953.654108GB/s,5953.654108GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",46.511503MB/s,46.511503MB/s,46.511502MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",47.438099MB/s,47.438099MB/s,47.438098MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",148.835938KB/s,148.835938KB/s,148.834961KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.510742KB/s,46.510742KB/s,46.509766KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",122240000,122240000,122240000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",87255399,87255399,87255399 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.017366%,0.017366%,0.017366% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.390457%,1.390457%,1.390457% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",52.977964%,52.977964%,52.977964% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",44.306348%,44.306348%,44.306348% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.003053%,0.003053%,0.003053% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.027975%,0.027975%,0.027975% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000993%,0.000993%,0.000993% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",1406566400,1406566400,1406566400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",87255399,87255399,87255399 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",5120,5120,5120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000065%,0.000065%,0.000065% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.275778%,1.275778%,1.275778% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.259502,0.259502,0.259502 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.259589,0.259589,0.259589 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",6.489714%,6.489714%,6.489714% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",99.721493%,99.721493%,99.721493% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.990911,0.990911,0.990911 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",1.077094,1.077094,1.077094 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",23875.000000,23875.000000,23875.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.408639%,98.408639%,98.408639% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000332,0.000332,0.000332 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",5798,5798,5798 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",16,16,16 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",138,138,138 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",168,168,168 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.998474%,99.998474%,99.998474% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",98.750000%,98.750000%,98.750000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",1.253189MB/s,1.253189MB/s,1.253188MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",1.525621MB/s,1.525621MB/s,1.525620MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",5951.561821GB/s,5951.561821GB/s,5951.561821GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",46.495157MB/s,46.495157MB/s,46.495156MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",52.652133MB/s,52.652133MB/s,52.652132MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",148.784180KB/s,148.784180KB/s,148.783203KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.495117KB/s,46.495117KB/s,46.494141KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",87229440,87229440,87229440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",87258366,87258366,87258366 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.016952%,0.016952%,0.016952% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.356509%,1.356509%,1.356509% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",55.138746%,55.138746%,55.138746% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",42.250546%,42.250546%,42.250546% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.003027%,0.003027%,0.003027% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.023430%,0.023430%,0.023430% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001062%,0.001062%,0.001062% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",1406566400,1406566400,1406566400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",87258366,87258366,87258366 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",5120,5120,5120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000064%,0.000064%,0.000064% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.209664%,1.209664%,1.209664% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.347575,0.347575,0.347575 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.259693,0.259693,0.259693 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",6.492324%,6.492324%,6.492324% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",99.709065%,99.709065%,99.709065% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.989412,0.989412,0.989412 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",1.053976,1.053976,1.053976 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",31807.000000,31807.000000,31807.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.805491%,98.805491%,98.805491% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000112,0.000112,0.000112 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",162800438,162800438,162800438 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",23519,23519,23519 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",162567494,162567494,162567494 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",108954,108954,108954 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",51.481752%,51.481752%,51.481752% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.043580%,50.043580%,50.043580% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",726.544210GB/s,726.544210GB/s,726.544210GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",498.622039MB/s,498.622039MB/s,498.622038MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",2999.310832GB/s,2999.310832GB/s,2999.310832GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",727.584324GB/s,727.584324GB/s,727.584324GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",727.585280GB/s,727.585280GB/s,727.585280GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",107.633421MB/s,107.633421MB/s,107.633420MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",23.430664KB/s,23.430664KB/s,23.429688KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",127841280,127841280,127841280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",127855629,127855629,127855629 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.012338%,0.012338%,0.012338% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.005503%,1.005503%,1.005503% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",98.971005%,98.971005%,98.971005% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.003846%,0.003846%,0.003846% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.001585%,0.001585%,0.001585% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.004689%,0.004689%,0.004689% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000413%,0.000413%,0.000413% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",2706145280,2706145280,2706145280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",127855629,127855629,127855629 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",162800224,162800224,162800224 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000033%,0.000033%,0.000033% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000588%,0.000588%,0.000588% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.186178,0.186178,0.186178 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.194035,0.194035,0.194035 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",4.850873%,4.850873%,4.850873% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",98.690763%,98.690763%,98.690763% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.999100,0.999100,0.999100 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.194410,0.194410,0.194410 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (6)","Mid (6)","Mid (6)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",8706.000000,8706.000000,8706.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.530467%,98.530467%,98.530467% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000138,0.000138,0.000138 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",21014,21014,21014 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83886096,83886096,83886096 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",20586,20586,20586 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",1752034,1752034,1752034 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",1019.718085GB/s,1019.718085GB/s,1019.718085GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",1019.718085GB/s,1019.718085GB/s,1019.718085GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",5.859375%,5.859375%,5.859375% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.000000%,50.000000%,50.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",99.973140%,99.973140%,99.973140% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",256.248981MB/s,256.248981MB/s,256.248980MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",21.297702GB/s,21.297702GB/s,21.297702GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",127.713715GB/s,127.713715GB/s,127.713715GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",254.929521MB/s,254.929521MB/s,254.929520MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1019.718085GB/s,1019.718085GB/s,1019.718085GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",261.576609MB/s,261.576609MB/s,261.576608MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",1019.718279GB/s,1019.718279GB/s,1019.718279GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",63.731445KB/s,63.731445KB/s,63.730469KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",5248000,5248000,5248000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",44574720,44574720,44574720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",26939188,26939188,26939188 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.014090%,0.014090%,0.014090% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.069248%,0.069248%,0.069248% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.730620%,0.730620%,0.730620% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.041813%,0.041813%,0.041813% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.005303%,0.005303%,0.005303% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.007189%,0.007189%,0.007189% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000423%,0.000423%,0.000423% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",148111360,148111360,148111360 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",692060160,692060160,692060160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",10813440,10813440,10813440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",26939188,26939188,26939188 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",20480,20480,20480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",96.690855%,96.690855%,96.690855% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",2.440460%,2.440460%,2.440460% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.129110,0.129110,0.129110 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.129128,0.129128,0.129128 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",3.228199%,3.228199%,3.228199% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",84.141422%,84.141422%,84.141422% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.945729,0.945729,0.945729 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",1.590688,1.590688,1.590688 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",16898.000000,16898.000000,16898.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.242884%,99.242884%,99.242884% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000847,0.000847,0.000847 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",63661750,63661750,63661750 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83910335,83910335,83910335 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",63659434,63659434,63659434 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",76713331,76713331,76713331 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",415.133647GB/s,415.133647GB/s,415.133647GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",415.133647GB/s,415.133647GB/s,415.133647GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",38.851045%,38.851045%,38.851045% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.262872%,50.262872%,50.262872% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",8.847523%,8.847523%,8.847523% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",315.036452GB/s,315.036452GB/s,315.036452GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",379.637300GB/s,379.637300GB/s,379.637300GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",1556.852528GB/s,1556.852528GB/s,1556.852528GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",314.684782GB/s,314.684782GB/s,314.684782GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",415.133647GB/s,415.133647GB/s,415.133647GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",315.047913GB/s,315.047913GB/s,315.047913GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",415.253680GB/s,415.253680GB/s,415.253680GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",25.945312KB/s,25.945312KB/s,25.944336KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",157291520,157291520,157291520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",86517760,86517760,86517760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",68926569,68926569,68926569 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006654%,0.006654%,0.006654% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.082467%,0.082467%,0.082467% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",2.573685%,2.573685%,2.573685% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",95.800785%,95.800785%,95.800785% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.005234%,0.005234%,0.005234% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.004855%,0.004855%,0.004855% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000912%,0.000912%,0.000912% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",766771200,766771200,766771200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1300234240,1300234240,1300234240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",126156800,126156800,126156800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",68926569,68926569,68926569 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",40642560,40642560,40642560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",40642560,40642560,40642560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",63588372,63588372,63588372 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.762412%,0.762412%,0.762412% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.762996%,0.762996%,0.762996% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.115721,0.115721,0.115721 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.115819,0.115819,0.115819 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",2.895478%,2.895478%,2.895478% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",98.652988%,98.652988%,98.652988% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.992432,0.992432,0.992432 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.599500,0.599500,0.599500 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",29247.000000,29247.000000,29247.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.700935%,98.700935%,98.700935% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000253,0.000253,0.000253 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",61542,61542,61542 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",56,56,56 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",61450,61450,61450 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",22566,22566,22566 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.981689%,99.981689%,99.981689% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.000000%,50.000000%,50.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",557.710083MB/s,557.710083MB/s,557.710082MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",204.805301MB/s,204.805301MB/s,204.805300MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",5948.120987GB/s,5948.120987GB/s,5948.120987GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",557.619325MB/s,557.619325MB/s,557.619324MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",558.545060MB/s,558.545060MB/s,558.545059MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",520.444336KB/s,520.444336KB/s,520.443359KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.467773KB/s,46.467773KB/s,46.466797KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",149744640,149744640,149744640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",114763149,114763149,114763149 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.024974%,0.024974%,0.024974% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.787303%,1.787303%,1.787303% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",55.842278%,55.842278%,55.842278% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",40.986914%,40.986914%,40.986914% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.003081%,0.003081%,0.003081% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.023904%,0.023904%,0.023904% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000994%,0.000994%,0.000994% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",2286714880,2286714880,2286714880 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",114763149,114763149,114763149 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",61440,61440,61440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000066%,0.000066%,0.000066% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.330486%,1.330486%,1.330486% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.341133,0.341133,0.341133 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.341332,0.341332,0.341332 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",8.533295%,8.533295%,8.533295% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",99.760921%,99.760921%,99.760921% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.982695,0.982695,0.982695 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",1.186720,1.186720,1.186720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",9666.000000,9666.000000,9666.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.676417%,98.676417%,98.676417% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000452,0.000452,0.000452 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",102502,102502,102502 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83886125,83886125,83886125 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",102494,102494,102494 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",2473392,2473392,2473392 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",1258.546791GB/s,1258.546791GB/s,1258.546791GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",1258.546791GB/s,1258.546791GB/s,1258.546791GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",23.716518%,23.716518%,23.716518% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.000000%,50.000000%,50.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",99.622531%,99.622531%,99.622531% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",1.537722GB/s,1.537722GB/s,1.537722GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",37.108416GB/s,37.108416GB/s,37.108416GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",786.899007GB/s,786.899007GB/s,786.899007GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",1.536312GB/s,1.536312GB/s,1.536312GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1258.546791GB/s,1258.546791GB/s,1258.546791GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",1.537842GB/s,1.537842GB/s,1.537842GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",1258.547466GB/s,1258.547466GB/s,1258.547466GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",78.658203KB/s,78.658203KB/s,78.657227KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",26219520,26219520,26219520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",31846400,31846400,31846400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",31860803,31860803,31860803 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.015466%,0.015466%,0.015466% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.116265%,0.116265%,0.116265% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.812371%,0.812371%,0.812371% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",90.874715%,90.874715%,90.874715% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.011149%,0.011149%,0.011149% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.006585%,0.006585%,0.006585% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000705%,0.000705%,0.000705% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",221511680,221511680,221511680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",775946240,775946240,775946240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",10813440,10813440,10813440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",31860803,31860803,31860803 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",24258560,24258560,24258560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",24258560,24258560,24258560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",102400,102400,102400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",7.664356%,7.664356%,7.664356% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.498388%,0.498388%,0.498388% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.228147,0.228147,0.228147 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.166387,0.166387,0.166387 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",4.159677%,4.159677%,4.159677% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",96.261146%,96.261146%,96.261146% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.980801,0.980801,0.980801 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.473491,0.473491,0.473491 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",11522.000000,11522.000000,11522.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.889624%,98.889624%,98.889624% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000477,0.000477,0.000477 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",4383522,4383522,4383522 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83910321,83910321,83910321 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",582709,582709,582709 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",8809496,8809496,8809496 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",1193.138594GB/s,1193.138594GB/s,1193.138594GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",1193.138594GB/s,1193.138594GB/s,1193.138594GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",37.613020%,37.613020%,37.613020% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",92.348062%,92.348062%,92.348062% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",90.528035%,90.528035%,90.528035% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",8.288057GB/s,8.288057GB/s,8.288057GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",125.300284GB/s,125.300284GB/s,125.300284GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",1640.856860GB/s,1640.856860GB/s,1640.856860GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",62.974206GB/s,62.974206GB/s,62.974206GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1193.138594GB/s,1193.138594GB/s,1193.138594GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",62.348238GB/s,62.348238GB/s,62.348238GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",1193.483382GB/s,1193.483382GB/s,1193.483382GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",74.570312KB/s,74.570312KB/s,74.569336KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",57676800,57676800,57676800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",58992640,58992640,58992640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",41371148,41371148,41371148 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.018752%,0.018752%,0.018752% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.147043%,0.147043%,0.147043% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.917015%,0.917015%,0.917015% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",95.251640%,95.251640%,95.251640% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.009463%,0.009463%,0.009463% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.010147%,0.010147%,0.010147% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001870%,0.001870%,0.001870% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",357826560,357826560,357826560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",901775360,901775360,901775360 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",52756480,52756480,52756480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",41371148,41371148,41371148 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",28190720,28190720,28190720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",28190720,28190720,28190720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",4427532,4427532,4427532 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",2.732294%,2.732294%,2.732294% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.911775%,0.911775%,0.911775% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.218582,0.218582,0.218582 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.218686,0.218686,0.218686 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",5.467159%,5.467159%,5.467159% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",89.676144%,89.676144%,89.676144% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.982292,0.982292,0.982292 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.786935,0.786935,0.786935 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (4)","Mid (4)","Mid (4)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",30783.000000,30783.000000,30783.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.765755%,98.765755%,98.765755% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000235,0.000235,0.000235 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",8116362,8116362,8116362 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",16,16,16 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",143898,143898,143898 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",57950,57950,57950 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",97.641865%,97.641865%,97.641865% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",88.145598%,88.145598%,88.145598% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",1.273961GB/s,1.273961GB/s,1.273961GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",525.357199MB/s,525.357199MB/s,525.357198MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",5941.477152GB/s,5941.477152GB/s,5941.477152GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",70.051888GB/s,70.051888GB/s,70.051888GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",71.855944GB/s,71.855944GB/s,71.855944GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",148.532227KB/s,148.532227KB/s,148.531250KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.416016KB/s,46.416016KB/s,46.415039KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",122598400,122598400,122598400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",122624104,122624104,122624104 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.026371%,0.026371%,0.026371% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.957357%,1.957357%,1.957357% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",63.657994%,63.657994%,63.657994% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",33.303499%,33.303499%,33.303499% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.003221%,0.003221%,0.003221% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.024247%,0.024247%,0.024247% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000898%,0.000898%,0.000898% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",2538373120,2538373120,2538373120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",122624104,122624104,122624104 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",7912588,7912588,7912588 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000068%,0.000068%,0.000068% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.026344%,1.026344%,1.026344% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.417828,0.417828,0.417828 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.364395,0.364395,0.364395 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",9.109870%,9.109870%,9.109870% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",96.552942%,96.552942%,96.552942% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.990991,0.990991,0.990991 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.996364,0.996364,0.996364 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",31807.000000,31807.000000,31807.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.805491%,98.805491%,98.805491% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000085,0.000085,0.000085 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",153691674,153691674,153691674 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",36,36,36 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",152825270,152825270,152825270 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",96935,96935,96935 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",54.196126%,54.196126%,54.196126% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.308576%,50.308576%,50.308576% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",721.202018GB/s,721.202018GB/s,721.202018GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",468.427445MB/s,468.427445MB/s,468.427444MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",3167.049871GB/s,3167.049871GB/s,3167.049871GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",725.293625GB/s,725.293625GB/s,725.293625GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",725.290689GB/s,725.290689GB/s,725.290689GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",178.140625KB/s,178.140625KB/s,178.139648KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",24.741211KB/s,24.741211KB/s,24.740234KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",162851840,162851840,162851840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",127852167,127852167,127852167 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.012069%,0.012069%,0.012069% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.070334%,1.070334%,1.070334% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",98.903690%,98.903690%,98.903690% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.005713%,0.005713%,0.005713% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.001695%,0.001695%,0.001695% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.005469%,0.005469%,0.005469% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000403%,0.000403%,0.000403% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",2706145280,2706145280,2706145280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",127852167,127852167,127852167 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",153692296,153692296,153692296 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000036%,0.000036%,0.000036% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000591%,0.000591%,0.000591% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.205778,0.205778,0.205778 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.205459,0.205459,0.205459 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",5.136483%,5.136483%,5.136483% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",98.344498%,98.344498%,98.344498% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.999084,0.999084,0.999084 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.205835,0.205835,0.205835 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (6)","Mid (6)","Mid (6)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",10882.000000,10882.000000,10882.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.824320%,98.824320%,98.824320% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000569,0.000569,0.000569 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",703986,703986,703986 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83886125,83886125,83886125 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",188398,188398,188398 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",6282438,6282438,6282438 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",1247.246081GB/s,1247.246081GB/s,1247.246081GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",1247.246081GB/s,1247.246081GB/s,1247.246081GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",35.473764%,35.473764%,35.473764% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",85.533847%,85.533847%,85.533847% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",92.926936%,92.926936%,92.926936% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",2.801164GB/s,2.801164GB/s,2.801164GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",93.409373GB/s,93.409373GB/s,93.409373GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",1403.456344GB/s,1403.456344GB/s,1403.456344GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",10.255402GB/s,10.255402GB/s,10.255402GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1247.246081GB/s,1247.246081GB/s,1247.246081GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",10.467098GB/s,10.467098GB/s,10.467098GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",1247.246750GB/s,1247.246750GB/s,1247.246750GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",77.952148KB/s,77.952148KB/s,77.951172KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",47191040,47191040,47191040 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",38072320,38072320,38072320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",38093995,38093995,38093995 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.018313%,0.018313%,0.018313% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.162246%,0.162246%,0.162246% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.451681%,0.451681%,0.451681% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",95.177560%,95.177560%,95.177560% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.013083%,0.013083%,0.013083% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.010004%,0.010004%,0.010004% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000577%,0.000577%,0.000577% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",315883520,315883520,315883520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",859832320,859832320,859832320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",31784960,31784960,31784960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",38093995,38093995,38093995 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",26880000,26880000,26880000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",26880000,26880000,26880000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",689748,689748,689748 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",3.402396%,3.402396%,3.402396% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.764141%,0.764141%,0.764141% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.252724,0.252724,0.252724 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.201449,0.201449,0.201449 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",5.036218%,5.036218%,5.036218% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",94.065586%,94.065586%,94.065586% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.984211,0.984211,0.984211 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.674706,0.674706,0.674706 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",23875.000000,23875.000000,23875.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.408639%,98.408639%,98.408639% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000337,0.000337,0.000337 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",2662,2662,2662 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",16,16,16 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",450,450,450 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",392,392,392 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.999237%,99.999237%,99.999237% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",95.000000%,95.000000%,95.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",4.088207MB/s,4.088207MB/s,4.088206MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",3.561282MB/s,3.561282MB/s,3.561281MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",5954.065297GB/s,5954.065297GB/s,5954.065297GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",23.257358MB/s,23.257358MB/s,23.257357MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",24.184018MB/s,24.184018MB/s,24.184017MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",148.846680KB/s,148.846680KB/s,148.845703KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.514648KB/s,46.514648KB/s,46.513672KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",87229440,87229440,87229440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",87258807,87258807,87258807 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.017370%,0.017370%,0.017370% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.388204%,1.388204%,1.388204% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",54.793063%,54.793063%,54.793063% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",42.493075%,42.493075%,42.493075% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.003054%,0.003054%,0.003054% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.023583%,0.023583%,0.023583% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000964%,0.000964%,0.000964% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",1406566400,1406566400,1406566400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",87258807,87258807,87258807 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",2560,2560,2560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000065%,0.000065%,0.000065% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.280621%,1.280621%,1.280621% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.347494,0.347494,0.347494 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.259693,0.259693,0.259693 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",6.492333%,6.492333%,6.492333% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",99.791845%,99.791845%,99.791845% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.989870,0.989870,0.989870 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",1.081352,1.081352,1.081352 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",32195.000000,32195.000000,32195.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.819887%,98.819887%,98.819887% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000099,0.000099,0.000099 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",20582,20582,20582 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",16,16,16 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",4106,4106,4106 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",2661,2661,2661 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.996948%,99.996948%,99.996948% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",80.000000%,80.000000%,80.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",18.691507MB/s,18.691507MB/s,18.691506MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",12.113517MB/s,12.113517MB/s,12.113516MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",2983.448683GB/s,2983.448683GB/s,2983.448683GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",93.229926MB/s,93.229926MB/s,93.229925MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",93.694255MB/s,93.694255MB/s,93.694254MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",74.583008KB/s,74.583008KB/s,74.582031KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",23.306641KB/s,23.306641KB/s,23.305664KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",164838400,164838400,164838400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",129839526,129839526,129839526 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.008738%,0.008738%,0.008738% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.985019%,0.985019%,0.985019% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",98.998290%,98.998290%,98.998290% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.003304%,0.003304%,0.003304% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.001542%,0.001542%,0.001542% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002276%,0.002276%,0.002276% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000237%,0.000237%,0.000237% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",2769715200,2769715200,2769715200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",129839526,129839526,129839526 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",20480,20480,20480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000033%,0.000033%,0.000033% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000561%,0.000561%,0.000561% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.193420,0.193420,0.193420 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.193439,0.193439,0.193439 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",4.835973%,4.835973%,4.835973% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",99.904501%,99.904501%,99.904501% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999437,0.999437,0.999437 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.193792,0.193792,0.193792 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",12610.000000,12610.000000,12610.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.985428%,98.985428%,98.985428% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000440,0.000440,0.000440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",9415058,9415058,9415058 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83886121,83886121,83886121 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",7440502,7440502,7440502 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",19027454,19027454,19027454 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",1109.513412GB/s,1109.513412GB/s,1109.513412GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",1109.513412GB/s,1109.513412GB/s,1109.513412GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",42.590197%,42.590197%,42.590197% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",60.962782%,60.962782%,60.962782% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",78.834486%,78.834486%,78.834486% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",98.411283GB/s,98.411283GB/s,98.411283GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",251.665299GB/s,251.665299GB/s,251.665299GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",2080.608524GB/s,2080.608524GB/s,2080.608524GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",124.614932GB/s,124.614932GB/s,124.614932GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1109.513412GB/s,1109.513412GB/s,1109.513412GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",124.527611GB/s,124.527611GB/s,124.527611GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",1109.513954GB/s,1109.513954GB/s,1109.513954GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",69.343750KB/s,69.343750KB/s,69.342773KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",78648320,78648320,78648320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",46919680,46919680,46919680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",46941657,46941657,46941657 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.015665%,0.015665%,0.015665% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.177357%,0.177357%,0.177357% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",2.620442%,2.620442%,2.620442% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",94.336854%,94.336854%,94.336854% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.008472%,0.008472%,0.008472% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.013415%,0.013415%,0.013415% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001132%,0.001132%,0.001132% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",431226880,431226880,431226880 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",985661440,985661440,985661440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",73728000,73728000,73728000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",46941657,46941657,46941657 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",30812160,30812160,30812160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",30812160,30812160,30812160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",9421660,9421660,9421660 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",1.684915%,1.684915%,1.684915% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.141748%,1.141748%,1.141748% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.245504,0.245504,0.245504 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.233916,0.233916,0.233916 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",5.847904%,5.847904%,5.847904% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",94.072954%,94.072954%,94.072954% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.985914,0.985914,0.985914 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.947242,0.947242,0.947242 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (4)","Mid (4)","Mid (4)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",13762.000000,13762.000000,13762.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.070357%,99.070357%,99.070357% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000575,0.000575,0.000575 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",23041358,23041358,23041358 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83909963,83909963,83909963 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",23090950,23090950,23090950 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",29452183,29452183,29452183 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",976.492315GB/s,976.492315GB/s,976.492315GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",976.492315GB/s,976.492315GB/s,976.492315GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",41.543860%,41.543860%,41.543860% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",49.388736%,49.388736%,49.388736% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",65.742016%,65.742016%,65.742016% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",268.794718GB/s,268.794718GB/s,268.794718GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",342.843894GB/s,342.843894GB/s,342.843894GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",2319.407649GB/s,2319.407649GB/s,2319.407649GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",272.175805GB/s,272.175805GB/s,272.175805GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",976.492315GB/s,976.492315GB/s,976.492315GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",268.217433GB/s,268.217433GB/s,268.217433GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",976.770329GB/s,976.770329GB/s,976.770329GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",61.030273KB/s,61.030273KB/s,61.029297KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",99619840,99619840,99619840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",52817920,52817920,52817920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",52848316,52848316,52848316 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","High (8)","High (8)","High (8)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.015230%,0.015230%,0.015230% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.174824%,0.174824%,0.174824% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",3.778352%,3.778352%,3.778352% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",93.512633%,93.512633%,93.512633% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.005718%,0.005718%,0.005718% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.013172%,0.013172%,0.013172% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000993%,0.000993%,0.000993% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",525598720,525598720,525598720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1069547520,1069547520,1069547520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",84213760,84213760,84213760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",52848316,52848316,52848316 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",33433600,33433600,33433600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",33433600,33433600,33433600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",23381404,23381404,23381404 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",1.333187%,1.333187%,1.333187% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.165892%,1.165892%,1.165892% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.201769,0.201769,0.201769 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.213597,0.213597,0.213597 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",5.339913%,5.339913%,5.339913% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",95.001176%,95.001176%,95.001176% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.988101,0.988101,0.988101 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.946601,0.946601,0.946601 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",23875.000000,23875.000000,23875.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.408639%,98.408639%,98.408639% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000295,0.000295,0.000295 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",5866,5866,5866 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",16,16,16 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",1282,1282,1282 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",900,900,900 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.998474%,99.998474%,99.998474% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",90.000000%,90.000000%,90.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",11.642478MB/s,11.642478MB/s,11.642477MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",8.173347MB/s,8.173347MB/s,8.173346MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",5951.831025GB/s,5951.831025GB/s,5951.831025GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",46.497260MB/s,46.497260MB/s,46.497259MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",53.272057MB/s,53.272057MB/s,53.272056MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",148.791016KB/s,148.791016KB/s,148.790039KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.497070KB/s,46.497070KB/s,46.496094KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",122240000,122240000,122240000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",87255200,87255200,87255200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.017386%,0.017386%,0.017386% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.381159%,1.381159%,1.381159% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",54.759373%,54.759373%,54.759373% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",42.527595%,42.527595%,42.527595% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.003032%,0.003032%,0.003032% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.028598%,0.028598%,0.028598% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000958%,0.000958%,0.000958% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",1406566400,1406566400,1406566400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",87255200,87255200,87255200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",5120,5120,5120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000065%,0.000065%,0.000065% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.281833%,1.281833%,1.281833% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.259717,0.259717,0.259717 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.259618,0.259618,0.259618 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",6.490461%,6.490461%,6.490461% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",99.691871%,99.691871%,99.691871% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.989647,0.989647,0.989647 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",1.085944,1.085944,1.085944 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",14594.000000,14594.000000,14594.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.123355%,99.123355%,99.123355% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.001187,0.001187,0.001187 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",83845038,83845038,83845038 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167772203,167772203,167772203 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",81610090,81610090,81610090 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",96765592,96765592,96765592 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",532.532348GB/s,532.532348GB/s,532.532348GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",532.532348GB/s,532.532348GB/s,532.532348GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",7.576203%,7.576203%,7.576203% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",51.229442%,51.229442%,51.229442% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",42.695878%,42.695878%,42.695878% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",259.041863GB/s,259.041863GB/s,259.041863GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",307.147550GB/s,307.147550GB/s,307.147550GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",665.730441GB/s,665.730441GB/s,665.730441GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",267.270904GB/s,267.270904GB/s,267.270904GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",532.532348GB/s,532.532348GB/s,532.532348GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",266.135901GB/s,266.135901GB/s,266.135901GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",532.532484GB/s,532.532484GB/s,532.532484GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",16.641602KB/s,16.641602KB/s,16.640625KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",104862720,104862720,104862720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",57077760,57077760,57077760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",57145484,57145484,57145484 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","High (7)","High (7)","High (7)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006340%,0.006340%,0.006340% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.096295%,0.096295%,0.096295% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",7.408122%,7.408122%,7.408122% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",90.441316%,90.441316%,90.441316% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.016622%,0.016622%,0.016622% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.004458%,0.004458%,0.004458% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000671%,0.000671%,0.000671% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",619970560,619970560,619970560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1090519040,1090519040,1090519040 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",105185280,105185280,105185280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",57145484,57145484,57145484 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",34088960,34088960,34088960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",34088960,34088960,34088960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",84202616,84202616,84202616 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",1.624918%,1.624918%,1.624918% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.401259%,0.401259%,0.401259% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.097810,0.097810,0.097810 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.081673,0.081673,0.081673 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.041817%,2.041817%,2.041817% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",74.320444%,74.320444%,74.320444% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.740218,0.740218,0.740218 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.272140,0.272140,0.272140 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",32191.000000,32191.000000,32191.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.819740%,98.819740%,98.819740% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000090,0.000090,0.000090 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",82022,82022,82022 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",16,16,16 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",82306,82306,82306 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",20818,20818,20818 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.987793%,99.987793%,99.987793% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",374.497550MB/s,374.497550MB/s,374.497549MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",94.723228MB/s,94.723228MB/s,94.723227MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",2982.020805GB/s,2982.020805GB/s,2982.020805GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",372.741225MB/s,372.741225MB/s,372.741224MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",373.205332MB/s,373.205332MB/s,373.205331MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",74.547852KB/s,74.547852KB/s,74.546875KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",23.295898KB/s,23.295898KB/s,23.294922KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",129807360,129807360,129807360 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",129819028,129819028,129819028 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.011012%,0.011012%,0.011012% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.984372%,0.984372%,0.984372% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",98.996119%,98.996119%,98.996119% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.003993%,0.003993%,0.003993% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.001564%,0.001564%,0.001564% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002325%,0.002325%,0.002325% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000201%,0.000201%,0.000201% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",2769059840,2769059840,2769059840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",129819028,129819028,129819028 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",81920,81920,81920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000033%,0.000033%,0.000033% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000382%,0.000382%,0.000382% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.243222,0.243222,0.243222 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.193316,0.193316,0.193316 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",4.832897%,4.832897%,4.832897% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",99.890757%,99.890757%,99.890757% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999168,0.999168,0.999168 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.193556,0.193556,0.193556 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",31295.000000,31295.000000,31295.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.785948%,98.785948%,98.785948% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000099,0.000099,0.000099 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",280337530,280337530,280337530 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",36,36,36 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",276014882,276014882,276014882 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",95983,95983,95983 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",16.446892%,16.446892%,16.446892% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.775337%,50.775337%,50.775337% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",785.720405GB/s,785.720405GB/s,785.720405GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",279.788467MB/s,279.788467MB/s,279.788466MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",1910.419199GB/s,1910.419199GB/s,1910.419199GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",798.082953GB/s,798.082953GB/s,798.082953GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",798.025513GB/s,798.025513GB/s,798.025513GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",107.457031KB/s,107.457031KB/s,107.456055KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",14.923828KB/s,14.923828KB/s,14.922852KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",125219840,125219840,125219840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",125232228,125232228,125232228 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.007236%,0.007236%,0.007236% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.633927%,0.633927%,0.633927% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.260101%,99.260101%,99.260101% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.094833%,0.094833%,0.094833% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.001009%,0.001009%,0.001009% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001481%,0.001481%,0.001481% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000010%,0.000010%,0.000010% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",2622259200,2622259200,2622259200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",125232228,125232228,125232228 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",280357708,280357708,280357708 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000022%,0.000022%,0.000022% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.001381%,0.001381%,0.001381% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.124952,0.124952,0.124952 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.122634,0.122634,0.122634 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",3.065857%,3.065857%,3.065857% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",97.451329%,97.451329%,97.451329% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.999324,0.999324,0.999324 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.123516,0.123516,0.123516 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (4)","Mid (4)","Mid (4)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",9282.000000,9282.000000,9282.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.621660%,98.621660%,98.621660% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000486,0.000486,0.000486 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",343310,343310,343310 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335544379,335544379,335544379 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",251117,251117,251117 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",16814918,16814918,16814918 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",960.161553GB/s,960.161553GB/s,960.161553GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",960.161553GB/s,960.161553GB/s,960.161553GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",960.161553GB/s,960.161553GB/s,960.161553GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",8.483544%,8.483544%,8.483544% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",32.698365%,32.698365%,32.698365% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",97.140205%,97.140205%,97.140205% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",735.818141MB/s,735.818141MB/s,735.818140MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",48.115962GB/s,48.115962GB/s,48.115962GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",90.073749GB/s,90.073749GB/s,90.073749GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",945.088704MB/s,945.088704MB/s,945.088703MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",960.161553GB/s,960.161553GB/s,960.161553GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",0.982383GB/s,0.982383GB/s,0.982383GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",960.161722GB/s,960.161722GB/s,960.161722GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",15.001953KB/s,15.001953KB/s,15.000977KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",15733760,15733760,15733760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",29880320,29880320,29880320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",29894854,29894854,29894854 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.005375%,0.005375%,0.005375% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.050619%,0.050619%,0.050619% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",1.477608%,1.477608%,1.477608% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.745444%,0.745444%,0.745444% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.002303%,0.002303%,0.002303% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002339%,0.002339%,0.002339% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000472%,0.000472%,0.000472% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",211025920,211025920,211025920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",734003200,734003200,734003200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",29894854,29894854,29894854 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",22947840,22947840,22947840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",22947840,22947840,22947840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",322536,322536,322536 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",97.094174%,97.094174%,97.094174% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.621667%,0.621667%,0.621667% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.062552,0.062552,0.062552 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.041116,0.041116,0.041116 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.027893%,1.027893%,1.027893% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",71.246363%,71.246363%,71.246363% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.702391,0.702391,0.702391 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.317616,0.317616,0.317616 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",14274.000000,14274.000000,14274.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.103703%,99.103703%,99.103703% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.001091,0.001091,0.001091 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",203864006,203864006,203864006 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335544370,335544370,335544370 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",202840938,202840938,202840938 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",211287277,211287277,211287277 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",529.295612GB/s,529.295612GB/s,529.295612GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",529.295612GB/s,529.295612GB/s,529.295612GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",529.295612GB/s,529.295612GB/s,529.295612GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.978400%,0.978400%,0.978400% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",1.178385%,1.178385%,1.178385% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",37.468716%,37.468716%,37.468716% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",319.966133GB/s,319.966133GB/s,319.966133GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",333.289589GB/s,333.289589GB/s,333.289589GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",330.842063GB/s,330.842063GB/s,330.842063GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",322.394485GB/s,322.394485GB/s,322.394485GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",529.295612GB/s,529.295612GB/s,529.295612GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",321.579945GB/s,321.579945GB/s,321.579945GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",529.295691GB/s,529.295691GB/s,529.295691GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",8.269531KB/s,8.269531KB/s,8.268555KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",104862720,104862720,104862720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",55439360,55439360,55439360 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",55499836,55499836,55499836 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","High (8)","High (8)","High (8)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003721%,0.003721%,0.003721% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.048829%,0.048829%,0.048829% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",15.564744%,15.564744%,15.564744% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",81.216030%,81.216030%,81.216030% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.004372%,0.004372%,0.004372% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002532%,0.002532%,0.002532% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001253%,0.001253%,0.001253% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",567541760,567541760,567541760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1090519040,1090519040,1090519040 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",105185280,105185280,105185280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",55499836,55499836,55499836 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",34088960,34088960,34088960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",34088960,34088960,34088960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",204380380,204380380,204380380 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",2.989050%,2.989050%,2.989050% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.169469%,0.169469%,0.169469% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.047697,0.047697,0.047697 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.037293,0.037293,0.037293 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",0.932325%,0.932325%,0.932325% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",78.832960%,78.832960%,78.832960% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.739765,0.739765,0.739765 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.117376,0.117376,0.117376 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",27967.000000,27967.000000,27967.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.641479%,98.641479%,98.641479% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000271,0.000271,0.000271 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",82714,82714,82714 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",16,16,16 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",82426,82426,82426 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",26382,26382,26382 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.975586%,99.975586%,99.975586% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.000000%,50.000000%,50.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",747.691671MB/s,747.691671MB/s,747.691670MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",239.312859MB/s,239.312859MB/s,239.312858MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",5944.995121GB/s,5944.995121GB/s,5944.995121GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",743.101712MB/s,743.101712MB/s,743.101711MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",750.304138MB/s,750.304138MB/s,750.304137MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",148.620117KB/s,148.620117KB/s,148.619141KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.443359KB/s,46.443359KB/s,46.442383KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",108180480,108180480,108180480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",108209662,108209662,108209662 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.023502%,0.023502%,0.023502% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.689164%,1.689164%,1.689164% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",56.364997%,56.364997%,56.364997% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",40.662468%,40.662468%,40.662468% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.003112%,0.003112%,0.003112% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.023760%,0.023760%,0.023760% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001048%,0.001048%,0.001048% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",2076999680,2076999680,2076999680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",108209662,108209662,108209662 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",81920,81920,81920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000066%,0.000066%,0.000066% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.231883%,1.231883%,1.231883% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.406418,0.406418,0.406418 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.321789,0.321789,0.321789 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",8.044731%,8.044731%,8.044731% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",99.716959%,99.716959%,99.716959% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.982767,0.982767,0.982767 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",1.107887,1.107887,1.107887 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",39615.000000,39615.000000,39615.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.040925%,99.040925%,99.040925% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000111,0.000111,0.000111 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",670739146,670739146,670739146 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",59,59,59 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",666111310,666111310,666111310 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",109184,109184,109184 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.052007%,0.052007%,0.052007% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.507211%,0.507211%,0.507211% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",807.730517GB/s,807.730517GB/s,807.730517GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",135.574697MB/s,135.574697MB/s,135.574696MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",813.790891GB/s,813.790891GB/s,813.790891GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",813.342838GB/s,813.342838GB/s,813.342838GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",813.342259GB/s,813.342259GB/s,813.342259GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",75.018555KB/s,75.018555KB/s,75.017578KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",6.357422KB/s,6.357422KB/s,6.356445KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",202828800,202828800,202828800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",167836916,167836916,167836916 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003273%,0.003273%,0.003273% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.358494%,0.358494%,0.358494% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.634662%,99.634662%,99.634662% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.001121%,0.001121%,0.001121% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.000447%,0.000447%,0.000447% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001522%,0.001522%,0.001522% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000124%,0.000124%,0.000124% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",3985408000,3985408000,3985408000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",167836916,167836916,167836916 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",670739624,670739624,670739624 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000009%,0.000009%,0.000009% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000348%,0.000348%,0.000348% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.071848,0.071848,0.071848 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.071810,0.071810,0.071810 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.795258%,1.795258%,1.795258% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",95.238623%,95.238623%,95.238623% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999308,0.999308,0.999308 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.072033,0.072033,0.072033 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",9538.000000,9538.000000,9538.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.658655%,98.658655%,98.658655% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000565,0.000565,0.000565 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",163942,163942,163942 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167772204,167772204,167772204 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",163854,163854,163854 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",8864716,8864716,8864716 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",1246.160269GB/s,1246.160269GB/s,1246.160269GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",1246.160269GB/s,1246.160269GB/s,1246.160269GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",11.024306%,11.024306%,11.024306% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.000000%,50.000000%,50.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",96.752656%,96.752656%,96.752656% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",1.217057GB/s,1.217057GB/s,1.217057GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",65.844398GB/s,65.844398GB/s,65.844398GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",311.692186GB/s,311.692186GB/s,311.692186GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",1.216953GB/s,1.216953GB/s,1.216953GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1246.160269GB/s,1246.160269GB/s,1246.160269GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",1.217711GB/s,1.217711GB/s,1.217711GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",1246.160595GB/s,1246.160595GB/s,1246.160595GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",38.942383KB/s,38.942383KB/s,38.941406KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",20976640,20976640,20976640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",31191040,31191040,31191040 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",31208677,31208677,31208677 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.007447%,0.007447%,0.007447% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.059393%,0.059393%,0.059393% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.972091%,0.972091%,0.972091% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",42.303560%,42.303560%,42.303560% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.004982%,0.004982%,0.004982% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.007948%,0.007948%,0.007948% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000323%,0.000323%,0.000323% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",211025920,211025920,211025920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",754974720,754974720,754974720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",21299200,21299200,21299200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",31208677,31208677,31208677 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",23603200,23603200,23603200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",23603200,23603200,23603200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",163840,163840,163840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",55.879926%,55.879926%,55.879926% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.764331%,0.764331%,0.764331% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.118701,0.118701,0.118701 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.079885,0.079885,0.079885 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",1.997122%,1.997122%,1.997122% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",97.288826%,97.288826%,97.288826% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.975587,0.975587,0.975587 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.553352,0.553352,0.553352 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",38719.000000,38719.000000,38719.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.018731%,99.018731%,99.018731% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000097,0.000097,0.000097 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",529706598,529706598,529706598 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",50,50,50 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",502924548,502924548,502924548 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",109195,109195,109195 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",21.098757%,21.098757%,21.098757% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",4.177933%,4.177933%,4.177933% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",773.460504GB/s,773.460504GB/s,773.460504GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",171.964189MB/s,171.964189MB/s,171.964188MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",1032.115851GB/s,1032.115851GB/s,1032.115851GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",814.327387GB/s,814.327387GB/s,814.327387GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",814.649302GB/s,814.649302GB/s,814.649302GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",80.630859KB/s,80.630859KB/s,80.629883KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",8.062500KB/s,8.062500KB/s,8.061523KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",198241280,198241280,198241280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",163246630,163246630,163246630 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.004232%,0.004232%,0.004232% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.446473%,0.446473%,0.446473% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.544921%,99.544921%,99.544921% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.001474%,0.001474%,0.001474% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.000563%,0.000563%,0.000563% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001792%,0.001792%,0.001792% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000146%,0.000146%,0.000146% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",3838607360,3838607360,3838607360 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",163246630,163246630,163246630 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",529497280,529497280,529497280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000012%,0.000012%,0.000012% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000386%,0.000386%,0.000386% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.089171,0.089171,0.089171 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.089845,0.089845,0.089845 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",2.246131%,2.246131%,2.246131% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",94.928934%,94.928934%,94.928934% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999565,0.999565,0.999565 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.090094,0.090094,0.090094 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",19393.000000,19393.000000,19393.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.700884%,98.700884%,98.700884% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000600,0.000600,0.000600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",96187666,96187666,96187666 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167796348,167796348,167796348 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",94173430,94173430,94173430 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",112486659,112486659,112486659 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",483.970884GB/s,483.970884GB/s,483.970884GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",483.970884GB/s,483.970884GB/s,483.970884GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",10.198208%,10.198208%,10.198208% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.959692%,50.959692%,50.959692% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",34.042242%,34.042242%,34.042242% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",271.661271GB/s,271.661271GB/s,271.661271GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",324.489283GB/s,324.489283GB/s,324.489283GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",726.015405GB/s,726.015405GB/s,726.015405GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",276.604541GB/s,276.604541GB/s,276.604541GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",483.970884GB/s,483.970884GB/s,483.970884GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",277.471720GB/s,277.471720GB/s,277.471720GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",484.040659GB/s,484.040659GB/s,484.040659GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",15.124023KB/s,15.124023KB/s,15.123047KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",125834240,125834240,125834240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",64281600,64281600,64281600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",64320197,64320197,64320197 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","High (8)","High (8)","High (8)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.010533%,0.010533%,0.010533% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.095303%,0.095303%,0.095303% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",10.577286%,10.577286%,10.577286% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",88.450287%,88.450287%,88.450287% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.004057%,0.004057%,0.004057% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.006888%,0.006888%,0.006888% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000151%,0.000151%,0.000151% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",714178560,714178560,714178560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1174405120,1174405120,1174405120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",147128320,147128320,147128320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",64320197,64320197,64320197 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",36710400,36710400,36710400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",36710400,36710400,36710400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",95887052,95887052,95887052 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.459699%,0.459699%,0.459699% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.395797%,0.395797%,0.395797% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.103665,0.103665,0.103665 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.078143,0.078143,0.078143 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",1.953566%,1.953566%,1.953566% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",78.998724%,78.998724%,78.998724% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.740981,0.740981,0.740981 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.265231,0.265231,0.265231 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",23875.000000,23875.000000,23875.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.408639%,98.408639%,98.408639% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000298,0.000298,0.000298 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",5222,5222,5222 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",16,16,16 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",390,390,390 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",296,296,296 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.998474%,99.998474%,99.998474% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",97.500000%,97.500000%,97.500000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",3.542150MB/s,3.542150MB/s,3.542150MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",2.688401MB/s,2.688401MB/s,2.688400MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",5952.449225GB/s,5952.449225GB/s,5952.449225GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",46.502090MB/s,46.502090MB/s,46.502089MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",47.428499MB/s,47.428499MB/s,47.428498MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",148.806641KB/s,148.806641KB/s,148.805664KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.501953KB/s,46.501953KB/s,46.500977KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",87229440,87229440,87229440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",87255550,87255550,87255550 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.018775%,0.018775%,0.018775% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.385932%,1.385932%,1.385932% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",57.226684%,57.226684%,57.226684% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",40.088364%,40.088364%,40.088364% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.003062%,0.003062%,0.003062% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.028279%,0.028279%,0.028279% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000886%,0.000886%,0.000886% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",1406566400,1406566400,1406566400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",87255550,87255550,87255550 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",5120,5120,5120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000065%,0.000065%,0.000065% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.247953%,1.247953%,1.247953% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.347410,0.347410,0.347410 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.259718,0.259718,0.259718 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",6.492940%,6.492940%,6.492940% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",99.863059%,99.863059%,99.863059% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.990641,0.990641,0.990641 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",1.061979,1.061979,1.061979 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",31423.000000,31423.000000,31423.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.790894%,98.790894%,98.790894% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000098,0.000098,0.000098 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",311523214,311523214,311523214 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",35,35,35 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",309246062,309246062,309246062 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",96519,96519,96519 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",7.135975%,7.135975%,7.135975% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.364596%,50.364596%,50.364596% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",823.658926GB/s,823.658926GB/s,823.658926GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",263.242496MB/s,263.242496MB/s,263.242496MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",1787.460166GB/s,1787.460166GB/s,1787.460166GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",829.928397GB/s,829.928397GB/s,829.928397GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",829.723988GB/s,829.723988GB/s,829.723988GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",97.748047KB/s,97.748047KB/s,97.747070KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",13.963867KB/s,13.963867KB/s,13.962891KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",125875200,125875200,125875200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",125887565,125887565,125887565 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006691%,0.006691%,0.006691% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.587561%,0.587561%,0.587561% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.294302%,99.294302%,99.294302% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.106377%,0.106377%,0.106377% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.000931%,0.000931%,0.000931% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002918%,0.002918%,0.002918% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000017%,0.000017%,0.000017% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",2643230720,2643230720,2643230720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",125887565,125887565,125887565 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",311599960,311599960,311599960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000020%,0.000020%,0.000020% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.001184%,0.001184%,0.001184% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.119566,0.119566,0.119566 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.113844,0.113844,0.113844 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.846109%,2.846109%,2.846109% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",98.651949%,98.651949%,98.651949% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.999263,0.999263,0.999263 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.114601,0.114601,0.114601 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (4)","Mid (4)","Mid (4)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",10434.000000,10434.000000,10434.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.773840%,98.773840%,98.773840% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000854,0.000854,0.000854 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",24021542,24021542,24021542 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335568513,335568513,335568513 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",18364878,18364878,18364878 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",68511161,68511161,68511161 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",1069.767452GB/s,1069.767452GB/s,1069.767452GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",1069.767452GB/s,1069.767452GB/s,1069.767452GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",1069.767452GB/s,1069.767452GB/s,1069.767452GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",12.033205%,12.033205%,12.033205% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",22.859645%,22.859645%,22.859645% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",81.385503%,81.385503%,81.385503% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",58.550086GB/s,58.550086GB/s,58.550086GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",218.424231GB/s,218.424231GB/s,218.424231GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",234.076924GB/s,234.076924GB/s,234.076924GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",77.125217GB/s,77.125217GB/s,77.125217GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1069.767452GB/s,1069.767452GB/s,1069.767452GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",76.584410GB/s,76.584410GB/s,76.584410GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",1069.844583GB/s,1069.844583GB/s,1069.844583GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",16.714844KB/s,16.714844KB/s,16.713867KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",36705280,36705280,36705280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",53422080,53422080,53422080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",35809113,35809113,35809113 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Mid (4)","Mid (4)","Mid (4)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.008171%,0.008171%,0.008171% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.061428%,0.061428%,0.061428% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",4.143792%,4.143792%,4.143792% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",79.753785%,79.753785%,79.753785% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.004564%,0.004564%,0.004564% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.004782%,0.004782%,0.004782% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000662%,0.000662%,0.000662% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",294912000,294912000,294912000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",817889280,817889280,817889280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",21299200,21299200,21299200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",35809113,35809113,35809113 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",25569280,25569280,25569280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",25569280,25569280,25569280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",24191172,24191172,24191172 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",15.840773%,15.840773%,15.840773% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.182042%,0.182042%,0.182042% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.049205,0.049205,0.049205 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.049394,0.049394,0.049394 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.234855%,1.234855%,1.234855% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",77.634535%,77.634535%,77.634535% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.586835,0.586835,0.586835 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.118174,0.118174,0.118174 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",15426.000000,15426.000000,15426.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.170637%,99.170637%,99.170637% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.001040,0.001040,0.001040 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",247585626,247585626,247585626 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335544370,335544370,335544370 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",246454706,246454706,246454706 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",250588893,250588893,250588893 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",501.177843GB/s,501.177843GB/s,501.177843GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",501.177843GB/s,501.177843GB/s,501.177843GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",501.177843GB/s,501.177843GB/s,501.177843GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.689273%,0.689273%,0.689273% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.182611%,0.182611%,0.182611% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",25.610806%,25.610806%,25.610806% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",368.111247GB/s,368.111247GB/s,368.111247GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",374.286177GB/s,374.286177GB/s,374.286177GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",375.913972GB/s,375.913972GB/s,375.913972GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",369.838037GB/s,369.838037GB/s,369.838037GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",501.177843GB/s,501.177843GB/s,501.177843GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",369.800419GB/s,369.800419GB/s,369.800419GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",501.177918GB/s,501.177918GB/s,501.177918GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",7.830078KB/s,7.830078KB/s,7.829102KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",125834240,125834240,125834240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",61337600,61337600,61337600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",61401362,61401362,61401362 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003565%,0.003565%,0.003565% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.062567%,0.062567%,0.062567% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",14.968982%,14.968982%,14.968982% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",83.583478%,83.583478%,83.583478% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.005041%,0.005041%,0.005041% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002826%,0.002826%,0.002826% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000933%,0.000933%,0.000933% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",651427840,651427840,651427840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1174405120,1174405120,1174405120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",126156800,126156800,126156800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",61401362,61401362,61401362 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",36710400,36710400,36710400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",36710400,36710400,36710400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",247610812,247610812,247610812 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",1.203007%,1.203007%,1.203007% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.169601%,0.169601%,0.169601% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.044624,0.044624,0.044624 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.035900,0.035900,0.035900 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",0.897506%,0.897506%,0.897506% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",85.087552%,85.087552%,85.087552% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.599468,0.599468,0.599468 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.101100,0.101100,0.101100 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",39743.000000,39743.000000,39743.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.044014%,99.044014%,99.044014% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000126,0.000126,0.000126 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",652897070,652897070,652897070 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",46,46,46 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",649674650,649674650,649674650 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",97002,97002,97002 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",2.711407%,2.711407%,2.711407% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.494781%,0.494781%,0.494781% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",816.741970GB/s,816.741970GB/s,816.741970GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",124.873302MB/s,124.873302MB/s,124.873302MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",843.688429GB/s,843.688429GB/s,843.688429GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",820.787554GB/s,820.787554GB/s,820.787554GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",820.793052GB/s,820.793052GB/s,820.793052GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",60.637695KB/s,60.637695KB/s,60.636719KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",6.590820KB/s,6.590820KB/s,6.589844KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",203484160,203484160,203484160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",168495885,168495885,168495885 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003400%,0.003400%,0.003400% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.368597%,0.368597%,0.368597% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.622597%,99.622597%,99.622597% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.003567%,0.003567%,0.003567% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.000451%,0.000451%,0.000451% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.000941%,0.000941%,0.000941% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000115%,0.000115%,0.000115% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",4006379520,4006379520,4006379520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",168495885,168495885,168495885 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",652892696,652892696,652892696 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000010%,0.000010%,0.000010% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000321%,0.000321%,0.000321% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.073824,0.073824,0.073824 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.073833,0.073833,0.073833 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.845836%,1.845836%,1.845836% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",96.177932%,96.177932%,96.177932% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999170,0.999170,0.999170 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.074008,0.074008,0.074008 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",30655.000000,30655.000000,30655.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.760602%,98.760602%,98.760602% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000234,0.000234,0.000234 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",27149106,27149106,27149106 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",34,34,34 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",263394,263394,263394 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",87805,87805,87805 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",91.356957%,91.356957%,91.356957% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",95.499082%,95.499082%,95.499082% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",2.323936GB/s,2.323936GB/s,2.323936GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",793.299970MB/s,793.299970MB/s,793.299969MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",5921.222420GB/s,5921.222420GB/s,5921.222420GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",255.879092GB/s,255.879092GB/s,255.879092GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",239.537641GB/s,239.537641GB/s,239.537641GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",314.554688KB/s,314.554688KB/s,314.553711KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.257812KB/s,46.257812KB/s,46.256836KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",156953600,156953600,156953600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",121968512,121968512,121968512 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.023824%,0.023824%,0.023824% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.910574%,1.910574%,1.910574% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",71.405076%,71.405076%,71.405076% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",25.792109%,25.792109%,25.792109% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.003136%,0.003136%,0.003136% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.026596%,0.026596%,0.026596% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000900%,0.000900%,0.000900% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",2517401600,2517401600,2517401600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",121968512,121968512,121968512 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",29001240,29001240,29001240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000067%,0.000067%,0.000067% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.837719%,0.837719%,0.837719% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.361748,0.361748,0.361748 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.361832,0.361832,0.361832 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",9.045809%,9.045809%,9.045809% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",99.555526%,99.555526%,99.555526% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.997284,0.997284,0.997284 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.887102,0.887102,0.887102 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",8838.000000,8838.000000,8838.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.552416%,98.552416%,98.552416% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000168,0.000168,0.000168 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",5446,5446,5446 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167772176,167772176,167772176 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",770,770,770 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",3884,3884,3884 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",923.131061GB/s,923.131061GB/s,923.131061GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",923.131061GB/s,923.131061GB/s,923.131061GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",3.027344%,3.027344%,3.027344% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",95.000000%,95.000000%,95.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",99.999695%,99.999695%,99.999695% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",4.338445MB/s,4.338445MB/s,4.338444MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",21.883795MB/s,21.883795MB/s,21.883794MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",57.808378GB/s,57.808378GB/s,57.808378GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",28.847845MB/s,28.847845MB/s,28.847844MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",923.131061GB/s,923.131061GB/s,923.131061GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",30.684642MB/s,30.684642MB/s,30.684641MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",923.131149GB/s,923.131149GB/s,923.131149GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",28.847656KB/s,28.847656KB/s,28.846680KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",5248000,5248000,5248000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",45250560,45250560,45250560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",27611634,27611634,27611634 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.005861%,0.005861%,0.005861% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.038084%,0.038084%,0.038084% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.580992%,0.580992%,0.580992% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.047274%,0.047274%,0.047274% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.002531%,0.002531%,0.002531% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002289%,0.002289%,0.002289% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000072%,0.000072%,0.000072% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",169738240,169738240,169738240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",692060160,692060160,692060160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",10813440,10813440,10813440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",27611634,27611634,27611634 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",5120,5120,5120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",98.265450%,98.265450%,98.265450% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.057447%,1.057447%,1.057447% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.058402,0.058402,0.058402 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.058412,0.058412,0.058412 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",1.460307%,1.460307%,1.460307% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",87.036954%,87.036954%,87.036954% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.980101,0.980101,0.980101 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.715936,0.715936,0.715936 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",20417.000000,20417.000000,20417.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.766041%,98.766041%,98.766041% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000892,0.000892,0.000892 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",128557986,128557986,128557986 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167772205,167772205,167772205 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",124805342,124805342,124805342 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",133665488,133665488,133665488 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",403.572293GB/s,403.572293GB/s,403.572293GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",403.572293GB/s,403.572293GB/s,403.572293GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",6.145934%,6.145934%,6.145934% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",51.266564%,51.266564%,51.266564% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",21.217368%,21.217368%,21.217368% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",300.216544GB/s,300.216544GB/s,300.216544GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",321.529433GB/s,321.529433GB/s,321.529433GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",706.300777GB/s,706.300777GB/s,706.300777GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",306.619595GB/s,306.619595GB/s,306.619595GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",403.572293GB/s,403.572293GB/s,403.572293GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",309.243448GB/s,309.243448GB/s,309.243448GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",403.572401GB/s,403.572401GB/s,403.572401GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",12.611328KB/s,12.611328KB/s,12.610352KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",146805760,146805760,146805760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",104535040,104535040,104535040 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",69586601,69586601,69586601 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","High (8)","High (8)","High (8)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.008167%,0.008167%,0.008167% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.075287%,0.075287%,0.075287% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",10.470977%,10.470977%,10.470977% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",88.666320%,88.666320%,88.666320% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.006697%,0.006697%,0.006697% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.006440%,0.006440%,0.006440% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000116%,0.000116%,0.000116% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",798064640,798064640,798064640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1258291200,1258291200,1258291200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",147128320,147128320,147128320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",69586601,69586601,69586601 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",39331840,39331840,39331840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",39331840,39331840,39331840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",127467204,127467204,127467204 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.384760%,0.384760%,0.384760% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.381238%,0.381238%,0.381238% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.069300,0.069300,0.069300 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.069362,0.069362,0.069362 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",1.734049%,1.734049%,1.734049% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",80.909893%,80.909893%,80.909893% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.741876,0.741876,0.741876 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.250093,0.250093,0.250093 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",39359.000000,39359.000000,39359.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.034687%,99.034687%,99.034687% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000110,0.000110,0.000110 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",660513658,660513658,660513658 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",43,43,43 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",655674514,655674514,655674514 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",96511,96511,96511 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",1.576051%,1.576051%,1.576051% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.784496%,0.784496%,0.784496% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",817.597486GB/s,817.597486GB/s,817.597486GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",123.233282MB/s,123.233282MB/s,123.233281MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",836.843766GB/s,836.843766GB/s,836.843766GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",823.629545GB/s,823.629545GB/s,823.629545GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",823.631687GB/s,823.631687GB/s,823.631687GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",56.223633KB/s,56.223633KB/s,56.222656KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",6.537109KB/s,6.537109KB/s,6.536133KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",166507520,166507520,166507520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",166525876,166525876,166525876 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003324%,0.003324%,0.003324% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.361849%,0.361849%,0.361849% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.632021%,99.632021%,99.632021% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.001161%,0.001161%,0.001161% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.000448%,0.000448%,0.000448% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.000785%,0.000785%,0.000785% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000104%,0.000104%,0.000104% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",3943464960,3943464960,3943464960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",166525876,166525876,166525876 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",660511940,660511940,660511940 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000010%,0.000010%,0.000010% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000299%,0.000299%,0.000299% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.080511,0.080511,0.080511 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.072520,0.072520,0.072520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.812992%,1.812992%,1.812992% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",93.916587%,93.916587%,93.916587% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999361,0.999361,0.999361 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.072624,0.072624,0.072624 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",10626.000000,10626.000000,10626.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.795996%,98.795996%,98.795996% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000681,0.000681,0.000681 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",13524334,13524334,13524334 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167772197,167772197,167772197 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",11631246,11631246,11631246 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",32112100,32112100,32112100 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",1141.311481GB/s,1141.311481GB/s,1141.311481GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",1141.311481GB/s,1141.311481GB/s,1141.311481GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",13.549091%,13.549091%,13.549091% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",57.043526%,57.043526%,57.043526% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",81.425591%,81.425591%,81.425591% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",79.124418GB/s,79.124418GB/s,79.124418GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",218.450477GB/s,218.450477GB/s,218.450477GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",570.795061GB/s,570.795061GB/s,570.795061GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",92.031201GB/s,92.031201GB/s,92.031201GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1141.311481GB/s,1141.311481GB/s,1141.311481GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",92.002616GB/s,92.002616GB/s,92.002616GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",1141.311733GB/s,1141.311733GB/s,1141.311733GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",35.665039KB/s,35.665039KB/s,35.664062KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",41948160,41948160,41948160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",36761600,36761600,36761600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",36786619,36786619,36786619 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Mid (4)","Mid (4)","Mid (4)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.007230%,0.007230%,0.007230% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.068631%,0.068631%,0.068631% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",3.842393%,3.842393%,3.842393% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",91.532629%,91.532629%,91.532629% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.006644%,0.006644%,0.006644% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.003462%,0.003462%,0.003462% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000388%,0.000388%,0.000388% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",294912000,294912000,294912000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",838860800,838860800,838860800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",31784960,31784960,31784960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",36786619,36786619,36786619 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",26224640,26224640,26224640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",26224640,26224640,26224640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",13528536,13528536,13528536 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",4.234936%,4.234936%,4.234936% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.303686%,0.303686%,0.303686% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.118439,0.118439,0.118439 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.088295,0.088295,0.088295 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.207369%,2.207369%,2.207369% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",94.882226%,94.882226%,94.882226% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.983714,0.983714,0.983714 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.279310,0.279310,0.279310 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",16386.000000,16386.000000,16386.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.219227%,99.219227%,99.219227% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.001716,0.001716,0.001716 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",291818606,291818606,291818606 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335544363,335544363,335544363 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",289553726,289553726,289553726 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",292125082,292125082,292125082 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",428.831031GB/s,428.831031GB/s,428.831031GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",428.831031GB/s,428.831031GB/s,428.831031GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",428.831031GB/s,428.831031GB/s,428.831031GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.357278%,0.357278%,0.357278% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.150175%,0.150175%,0.150175% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",12.939069%,12.939069%,12.939069% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",370.054313GB/s,370.054313GB/s,370.054313GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",373.340547GB/s,373.340547GB/s,373.340547GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",375.253326GB/s,375.253326GB/s,375.253326GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",372.354427GB/s,372.354427GB/s,372.354427GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",428.831031GB/s,428.831031GB/s,428.831031GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",372.948866GB/s,372.948866GB/s,372.948866GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",428.831086GB/s,428.831086GB/s,428.831086GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",6.700195KB/s,6.700195KB/s,6.699219KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",146805760,146805760,146805760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",66252800,66252800,66252800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",66366463,66366463,66366463 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003284%,0.003284%,0.003284% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.058942%,0.058942%,0.058942% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",9.458855%,9.458855%,9.458855% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",90.009956%,90.009956%,90.009956% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.007815%,0.007815%,0.007815% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.004152%,0.004152%,0.004152% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000879%,0.000879%,0.000879% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",735313920,735313920,735313920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1258291200,1258291200,1258291200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",115671040,115671040,115671040 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",66366463,66366463,66366463 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",39331840,39331840,39331840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",39331840,39331840,39331840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",291353480,291353480,291353480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.275441%,0.275441%,0.275441% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.180677%,0.180677%,0.180677% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.040737,0.040737,0.040737 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.032555,0.032555,0.032555 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",0.813875%,0.813875%,0.813875% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",87.692098%,87.692098%,87.692098% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.603234,0.603234,0.603234 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.100281,0.100281,0.100281 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",23875.000000,23875.000000,23875.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.408639%,98.408639%,98.408639% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000295,0.000295,0.000295 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",2662,2662,2662 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",16,16,16 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",522,522,522 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",440,440,440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.999237%,99.999237%,99.999237% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",90.000000%,90.000000%,90.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",4.741787MB/s,4.741787MB/s,4.741786MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",3.996908MB/s,3.996908MB/s,3.996907MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",5953.395371GB/s,5953.395371GB/s,5953.395371GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",23.254741MB/s,23.254741MB/s,23.254740MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",24.181296MB/s,24.181296MB/s,24.181295MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",148.830078KB/s,148.830078KB/s,148.829102KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.508789KB/s,46.508789KB/s,46.507812KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",87229440,87229440,87229440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",87255385,87255385,87255385 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.017718%,0.017718%,0.017718% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.409226%,1.409226%,1.409226% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",53.676782%,53.676782%,53.676782% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",43.497785%,43.497785%,43.497785% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.003128%,0.003128%,0.003128% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.028869%,0.028869%,0.028869% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000785%,0.000785%,0.000785% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",1406566400,1406566400,1406566400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",87255385,87255385,87255385 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",2560,2560,2560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000066%,0.000066%,0.000066% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.365642%,1.365642%,1.365642% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.347188,0.347188,0.347188 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.259753,0.259753,0.259753 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",6.493828%,6.493828%,6.493828% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",99.869090%,99.869090%,99.869090% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.990793,0.990793,0.990793 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",1.123170,1.123170,1.123170 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",31807.000000,31807.000000,31807.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.805491%,98.805491%,98.805491% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000115,0.000115,0.000115 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",167771010,167771010,167771010 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",23975,23975,23975 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",167573358,167573358,167573358 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",97203,97203,97203 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",50.001160%,50.001160%,50.001160% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.060172%,50.060172%,50.060172% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",761.358228GB/s,761.358228GB/s,761.358228GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",452.234508MB/s,452.234508MB/s,452.234507MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",3049.138934GB/s,3049.138934GB/s,3049.138934GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",762.243788GB/s,762.243788GB/s,762.243788GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",762.256246GB/s,762.256246GB/s,762.256246GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",111.543083MB/s,111.543083MB/s,111.543082MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",23.820312KB/s,23.820312KB/s,23.819336KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",162851840,162851840,162851840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",127855956,127855956,127855956 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.012324%,0.012324%,0.012324% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.024264%,1.024264%,1.024264% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",98.952975%,98.952975%,98.952975% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.005279%,0.005279%,0.005279% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.001654%,0.001654%,0.001654% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002453%,0.002453%,0.002453% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000398%,0.000398%,0.000398% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",2706145280,2706145280,2706145280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",127855956,127855956,127855956 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",167768268,167768268,167768268 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000034%,0.000034%,0.000034% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000618%,0.000618%,0.000618% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.197380,0.197380,0.197380 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.197175,0.197175,0.197175 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",4.929384%,4.929384%,4.929384% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",98.896608%,98.896608%,98.896608% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.999129,0.999129,0.999129 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.197569,0.197569,0.197569 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (6)","Mid (6)","Mid (6)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",14466.000000,14466.000000,14466.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.115599%,99.115599%,99.115599% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000426,0.000426,0.000426 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",26061770,26061770,26061770 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83886120,83886120,83886120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",25307418,25307418,25307418 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",43676771,43676771,43676771 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",780.689855GB/s,780.689855GB/s,780.689855GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",780.689855GB/s,780.689855GB/s,780.689855GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",45.094833%,45.094833%,45.094833% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",49.826988%,49.826988%,49.826988% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",50.113463%,50.113463%,50.113463% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",235.524708GB/s,235.524708GB/s,235.524708GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",406.479979GB/s,406.479979GB/s,406.479979GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",2147.087699GB/s,2147.087699GB/s,2147.087699GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",237.327926GB/s,237.327926GB/s,237.327926GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",780.689855GB/s,780.689855GB/s,780.689855GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",242.545121GB/s,242.545121GB/s,242.545121GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",780.690227GB/s,780.690227GB/s,780.690227GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",48.792969KB/s,48.792969KB/s,48.791992KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",115348480,115348480,115348480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",74065920,74065920,74065920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",56446463,56446463,56446463 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","High (8)","High (8)","High (8)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.010808%,0.010808%,0.010808% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.140674%,0.140674%,0.140674% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",5.103873%,5.103873%,5.103873% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",92.629125%,92.629125%,92.629125% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.006258%,0.006258%,0.006258% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.012629%,0.012629%,0.012629% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001558%,0.001558%,0.001558% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",598999040,598999040,598999040 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1132462080,1132462080,1132462080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",63242240,63242240,63242240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",56446463,56446463,56446463 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",35399680,35399680,35399680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",35399680,35399680,35399680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",25501176,25501176,25501176 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",1.029974%,1.029974%,1.029974% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.065100%,1.065100%,1.065100% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.183860,0.183860,0.183860 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.179816,0.179816,0.179816 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",4.495410%,4.495410%,4.495410% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",96.074809%,96.074809%,96.074809% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.990280,0.990280,0.990280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.828364,0.828364,0.828364 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (4)","Mid (4)","Mid (4)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",31423.000000,31423.000000,31423.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.790894%,98.790894%,98.790894% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000193,0.000193,0.000193 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",9549350,9549350,9549350 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",36,36,36 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",278465,278465,278465 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",95913,95913,95913 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",97.152618%,97.152618%,97.152618% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",101.127312%,101.127312%,101.127312% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",2.461852GB/s,2.461852GB/s,2.461852GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",868.297974MB/s,868.297974MB/s,868.297973MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",5933.138639GB/s,5933.138639GB/s,5933.138639GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",84.466978GB/s,84.466978GB/s,84.466978GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",84.423853GB/s,84.423853GB/s,84.423853GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",333.728516KB/s,333.728516KB/s,333.727539KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.350586KB/s,46.350586KB/s,46.349609KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",160885760,160885760,160885760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",125899440,125899440,125899440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.023429%,0.023429%,0.023429% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.946406%,1.946406%,1.946406% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",71.202817%,71.202817%,71.202817% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",26.066171%,26.066171%,26.066171% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.003173%,0.003173%,0.003173% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.021127%,0.021127%,0.021127% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000930%,0.000930%,0.000930% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",2643230720,2643230720,2643230720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",125899440,125899440,125899440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",9554228,9554228,9554228 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000066%,0.000066%,0.000066% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.735880%,0.735880%,0.735880% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.373623,0.373623,0.373623 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.373698,0.373698,0.373698 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",9.342459%,9.342459%,9.342459% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",99.730415%,99.730415%,99.730415% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.998701,0.998701,0.998701 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.839703,0.839703,0.839703 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",23875.000000,23875.000000,23875.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.408639%,98.408639%,98.408639% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000294,0.000294,0.000294 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",2662,2662,2662 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",16,16,16 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",246,246,246 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",192,192,192 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.999237%,99.999237%,99.999237% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",97.500000%,97.500000%,97.500000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",2.234354MB/s,2.234354MB/s,2.234353MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",1.743886MB/s,1.743886MB/s,1.743885MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",5952.647642GB/s,5952.647642GB/s,5952.647642GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",23.251820MB/s,23.251820MB/s,23.251819MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",24.178260MB/s,24.178260MB/s,24.178259MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",148.811523KB/s,148.811523KB/s,148.810547KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.502930KB/s,46.502930KB/s,46.501953KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",87229440,87229440,87229440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",87255361,87255361,87255361 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.018613%,0.018613%,0.018613% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.488169%,1.488169%,1.488169% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",55.169047%,55.169047%,55.169047% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",41.874168%,41.874168%,41.874168% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.003363%,0.003363%,0.003363% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.025184%,0.025184%,0.025184% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001084%,0.001084%,0.001084% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",1406566400,1406566400,1406566400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",87255361,87255361,87255361 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",2560,2560,2560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000070%,0.000070%,0.000070% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.420303%,1.420303%,1.420303% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.347370,0.347370,0.347370 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.259611,0.259611,0.259611 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",6.490264%,6.490264%,6.490264% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",99.857455%,99.857455%,99.857455% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.990440,0.990440,0.990440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",1.110064,1.110064,1.110064 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",15938.000000,15938.000000,15938.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.197280%,99.197280%,99.197280% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000643,0.000643,0.000643 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",51308870,51308870,51308870 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83910307,83910307,83910307 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",51571369,51571369,51571369 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",67017375,67017375,67017375 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",491.800699GB/s,491.800699GB/s,491.800699GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",491.800699GB/s,491.800699GB/s,491.800699GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",38.585047%,38.585047%,38.585047% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.441727%,50.441727%,50.441727% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",20.856891%,20.856891%,20.856891% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",302.348558GB/s,302.348558GB/s,302.348558GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",392.904185GB/s,392.904185GB/s,392.904184GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",1598.472339GB/s,1598.472339GB/s,1598.472339GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",301.052117GB/s,301.052117GB/s,301.052117GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",491.800699GB/s,491.800699GB/s,491.800699GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",300.809599GB/s,300.809599GB/s,300.809599GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",491.942735GB/s,491.942735GB/s,491.942735GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",30.737305KB/s,30.737305KB/s,30.736328KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",136320000,136320000,136320000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",63959040,63959040,63959040 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",64000166,64000166,64000166 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.007707%,0.007707%,0.007707% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.110191%,0.110191%,0.110191% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",2.959566%,2.959566%,2.959566% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",95.230122%,95.230122%,95.230122% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.003958%,0.003958%,0.003958% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.005850%,0.005850%,0.005850% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000889%,0.000889%,0.000889% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",682885120,682885120,682885120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1216348160,1216348160,1216348160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",136642560,136642560,136642560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",64000166,64000166,64000166 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",38021120,38021120,38021120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",38021120,38021120,38021120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",51350236,51350236,51350236 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.897464%,0.897464%,0.897464% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.784253%,0.784253%,0.784253% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.139911,0.139911,0.139911 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.127761,0.127761,0.127761 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",3.194021%,3.194021%,3.194021% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",96.581575%,96.581575%,96.581575% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.992456,0.992456,0.992456 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.624726,0.624726,0.624726 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",8710.000000,8710.000000,8710.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.531142%,98.531142%,98.531142% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000293,0.000293,0.000293 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",2886,2886,2886 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83886096,83886096,83886096 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",346,346,346 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",1869,1869,1869 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",480.402462GB/s,480.402462GB/s,480.402462GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",480.402462GB/s,480.402462GB/s,480.402462GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",5.879481%,5.879481%,5.879481% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",95.000000%,95.000000%,95.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",99.999695%,99.999695%,99.999695% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",2.029043MB/s,2.029043MB/s,2.029042MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",10.960354MB/s,10.960354MB/s,10.960353MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",60.167594GB/s,60.167594GB/s,60.167594GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",15.012576MB/s,15.012576MB/s,15.012575MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",480.402462GB/s,480.402462GB/s,480.402462GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",16.924335MB/s,16.924335MB/s,16.924334MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",480.402554GB/s,480.402554GB/s,480.402554GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",30.024414KB/s,30.024414KB/s,30.023438KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",5248000,5248000,5248000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",26951680,26951680,26951680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",26959568,26959568,26959568 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006395%,0.006395%,0.006395% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.035449%,0.035449%,0.035449% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.548555%,0.548555%,0.548555% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.041299%,0.041299%,0.041299% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.003606%,0.003606%,0.003606% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002912%,0.002912%,0.002912% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000135%,0.000135%,0.000135% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",159088640,159088640,159088640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",692060160,692060160,692060160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",491520,491520,491520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",26959568,26959568,26959568 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",2560,2560,2560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",98.211117%,98.211117%,98.211117% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.150531%,1.150531%,1.150531% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.098737,0.098737,0.098737 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.062428,0.062428,0.062428 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",1.560712%,1.560712%,1.560712% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",83.414885%,83.414885%,83.414885% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.981712,0.981712,0.981712 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.782506,0.782506,0.782506 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",8902.000000,8902.000000,8902.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.562823%,98.562823%,98.562823% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000324,0.000324,0.000324 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",20582,20582,20582 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335544352,335544352,335544352 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",4202,4202,4202 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",31770,31770,31770 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",1339.062019GB/s,1339.062019GB/s,1339.062019GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",1339.062019GB/s,1339.062019GB/s,1339.062019GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",1339.062019GB/s,1339.062019GB/s,1339.062019GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",3.024384%,3.024384%,3.024384% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",80.000000%,80.000000%,80.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",99.998779%,99.998779%,99.998779% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",17.171443MB/s,17.171443MB/s,17.171442MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",129.827882MB/s,129.827882MB/s,129.827881MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",41.927418GB/s,41.927418GB/s,41.927418GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",83.691376MB/s,83.691376MB/s,83.691375MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1339.062019GB/s,1339.062019GB/s,1339.062019GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",84.108198MB/s,84.108198MB/s,84.108197MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",1339.062147GB/s,1339.062147GB/s,1339.062147GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",20.921875KB/s,20.921875KB/s,20.920898KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",5248000,5248000,5248000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",45578240,45578240,45578240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",27940306,27940306,27940306 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.004439%,0.004439%,0.004439% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.025446%,0.025446%,0.025446% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.508372%,0.508372%,0.508372% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.044104%,0.044104%,0.044104% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.001045%,0.001045%,0.001045% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002516%,0.002516%,0.002516% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000018%,0.000018%,0.000018% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",180224000,180224000,180224000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",692060160,692060160,692060160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",10813440,10813440,10813440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",27940306,27940306,27940306 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",20480,20480,20480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",98.742640%,98.742640%,98.742640% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.671421%,0.671421%,0.671421% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.038375,0.038375,0.038375 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.038387,0.038387,0.038387 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",0.959687%,0.959687%,0.959687% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",97.083241%,97.083241%,97.083241% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.984327,0.984327,0.984327 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.456922,0.456922,0.456922 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",8838.000000,8838.000000,8838.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.552416%,98.552416%,98.552416% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000186,0.000186,0.000186 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",6442,6442,6442 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167772176,167772176,167772176 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",1818,1818,1818 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",6822,6822,6822 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",922.939703GB/s,922.939703GB/s,922.939703GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",922.939703GB/s,922.939703GB/s,922.939703GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",3.027344%,3.027344%,3.027344% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",90.000000%,90.000000%,90.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",99.999390%,99.999390%,99.999390% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",10.241116MB/s,10.241116MB/s,10.241115MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",38.429532MB/s,38.429532MB/s,38.429531MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",57.796395GB/s,57.796395GB/s,57.796395GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",28.841866MB/s,28.841866MB/s,28.841865MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",922.939703GB/s,922.939703GB/s,922.939703GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",36.288925MB/s,36.288925MB/s,36.288924MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",922.939791GB/s,922.939791GB/s,922.939791GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",28.841797KB/s,28.841797KB/s,28.840820KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",5248000,5248000,5248000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",45250560,45250560,45250560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",27615240,27615240,27615240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.005864%,0.005864%,0.005864% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.038070%,0.038070%,0.038070% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.580417%,0.580417%,0.580417% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.049232%,0.049232%,0.049232% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.002528%,0.002528%,0.002528% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002307%,0.002307%,0.002307% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000071%,0.000071%,0.000071% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",169738240,169738240,169738240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",692060160,692060160,692060160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",10813440,10813440,10813440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",27615240,27615240,27615240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",5120,5120,5120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",98.263442%,98.263442%,98.263442% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.058069%,1.058069%,1.058069% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.058369,0.058369,0.058369 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.058380,0.058380,0.058380 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",1.459492%,1.459492%,1.459492% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",86.984643%,86.984643%,86.984643% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.980017,0.980017,0.980017 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.715990,0.715990,0.715990 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",31935.000000,31935.000000,31935.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.810279%,98.810279%,98.810279% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000102,0.000102,0.000102 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",335545246,335545246,335545246 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",37,37,37 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",335518950,335518950,335518950 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",93947,93947,93947 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.004647%,50.004647%,50.004647% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",829.990205GB/s,829.990205GB/s,829.990205GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",237.979082MB/s,237.979082MB/s,237.979081MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",1660.156590GB/s,1660.156590GB/s,1660.156590GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",830.052964GB/s,830.052964GB/s,830.052964GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",830.055255GB/s,830.055255GB/s,830.055255GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",95.974609KB/s,95.974609KB/s,95.973633KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",12.968750KB/s,12.968750KB/s,12.967773KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",163507200,163507200,163507200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",128512631,128512631,128512631 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006173%,0.006173%,0.006173% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.553559%,0.553559%,0.553559% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.347643%,99.347643%,99.347643% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.089567%,0.089567%,0.089567% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.000861%,0.000861%,0.000861% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001282%,0.001282%,0.001282% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000008%,0.000008%,0.000008% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",2727116800,2727116800,2727116800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",128512631,128512631,128512631 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000018%,0.000018%,0.000018% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000889%,0.000889%,0.000889% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.107385,0.107385,0.107385 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.107396,0.107396,0.107396 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.684898%,2.684898%,2.684898% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",99.180362%,99.180362%,99.180362% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.999287,0.999287,0.999287 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.107978,0.107978,0.107978 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",31679.000000,31679.000000,31679.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.800664%,98.800664%,98.800664% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000104,0.000104,0.000104 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",335479966,335479966,335479966 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",35,35,35 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",335218622,335218622,335218622 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",96763,96763,96763 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.018018%,0.018018%,0.018018% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.037604%,50.037604%,50.037604% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",824.053051GB/s,824.053051GB/s,824.053051GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",243.577022MB/s,243.577022MB/s,243.577021MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",1649.757745GB/s,1649.757745GB/s,1649.757745GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",824.705074GB/s,824.705074GB/s,824.705074GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",824.695501GB/s,824.695501GB/s,824.695501GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",90.217773KB/s,90.217773KB/s,90.216797KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",12.887695KB/s,12.887695KB/s,12.886719KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",162196480,162196480,162196480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",127198767,127198767,127198767 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006145%,0.006145%,0.006145% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.545789%,0.545789%,0.545789% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.347202%,99.347202%,99.347202% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.096608%,0.096608%,0.096608% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.000857%,0.000857%,0.000857% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002391%,0.002391%,0.002391% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000008%,0.000008%,0.000008% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",2685173760,2685173760,2685173760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",127198767,127198767,127198767 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",335483860,335483860,335483860 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000018%,0.000018%,0.000018% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000981%,0.000981%,0.000981% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.105731,0.105731,0.105731 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.105742,0.105742,0.105742 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.643542%,2.643542%,2.643542% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",99.126364%,99.126364%,99.126364% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.999321,0.999321,0.999321 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.106436,0.106436,0.106436 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",40255.000000,40255.000000,40255.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.056173%,99.056173%,99.056173% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000105,0.000105,0.000105 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",671089862,671089862,671089862 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",47,47,47 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",671089442,671089442,671089442 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",96215,96215,96215 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",833.565227GB/s,833.565227GB/s,833.565227GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",122.377597MB/s,122.377597MB/s,122.377596MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",833.589669GB/s,833.589669GB/s,833.589669GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",833.564231GB/s,833.564231GB/s,833.564231GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",833.565748GB/s,833.565748GB/s,833.565748GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",61.214844KB/s,61.214844KB/s,61.213867KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",6.511719KB/s,6.511719KB/s,6.510742KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",206105600,206105600,206105600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",171113524,171113524,171113524 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003230%,0.003230%,0.003230% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.360916%,0.360916%,0.360916% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.632548%,99.632548%,99.632548% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.001566%,0.001566%,0.001566% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.000436%,0.000436%,0.000436% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.000965%,0.000965%,0.000965% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000117%,0.000117%,0.000117% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",4090265600,4090265600,4090265600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",171113524,171113524,171113524 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",671088640,671088640,671088640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000009%,0.000009%,0.000009% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000213%,0.000213%,0.000213% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.072294,0.072294,0.072294 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.072302,0.072302,0.072302 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.807546%,1.807546%,1.807546% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",98.464291%,98.464291%,98.464291% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999006,0.999006,0.999006 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.072370,0.072370,0.072370 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",11906.000000,11906.000000,11906.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.925437%,98.925437%,98.925437% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000646,0.000646,0.000646 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",45283998,45283998,45283998 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167772205,167772205,167772205 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",44941546,44941546,44941546 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",61075063,61075063,61075063 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",1010.912599GB/s,1010.912599GB/s,1010.912599GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",1010.912599GB/s,1010.912599GB/s,1010.912599GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",7.618821%,7.618821%,7.618821% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",49.889852%,49.889852%,49.889852% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",63.798742%,63.798742%,63.798742% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",270.795674GB/s,270.795674GB/s,270.795674GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",368.008320GB/s,368.008320GB/s,368.008320GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",758.307852GB/s,758.307852GB/s,758.307852GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",273.190247GB/s,273.190247GB/s,273.190247GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1010.912599GB/s,1010.912599GB/s,1010.912599GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",272.859121GB/s,272.859121GB/s,272.859121GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",1010.912870GB/s,1010.912870GB/s,1010.912870GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",31.590820KB/s,31.590820KB/s,31.589844KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",62919680,62919680,62919680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",43315200,43315200,43315200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",43343184,43343184,43343184 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","High (8)","High (8)","High (8)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006551%,0.006551%,0.006551% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.053314%,0.053314%,0.053314% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",6.018004%,6.018004%,6.018004% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",91.635725%,91.635725%,91.635725% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.007413%,0.007413%,0.007413% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.003800%,0.003800%,0.003800% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000366%,0.000366%,0.000366% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",420741120,420741120,420741120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",922746880,922746880,922746880 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",31784960,31784960,31784960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",43343184,43343184,43343184 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",28846080,28846080,28846080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",28846080,28846080,28846080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",45338952,45338952,45338952 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",1.900568%,1.900568%,1.900568% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.374259%,0.374259%,0.374259% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.116047,0.116047,0.116047 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.091260,0.091260,0.091260 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.281491%,2.281491%,2.281491% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",96.816299%,96.816299%,96.816299% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.989517,0.989517,0.989517 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.327937,0.327937,0.327937 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",39999.000000,39999.000000,39999.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.050133%,99.050133%,99.050133% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000107,0.000107,0.000107 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",671090414,671090414,671090414 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",42,42,42 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",670512574,670512574,670512574 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",97306,97306,97306 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.076563%,0.076563%,0.076563% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",824.206250GB/s,824.206250GB/s,824.206250GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",122.480951MB/s,122.480951MB/s,122.480950MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",824.939535GB/s,824.939535GB/s,824.939535GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",824.914360GB/s,824.914360GB/s,824.914360GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",824.916541GB/s,824.916541GB/s,824.916541GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",54.134766KB/s,54.134766KB/s,54.133789KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",6.444336KB/s,6.444336KB/s,6.443359KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",169784320,169784320,169784320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",169806175,169806175,169806175 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003252%,0.003252%,0.003252% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.360187%,0.360187%,0.360187% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.632568%,99.632568%,99.632568% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.001676%,0.001676%,0.001676% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.000438%,0.000438%,0.000438% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001455%,0.001455%,0.001455% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000121%,0.000121%,0.000121% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",4048322560,4048322560,4048322560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",169806175,169806175,169806175 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",671088640,671088640,671088640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000009%,0.000009%,0.000009% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000294%,0.000294%,0.000294% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.079717,0.079717,0.079717 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.072093,0.072093,0.072093 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.802332%,1.802332%,1.802332% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",95.033981%,95.033981%,95.033981% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999104,0.999104,0.999104 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.072297,0.072297,0.072297 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"inst_per_warp","Instructions per warp",8838.000000,8838.000000,8838.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.552416%,98.552416%,98.552416% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"inst_replay_overhead","Instruction Replay Overhead",0.000735,0.000738,0.000737 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"gst_transactions","Global Store Transactions",33554432,33554432,33554432 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"l2_read_transactions","L2 Read Transactions",5222,5222,5222 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"l2_write_transactions","L2 Write Transactions",33554448,33554448,33554448 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"dram_read_transactions","Device Memory Read Transactions",138,298,218 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"dram_write_transactions","Device Memory Write Transactions",290,340,315 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"gst_requested_throughput","Requested Global Store Throughput",133.552823GB/s,135.236800GB/s,134.389537GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"gst_throughput","Global Store Throughput",133.552823GB/s,135.236800GB/s,134.389537GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"tex_cache_hit_rate","Unified Cache Hit Rate",3.015507%,3.015507%,3.015507% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",99.375000%,99.375000%,99.375000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",99.999809%,99.999809%,99.999809% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"dram_read_throughput","Device Memory Read Throughput",583.208008KB/s,1.214561MB/s,915.527344KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"dram_write_throughput","Device Memory Write Throughput",1.181955MB/s,1.403214MB/s,1.291891MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"tex_cache_throughput","Unified cache to SM throughput",8.363354GB/s,8.468808GB/s,8.415751GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",20.867628MB/s,21.130750MB/s,20.998364MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",133.552823GB/s,135.236800GB/s,134.389537GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"l2_read_throughput","L2 Throughput (Reads)",21.283350MB/s,21.551714MB/s,21.416691MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"l2_write_throughput","L2 Throughput (Writes)",133.552887GB/s,135.236865GB/s,134.389601GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"sysmem_write_throughput","System Memory Write Throughput",20.867188KB/s,21.129883KB/s,20.997070KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"tex_cache_transactions","Unified cache to SM transactions",1049600,1049600,1049600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"inst_executed","Instructions Executed",5521408,5521408,5521408 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"inst_issued","Instructions Issued",5525468,5525493,5525480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.007138%,0.007472%,0.007305% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.027661%,0.028040%,0.027850% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"stall_memory_dependency","Issue Stall Reasons (Data Request)",2.309251%,2.313859%,2.311555% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"stall_texture","Issue Stall Reasons (Texture)",0.000952%,0.000953%,0.000953% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"stall_other","Issue Stall Reasons (Other)",0.001484%,0.001518%,0.001501% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.006528%,0.006682%,0.006605% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000066%,0.000067%,0.000067% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"inst_integer","Integer Instructions",33947648,33947648,33947648 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"inst_control","Control-Flow Instructions",2097152,2097152,2097152 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"inst_compute_ld_st","Load/Store Instructions",138412032,138412032,138412032 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"inst_misc","Misc Instructions",2162688,2162688,2162688 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"issue_slots","Issue Slots",5525468,5525493,5525480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"cf_issued","Issued Control-Flow Instructions",67584,67584,67584 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"cf_executed","Executed Control-Flow Instructions",67584,67584,67584 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"ldst_issued","Issued Load/Store Instructions",4327424,4327424,4327424 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"ldst_executed","Executed Load/Store Instructions",4327424,4327424,4327424 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",5120,5120,5120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",97.515531%,97.522695%,97.519113% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.124225%,0.125878%,0.125052% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",33554432,33554432,33554432 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"ipc","Executed IPC",0.015156,0.015353,0.015255 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"issued_ipc","Issued IPC",0.009114,0.009233,0.009174 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"issue_slot_utilization","Issue Slot Utilization",0.227852%,0.230826%,0.229339% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"sm_efficiency","Multiprocessor Activity",80.784143%,80.793102%,80.788622% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"achieved_occupancy","Achieved Occupancy",0.210450,0.210515,0.210482 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.025730,0.026055,0.025892 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",2,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",11906.000000,11906.000000,11906.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.925437%,98.925437%,98.925437% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.001067,0.001067,0.001067 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",94528278,94528278,94528278 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335544369,335544369,335544369 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",92286545,92286545,92286545 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",121620856,121620856,121620856 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",920.947607GB/s,920.947607GB/s,920.947607GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",920.947607GB/s,920.947607GB/s,920.947607GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",920.947607GB/s,920.947607GB/s,920.947607GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",6.898770%,6.898770%,6.898770% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",1.461617%,1.461617%,1.461617% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",64.460322%,64.460322%,64.460322% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",253.293135GB/s,253.293135GB/s,253.293135GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",333.805192GB/s,333.805192GB/s,333.805192GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",345.411563GB/s,345.411563GB/s,345.411563GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",257.996025GB/s,257.996025GB/s,257.996025GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",920.947607GB/s,920.947607GB/s,920.947607GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",259.445880GB/s,259.445880GB/s,259.445880GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",920.947742GB/s,920.947742GB/s,920.947742GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",14.389648KB/s,14.389648KB/s,14.388672KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",62919680,62919680,62919680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",43315200,43315200,43315200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",43361405,43361405,43361405 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","High (8)","High (8)","High (8)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006136%,0.006136%,0.006136% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.064709%,0.064709%,0.064709% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",9.318240%,9.318240%,9.318240% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",83.977075%,83.977075%,83.977075% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.007393%,0.007393%,0.007393% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.004736%,0.004736%,0.004736% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001089%,0.001089%,0.001089% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",399769600,399769600,399769600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",922746880,922746880,922746880 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",52756480,52756480,52756480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",43361405,43361405,43361405 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",28846080,28846080,28846080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",28846080,28846080,28846080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",94000028,94000028,94000028 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",6.430223%,6.430223%,6.430223% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.190398%,0.190398%,0.190398% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.065399,0.065399,0.065399 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.049736,0.049736,0.049736 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.243411%,1.243411%,1.243411% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",80.179452%,80.179452%,80.179452% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.589293,0.589293,0.589293 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.121753,0.121753,0.121753 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",32195.000000,32195.000000,32195.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.819887%,98.819887%,98.819887% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000098,0.000098,0.000098 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",11166,11166,11166 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",16,16,16 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",266,266,266 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",216,216,216 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.998474%,99.998474%,99.998474% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",97.500000%,97.500000%,97.500000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",1.210966MB/s,1.210966MB/s,1.210965MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",0.983341MB/s,0.983341MB/s,0.983340MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",2983.622256GB/s,2983.622256GB/s,2983.622256GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",46.617675MB/s,46.617675MB/s,46.617674MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",50.833297MB/s,50.833297MB/s,50.833296MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",74.587891KB/s,74.587891KB/s,74.586914KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",23.308594KB/s,23.308594KB/s,23.307617KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",164838400,164838400,164838400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",129843712,129843712,129843712 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.008064%,0.008064%,0.008064% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.984972%,0.984972%,0.984972% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",98.996904%,98.996904%,98.996904% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.002923%,0.002923%,0.002923% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.001541%,0.001541%,0.001541% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.004628%,0.004628%,0.004628% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000248%,0.000248%,0.000248% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",2769715200,2769715200,2769715200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",129843712,129843712,129843712 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",10240,10240,10240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000033%,0.000033%,0.000033% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000688%,0.000688%,0.000688% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.193413,0.193413,0.193413 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.193432,0.193432,0.193432 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",4.835802%,4.835802%,4.835802% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",99.926584%,99.926584%,99.926584% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999439,0.999439,0.999439 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.193863,0.193863,0.193863 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",8838.000000,8838.000000,8838.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.552416%,98.552416%,98.552416% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000225,0.000225,0.000225 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",5950,5950,5950 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167772176,167772176,167772176 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",266,266,266 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",1921,1921,1921 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",497.353186GB/s,497.353186GB/s,497.353186GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",497.353186GB/s,497.353186GB/s,497.353186GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",3.027344%,3.027344%,3.027344% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",97.500000%,97.500000%,97.500000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",99.999847%,99.999847%,99.999847% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",826.849609KB/s,826.849609KB/s,826.848633KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",5.831393MB/s,5.831393MB/s,5.831392MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",31.145286GB/s,31.145286GB/s,31.145286GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",15.542287MB/s,15.542287MB/s,15.542286MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",497.353186GB/s,497.353186GB/s,497.353186GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",18.061837MB/s,18.061837MB/s,18.061836MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",497.353233GB/s,497.353233GB/s,497.353233GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",15.541992KB/s,15.541992KB/s,15.541016KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",5248000,5248000,5248000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",45250560,45250560,45250560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",27613238,27613238,27613238 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003158%,0.003158%,0.003158% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.020467%,0.020467%,0.020467% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.526783%,0.526783%,0.526783% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.056519%,0.056519%,0.056519% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.001979%,0.001979%,0.001979% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002081%,0.002081%,0.002081% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000091%,0.000091%,0.000091% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",169738240,169738240,169738240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",692060160,692060160,692060160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",10813440,10813440,10813440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",27613238,27613238,27613238 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",5120,5120,5120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",98.818380%,98.818380%,98.818380% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.570542%,0.570542%,0.570542% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.031477,0.031477,0.031477 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.031494,0.031494,0.031494 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",0.787354%,0.787354%,0.787354% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",87.089998%,87.089998%,87.089998% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.984231,0.984231,0.984231 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.388005,0.388005,0.388005 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",12930.000000,12930.000000,12930.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.010538%,99.010538%,99.010538% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000659,0.000659,0.000659 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",72374778,72374778,72374778 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167772203,167772203,167772203 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",72130186,72130186,72130186 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",87315431,87315431,87315431 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",764.780219GB/s,764.780219GB/s,764.780219GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",764.780219GB/s,764.780219GB/s,764.780219GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",4.579654%,4.579654%,4.579654% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",49.836622%,49.836622%,49.836622% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",48.199356%,48.199356%,48.199356% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",328.801509GB/s,328.801509GB/s,328.801509GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",398.022619GB/s,398.022619GB/s,398.022619GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",764.873576GB/s,764.873576GB/s,764.873576GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",329.853681GB/s,329.853681GB/s,329.853681GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",764.780219GB/s,764.780219GB/s,764.780219GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",329.916469GB/s,329.916469GB/s,329.916469GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",764.780415GB/s,764.780415GB/s,764.780415GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",23.898438KB/s,23.898438KB/s,23.897461KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",83891200,83891200,83891200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",48558080,48558080,48558080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",48591093,48591093,48591093 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.005106%,0.005106%,0.005106% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.056479%,0.056479%,0.056479% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",9.065956%,9.065956%,9.065956% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",89.065235%,89.065235%,89.065235% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.005316%,0.005316%,0.005316% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.003149%,0.003149%,0.003149% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000147%,0.000147%,0.000147% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",462684160,462684160,462684160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1006632960,1006632960,1006632960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",73728000,73728000,73728000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",48591093,48591093,48591093 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",31467520,31467520,31467520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",31467520,31467520,31467520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",72361004,72361004,72361004 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",1.438985%,1.438985%,1.438985% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.359627%,0.359627%,0.359627% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.095268,0.095268,0.095268 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.076924,0.076924,0.076924 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",1.923110%,1.923110%,1.923110% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",96.938465%,96.938465%,96.938465% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.991801,0.991801,0.991801 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.304858,0.304858,0.304858 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",13250.000000,13250.000000,13250.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.034434%,99.034434%,99.034434% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000898,0.000898,0.000898 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",148395054,148395054,148395054 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335544375,335544375,335544375 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",148180342,148180342,148180342 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",170814480,170814480,170814480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",781.831854GB/s,781.831854GB/s,781.831854GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",781.831854GB/s,781.831854GB/s,781.831854GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",781.831854GB/s,781.831854GB/s,781.831854GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",3.878551%,3.878551%,3.878551% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.097567%,0.097567%,0.097567% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",49.285283%,49.285283%,49.285283% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",345.266198GB/s,345.266198GB/s,345.266198GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",398.004656GB/s,398.004656GB/s,398.004656GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",390.963646GB/s,390.963646GB/s,390.963646GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",345.430312GB/s,345.430312GB/s,345.430312GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",781.831854GB/s,781.831854GB/s,781.831854GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",345.766485GB/s,345.766485GB/s,345.766485GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",781.831982GB/s,781.831982GB/s,781.831982GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",12.215820KB/s,12.215820KB/s,12.214844KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",83891200,83891200,83891200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",50196480,50196480,50196480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",50241539,50241539,50241539 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.005413%,0.005413%,0.005413% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.086348%,0.086348%,0.086348% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",9.678279%,9.678279%,9.678279% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",88.938929%,88.938929%,88.938929% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.004188%,0.004188%,0.004188% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.003845%,0.003845%,0.003845% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000629%,0.000629%,0.000629% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",462684160,462684160,462684160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1006632960,1006632960,1006632960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",126156800,126156800,126156800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",50241539,50241539,50241539 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",31467520,31467520,31467520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",31467520,31467520,31467520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",148250776,148250776,148250776 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",1.113792%,1.113792%,1.113792% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.168579%,0.168579%,0.168579% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.052434,0.052434,0.052434 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.040712,0.040712,0.040712 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.017808%,1.017808%,1.017808% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",96.421037%,96.421037%,96.421037% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.496579,0.496579,0.496579 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.094165,0.094165,0.094165 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",8710.000000,8710.000000,8710.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.531142%,98.531142%,98.531142% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000266,0.000266,0.000266 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",3382,3382,3382 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83886096,83886096,83886096 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",522,522,522 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",2481,2481,2481 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",1013.795321GB/s,1013.795321GB/s,1013.795321GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",1013.795321GB/s,1013.795321GB/s,1013.795321GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",5.879481%,5.879481%,5.879481% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",90.000000%,90.000000%,90.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",99.999390%,99.999390%,99.999390% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",6.459974MB/s,6.459974MB/s,6.459973MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",30.703444MB/s,30.703444MB/s,30.703444MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",126.971924GB/s,126.971924GB/s,126.971924GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",31.681104MB/s,31.681104MB/s,31.681103MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1013.795321GB/s,1013.795321GB/s,1013.795321GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",41.853707MB/s,41.853707MB/s,41.853706MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",1013.795515GB/s,1013.795515GB/s,1013.795515GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",63.361328KB/s,63.361328KB/s,63.360352KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",5248000,5248000,5248000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",44595200,44595200,44595200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",26958548,26958548,26958548 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.012406%,0.012406%,0.012406% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.068291%,0.068291%,0.068291% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.652509%,0.652509%,0.652509% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.047222%,0.047222%,0.047222% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.004774%,0.004774%,0.004774% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.011318%,0.011318%,0.011318% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000158%,0.000158%,0.000158% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",159088640,159088640,159088640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",692060160,692060160,692060160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",491520,491520,491520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",26958548,26958548,26958548 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",2560,2560,2560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",97.012032%,97.012032%,97.012032% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",2.191290%,2.191290%,2.191290% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.118756,0.118756,0.118756 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.118787,0.118787,0.118787 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",2.969680%,2.969680%,2.969680% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",91.727011%,91.727011%,91.727011% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.971119,0.971119,0.971119 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",1.471322,1.471322,1.471322 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",27967.000000,27967.000000,27967.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.641479%,98.641479%,98.641479% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000238,0.000238,0.000238 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",41318,41318,41318 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",32,32,32 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",40970,40970,40970 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",14018,14018,14018 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.987793%,99.987793%,99.987793% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.000000%,50.000000%,50.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",371.785118MB/s,371.785118MB/s,371.785117MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",127.207317MB/s,127.207317MB/s,127.207316MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",5947.291464GB/s,5947.291464GB/s,5947.291464GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",371.694373MB/s,371.694373MB/s,371.694372MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",374.943069MB/s,374.943069MB/s,374.943068MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",297.355469KB/s,297.355469KB/s,297.354492KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.460938KB/s,46.460938KB/s,46.459961KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",143191040,143191040,143191040 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",108206146,108206146,108206146 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.026352%,0.026352%,0.026352% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.777994%,1.777994%,1.777994% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",56.852633%,56.852633%,56.852633% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",40.106000%,40.106000%,40.106000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.003241%,0.003241%,0.003241% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.025043%,0.025043%,0.025043% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001036%,0.001036%,0.001036% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",2076999680,2076999680,2076999680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",108206146,108206146,108206146 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",40960,40960,40960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000069%,0.000069%,0.000069% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.207632%,1.207632%,1.207632% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.321784,0.321784,0.321784 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.321860,0.321860,0.321860 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",8.046510%,8.046510%,8.046510% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",99.720047%,99.720047%,99.720047% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.983280,0.983280,0.983280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",1.053938,1.053938,1.053938 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",31551.000000,31551.000000,31551.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.795799%,98.795799%,98.795799% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000113,0.000113,0.000113 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",127149966,127149966,127149966 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",36,36,36 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",125996565,125996565,125996565 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",93529,93529,93529 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",62.107639%,62.107639%,62.107639% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.413114%,50.413114%,50.413114% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",668.776048GB/s,668.776048GB/s,668.776048GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",508.356349MB/s,508.356349MB/s,508.356348MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",3562.174134GB/s,3562.174134GB/s,3562.174134GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",674.875340GB/s,674.875340GB/s,674.875340GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",674.898174GB/s,674.898174GB/s,674.898174GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",200.365234KB/s,200.365234KB/s,200.364258KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",27.828125KB/s,27.828125KB/s,27.827148KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",161541120,161541120,161541120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",126541055,126541055,126541055 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.013650%,0.013650%,0.013650% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.195653%,1.195653%,1.195653% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",98.778651%,98.778651%,98.778651% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.005795%,0.005795%,0.005795% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.001919%,0.001919%,0.001919% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.003033%,0.003033%,0.003033% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000467%,0.000467%,0.000467% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",2664202240,2664202240,2664202240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",126541055,126541055,126541055 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",127145664,127145664,127145664 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000040%,0.000040%,0.000040% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000792%,0.000792%,0.000792% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.230631,0.230631,0.230631 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.230657,0.230657,0.230657 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",5.766432%,5.766432%,5.766432% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",97.264541%,97.264541%,97.264541% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.999162,0.999162,0.999162 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.231230,0.231230,0.231230 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","High (7)","High (7)","High (7)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",9474.000000,9474.000000,9474.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.649594%,98.649594%,98.649594% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000359,0.000359,0.000359 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",82502,82502,82502 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83886096,83886096,83886096 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",82012,82012,82012 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",151576,151576,151576 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",1341.529902GB/s,1341.529902GB/s,1341.529902GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",1341.529902GB/s,1341.529902GB/s,1341.529902GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",19.921875%,19.921875%,19.921875% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.000000%,50.000000%,50.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",99.456477%,99.456477%,99.456477% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",1.311559GB/s,1.311559GB/s,1.311559GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",2.424046GB/s,2.424046GB/s,2.424046GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",671.092473GB/s,671.092473GB/s,671.092473GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",1.310088GB/s,1.310088GB/s,1.310088GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1341.529902GB/s,1341.529902GB/s,1341.529902GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",1.319395GB/s,1.319395GB/s,1.319395GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",1341.530158GB/s,1341.530158GB/s,1341.530158GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",83.844727KB/s,83.844727KB/s,83.843750KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",20976640,20976640,20976640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",48506880,48506880,48506880 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",30874439,30874439,30874439 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.015761%,0.015761%,0.015761% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.122667%,0.122667%,0.122667% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",2.129769%,2.129769%,2.129769% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",40.637482%,40.637482%,40.637482% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.007671%,0.007671%,0.007671% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.006961%,0.006961%,0.006961% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000428%,0.000428%,0.000428% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",211025920,211025920,211025920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",754974720,754974720,754974720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",10813440,10813440,10813440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",30874439,30874439,30874439 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",23603200,23603200,23603200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",23603200,23603200,23603200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",81920,81920,81920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",55.532699%,55.532699%,55.532699% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.546562%,1.546562%,1.546562% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.169433,0.169433,0.169433 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.169299,0.169299,0.169299 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",4.232485%,4.232485%,4.232485% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",97.453486%,97.453486%,97.453486% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.955275,0.955275,0.955275 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",1.120143,1.120143,1.120143 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",11138.000000,11138.000000,11138.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.851342%,98.851342%,98.851342% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000624,0.000624,0.000624 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",1454550,1454550,1454550 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83910323,83910323,83910323 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",216494,216494,216494 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",7021834,7021834,7021834 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",1260.511405GB/s,1260.511405GB/s,1260.511405GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",1260.511405GB/s,1260.511405GB/s,1260.511405GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",37.401502%,37.401502%,37.401502% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",92.473294%,92.473294%,92.473294% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",92.139530%,92.139530%,92.139530% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",3.253140GB/s,3.253140GB/s,3.253140GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",105.513356GB/s,105.513356GB/s,105.513356GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",1575.946998GB/s,1575.946998GB/s,1575.946998GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",21.713064GB/s,21.713064GB/s,21.713064GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1260.511405GB/s,1260.511405GB/s,1260.511405GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",21.856747GB/s,21.856747GB/s,21.856747GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",1260.875691GB/s,1260.875691GB/s,1260.875691GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",78.781250KB/s,78.781250KB/s,78.780273KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",52433920,52433920,52433920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",57026560,57026560,57026560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",39407609,39407609,39407609 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.018868%,0.018868%,0.018868% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.172540%,0.172540%,0.172540% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.484877%,0.484877%,0.484877% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",95.320957%,95.320957%,95.320957% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.011958%,0.011958%,0.011958% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.010913%,0.010913%,0.010913% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000505%,0.000505%,0.000505% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",336855040,336855040,336855040 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",880803840,880803840,880803840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",31784960,31784960,31784960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",39407609,39407609,39407609 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",27535360,27535360,27535360 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",27535360,27535360,27535360 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",1444988,1444988,1444988 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",3.143092%,3.143092%,3.143092% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.836290%,0.836290%,0.836290% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.211161,0.211161,0.211161 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.211282,0.211282,0.211282 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",5.282046%,5.282046%,5.282046% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",93.935477%,93.935477%,93.935477% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.982704,0.982704,0.982704 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.730313,0.730313,0.730313 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",31679.000000,31679.000000,31679.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.800664%,98.800664%,98.800664% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000114,0.000114,0.000114 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",144291250,144291250,144291250 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",23549,23549,23549 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",142723730,142723730,142723730 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",96311,96311,96311 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",56.992244%,56.992244%,56.992244% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.539860%,50.539860%,50.539860% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",710.228346GB/s,710.228346GB/s,710.228346GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",490.769610MB/s,490.769610MB/s,490.769609MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",3339.603864GB/s,3339.603864GB/s,3339.603864GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",718.122433GB/s,718.122433GB/s,718.122433GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",718.028710GB/s,718.028710GB/s,718.028710GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",119.998064MB/s,119.998064MB/s,119.998063MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",26.089844KB/s,26.089844KB/s,26.088867KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",162196480,162196480,162196480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",127200469,127200469,127200469 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.013503%,0.013503%,0.013503% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.122196%,1.122196%,1.122196% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",98.849481%,98.849481%,98.849481% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.006379%,0.006379%,0.006379% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.001796%,0.001796%,0.001796% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.005404%,0.005404%,0.005404% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000433%,0.000433%,0.000433% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",2685173760,2685173760,2685173760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",127200469,127200469,127200469 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",144310084,144310084,144310084 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000038%,0.000038%,0.000038% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000770%,0.000770%,0.000770% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.216418,0.216418,0.216418 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.217041,0.217041,0.217041 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",5.426034%,5.426034%,5.426034% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",97.959883%,97.959883%,97.959883% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.999141,0.999141,0.999141 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.217534,0.217534,0.217534 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (6)","Mid (6)","Mid (6)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",30655.000000,30655.000000,30655.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.760602%,98.760602%,98.760602% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000235,0.000235,0.000235 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",122982,122982,122982 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",16,16,16 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",122890,122890,122890 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",49350,49350,49350 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.963379%,99.963379%,99.963379% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.000000%,50.000000%,50.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",1.088747GB/s,1.088747GB/s,1.088747GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",447.710729MB/s,447.710729MB/s,447.710728MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",5945.707346GB/s,5945.707346GB/s,5945.707346GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",1.088658GB/s,1.088658GB/s,1.088658GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",1.089562GB/s,1.089562GB/s,1.089562GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",148.637695KB/s,148.637695KB/s,148.636719KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.449219KB/s,46.449219KB/s,46.448242KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",121943040,121943040,121943040 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",121971705,121971705,121971705 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.023081%,0.023081%,0.023081% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.846564%,1.846564%,1.846564% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",55.573994%,55.573994%,55.573994% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",41.336569%,41.336569%,41.336569% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.003062%,0.003062%,0.003062% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.027399%,0.027399%,0.027399% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001040%,0.001040%,0.001040% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",2517401600,2517401600,2517401600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",121971705,121971705,121971705 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",122880,122880,122880 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000064%,0.000064%,0.000064% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.188227%,1.188227%,1.188227% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.439247,0.439247,0.439247 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.362582,0.362582,0.362582 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",9.064556%,9.064556%,9.064556% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",99.683554%,99.683554%,99.683554% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.976616,0.976616,0.976616 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",1.134975,1.134975,1.134975 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",10626.000000,10626.000000,10626.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.795996%,98.795996%,98.795996% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000486,0.000486,0.000486 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",163942,163942,163942 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83886125,83886125,83886125 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",164198,164198,164198 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",5395039,5395039,5395039 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",1308.104703GB/s,1308.104703GB/s,1308.104703GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",1308.104703GB/s,1308.104703GB/s,1308.104703GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",33.203125%,33.203125%,33.203125% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.000000%,50.000000%,50.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",94.071341%,94.071341%,94.071341% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",2.560475GB/s,2.560475GB/s,2.560475GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",84.129284GB/s,84.129284GB/s,84.129284GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",1308.424064GB/s,1308.424064GB/s,1308.424064GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",2.554892GB/s,2.554892GB/s,2.554892GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1308.104703GB/s,1308.104703GB/s,1308.104703GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",2.556483GB/s,2.556483GB/s,2.556483GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",1308.105405GB/s,1308.105405GB/s,1308.105405GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",81.755859KB/s,81.755859KB/s,81.754883KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",41948160,41948160,41948160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",54405120,54405120,54405120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",36777178,36777178,36777178 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.018571%,0.018571%,0.018571% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.161231%,0.161231%,0.161231% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.475074%,0.475074%,0.475074% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",95.078937%,95.078937%,95.078937% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.009783%,0.009783%,0.009783% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.008248%,0.008248%,0.008248% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001078%,0.001078%,0.001078% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",294912000,294912000,294912000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",838860800,838860800,838860800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",31784960,31784960,31784960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",36777178,36777178,36777178 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",26224640,26224640,26224640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",26224640,26224640,26224640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",163840,163840,163840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",3.567765%,3.567765%,3.567765% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.679314%,0.679314%,0.679314% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.197205,0.197205,0.197205 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.197301,0.197301,0.197301 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",4.932515%,4.932515%,4.932515% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",97.545705%,97.545705%,97.545705% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.983208,0.983208,0.983208 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.617848,0.617848,0.617848 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",12418.000000,12418.000000,12418.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.969742%,98.969742%,98.969742% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000429,0.000429,0.000429 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",9571966,9571966,9571966 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83910286,83910286,83910286 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",6985150,6985150,6985150 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",16121269,16121269,16121269 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",1132.756836GB/s,1132.756836GB/s,1132.756836GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",1132.756836GB/s,1132.756836GB/s,1132.756836GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",40.608915%,40.608915%,40.608915% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",62.050395%,62.050395%,62.050395% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",81.824045%,81.824045%,81.824045% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",94.324069GB/s,94.324069GB/s,94.324069GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",217.693778GB/s,217.693778GB/s,217.693778GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",1982.601014GB/s,1982.601014GB/s,1982.601014GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",128.661749GB/s,128.661749GB/s,128.661749GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1132.756836GB/s,1132.756836GB/s,1132.756836GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",129.255175GB/s,129.255175GB/s,129.255175GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",1133.083702GB/s,1133.083702GB/s,1133.083702GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",70.796875KB/s,70.796875KB/s,70.795898KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",73405440,73405440,73405440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",63580160,63580160,63580160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",45962992,45962992,45962992 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Mid (4)","Mid (4)","Mid (4)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.018057%,0.018057%,0.018057% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.207296%,0.207296%,0.207296% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",2.163011%,2.163011%,2.163011% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",94.611510%,94.611510%,94.611510% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.007667%,0.007667%,0.007667% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.021638%,0.021638%,0.021638% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001050%,0.001050%,0.001050% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",420741120,420741120,420741120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",964689920,964689920,964689920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",73728000,73728000,73728000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",45962992,45962992,45962992 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",30156800,30156800,30156800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",30156800,30156800,30156800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",9528020,9528020,9528020 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",1.884133%,1.884133%,1.884133% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.085638%,1.085638%,1.085638% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.232778,0.232778,0.232778 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.232878,0.232878,0.232878 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",5.821956%,5.821956%,5.821956% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",89.351179%,89.351179%,89.351179% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.985497,0.985497,0.985497 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.908812,0.908812,0.908812 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (4)","Mid (4)","Mid (4)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",8710.000000,8710.000000,8710.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.531142%,98.531142%,98.531142% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000223,0.000223,0.000223 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",3150,3150,3150 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83886096,83886096,83886096 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",274,274,274 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",675,675,675 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",280.143276GB/s,280.143276GB/s,280.143276GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",280.143276GB/s,280.143276GB/s,280.143276GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",5.879481%,5.879481%,5.879481% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",97.500000%,97.500000%,97.500000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",99.999847%,99.999847%,99.999847% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",959.490234KB/s,959.490234KB/s,959.489258KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",2.308309MB/s,2.308309MB/s,2.308308MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",35.086304GB/s,35.086304GB/s,35.086304GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",8.754477MB/s,8.754477MB/s,8.754476MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",280.143276GB/s,280.143276GB/s,280.143276GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",10.772110MB/s,10.772110MB/s,10.772109MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",280.143330GB/s,280.143330GB/s,280.143330GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",17.508789KB/s,17.508789KB/s,17.507812KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",5248000,5248000,5248000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",44595200,44595200,44595200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",26957776,26957776,26957776 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003943%,0.003943%,0.003943% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.018490%,0.018490%,0.018490% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.492181%,0.492181%,0.492181% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.053617%,0.053617%,0.053617% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.003047%,0.003047%,0.003047% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001406%,0.001406%,0.001406% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000070%,0.000070%,0.000070% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",159088640,159088640,159088640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",692060160,692060160,692060160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",491520,491520,491520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",26957776,26957776,26957776 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",2560,2560,2560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",98.820662%,98.820662%,98.820662% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.606584%,0.606584%,0.606584% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.032899,0.032899,0.032899 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.032906,0.032906,0.032906 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",0.822656%,0.822656%,0.822656% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",91.621462%,91.621462%,91.621462% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.989143,0.989143,0.989143 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.415799,0.415799,0.415799 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",31679.000000,31679.000000,31679.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.800664%,98.800664%,98.800664% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000089,0.000089,0.000089 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",145148006,145148006,145148006 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",36,36,36 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",140938334,140938334,140938334 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",96571,96571,96571 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",56.731141%,56.731141%,56.731141% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",51.496491%,51.496491%,51.496491% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",705.469102GB/s,705.469102GB/s,705.469102GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",494.989001MB/s,494.989001MB/s,494.989000MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",3359.247517GB/s,3359.247517GB/s,3359.247517GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",726.731864GB/s,726.731864GB/s,726.731864GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",726.540683GB/s,726.540683GB/s,726.540683GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",188.951172KB/s,188.951172KB/s,188.950195KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",26.243164KB/s,26.243164KB/s,26.242188KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",127185920,127185920,127185920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",127197205,127197205,127197205 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.012878%,0.012878%,0.012878% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.131106%,1.131106%,1.131106% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",98.842086%,98.842086%,98.842086% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.007999%,0.007999%,0.007999% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.001810%,0.001810%,0.001810% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002846%,0.002846%,0.002846% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000396%,0.000396%,0.000396% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",2685173760,2685173760,2685173760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",127197205,127197205,127197205 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",145186200,145186200,145186200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000038%,0.000038%,0.000038% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000842%,0.000842%,0.000842% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.201633,0.201633,0.201633 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.218049,0.218049,0.218049 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",5.451228%,5.451228%,5.451228% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",97.769833%,97.769833%,97.769833% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.999023,0.999023,0.999023 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.218586,0.218586,0.218586 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (6)","Mid (6)","Mid (6)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",13250.000000,13250.000000,13250.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.034434%,99.034434%,99.034434% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000673,0.000673,0.000673 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",19599982,19599982,19599982 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83886115,83886115,83886115 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",19438294,19438294,19438294 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",28666635,28666635,28666635 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",1021.423333GB/s,1021.423333GB/s,1021.423333GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",1021.423333GB/s,1021.423333GB/s,1021.423333GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",41.968815%,41.968815%,41.968815% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.780342%,50.780342%,50.780342% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",66.927490%,66.927490%,66.927490% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",236.686791GB/s,236.686791GB/s,236.686791GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",349.053977GB/s,349.053977GB/s,349.053977GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",2298.451870GB/s,2298.451870GB/s,2298.451870GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",238.157811GB/s,238.157811GB/s,238.157811GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1021.423333GB/s,1021.423333GB/s,1021.423333GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",238.655555GB/s,238.655555GB/s,238.655555GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",1021.423759GB/s,1021.423759GB/s,1021.423759GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",63.838867KB/s,63.838867KB/s,63.837891KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",94376960,94376960,94376960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",67840000,67840000,67840000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",50223663,50223663,50223663 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","High (8)","High (8)","High (8)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.013570%,0.013570%,0.013570% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.156240%,0.156240%,0.156240% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",3.801121%,3.801121%,3.801121% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",93.478429%,93.478429%,93.478429% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.008039%,0.008039%,0.008039% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.013880%,0.013880%,0.013880% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000938%,0.000938%,0.000938% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",494141440,494141440,494141440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1048576000,1048576000,1048576000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",52756480,52756480,52756480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",50223663,50223663,50223663 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",32778240,32778240,32778240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",32778240,32778240,32778240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",19559104,19559104,19559104 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",1.359894%,1.359894%,1.359894% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.167889%,1.167889%,1.167889% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.213558,0.213558,0.213558 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.213702,0.213702,0.213702 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",5.342542%,5.342542%,5.342542% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",95.443305%,95.443305%,95.443305% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.989053,0.989053,0.989053 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.941389,0.941389,0.941389 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",9090.000000,9090.000000,9090.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.592547%,98.592547%,98.592547% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000464,0.000464,0.000464 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",164442,164442,164442 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335544368,335544368,335544368 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",163934,163934,163934 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",32936476,32936476,32936476 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",1339.156701GB/s,1339.156701GB/s,1339.156701GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",1339.156701GB/s,1339.156701GB/s,1339.156701GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",1339.156701GB/s,1339.156701GB/s,1339.156701GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",5.836397%,5.836397%,5.836397% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",94.425331%,94.425331%,94.425331% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",669.962507MB/s,669.962507MB/s,669.962506MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",131.449409GB/s,131.449409GB/s,131.449409GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",83.779029GB/s,83.779029GB/s,83.779029GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",669.578350MB/s,669.578350MB/s,669.578349MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1339.156701GB/s,1339.156701GB/s,1339.156701GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",672.038592MB/s,672.038592MB/s,672.038591MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",1339.156892GB/s,1339.156892GB/s,1339.156892GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",20.923828KB/s,20.923828KB/s,20.922852KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",10490880,10490880,10490880 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",46540800,46540800,46540800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",28911063,28911063,28911063 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003800%,0.003800%,0.003800% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.029481%,0.029481%,0.029481% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.840746%,0.840746%,0.840746% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.467959%,0.467959%,0.467959% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.001877%,0.001877%,0.001877% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001621%,0.001621%,0.001621% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000117%,0.000117%,0.000117% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",200540160,200540160,200540160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",713031680,713031680,713031680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",28911063,28911063,28911063 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",22292480,22292480,22292480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",22292480,22292480,22292480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",163840,163840,163840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",97.987554%,97.987554%,97.987554% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.666843%,0.666843%,0.666843% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.039613,0.039613,0.039613 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.039632,0.039632,0.039632 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",0.990792%,0.990792%,0.990792% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",97.391137%,97.391137%,97.391137% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.981787,0.981787,0.981787 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.455929,0.455929,0.455929 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",31167.000000,31167.000000,31167.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.780962%,98.780962%,98.780962% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000089,0.000089,0.000089 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",253168246,253168246,253168246 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",36,36,36 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",249200473,249200473,249200473 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",96537,96537,96537 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",24.474045%,24.474045%,24.474045% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.776818%,50.776818%,50.776818% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",800.551300GB/s,800.551300GB/s,800.551300GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",317.566044MB/s,317.566044MB/s,317.566043MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",2155.923991GB/s,2155.923991GB/s,2155.923991GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",814.116246GB/s,814.116246GB/s,814.116246GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",813.297688GB/s,813.297688GB/s,813.297688GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",121.266602KB/s,121.266602KB/s,121.265625KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",16.841797KB/s,16.841797KB/s,16.840820KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",124564480,124564480,124564480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",124575566,124575566,124575566 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.008658%,0.008658%,0.008658% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.712040%,0.712040%,0.712040% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.179715%,99.179715%,99.179715% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.095261%,0.095261%,0.095261% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.001155%,0.001155%,0.001155% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001653%,0.001653%,0.001653% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000014%,0.000014%,0.000014% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",2601287680,2601287680,2601287680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",124575566,124575566,124575566 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",253423052,253423052,253423052 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000024%,0.000024%,0.000024% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.001479%,0.001479%,0.001479% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.138307,0.138307,0.138307 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.137839,0.137839,0.137839 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",3.445977%,3.445977%,3.445977% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",97.442632%,97.442632%,97.442632% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.999379,0.999379,0.999379 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.138784,0.138784,0.138784 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (4)","Mid (4)","Mid (4)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",39487.000000,39487.000000,39487.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.037816%,99.037816%,99.037816% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000106,0.000106,0.000106 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",653762910,653762910,653762910 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",23539,23539,23539 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",650069746,650069746,650069746 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",94862,94862,94862 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",2.582832%,2.582832%,2.582832% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.500533%,0.500533%,0.500533% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",822.857358GB/s,822.857358GB/s,822.857358GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",122.958013MB/s,122.958013MB/s,122.958012MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",849.488967GB/s,849.488967GB/s,849.488967GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",827.522841GB/s,827.522841GB/s,827.522841GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",827.532159GB/s,827.532159GB/s,827.532159GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",30.510727MB/s,30.510727MB/s,30.510726MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",6.635742KB/s,6.635742KB/s,6.634766KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",167162880,167162880,167162880 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",167180803,167180803,167180803 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003558%,0.003558%,0.003558% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.364615%,0.364615%,0.364615% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.628408%,99.628408%,99.628408% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.001148%,0.001148%,0.001148% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.000450%,0.000450%,0.000450% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001371%,0.001371%,0.001371% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000127%,0.000127%,0.000127% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",3964436480,3964436480,3964436480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",167180803,167180803,167180803 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",653755548,653755548,653755548 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000010%,0.000010%,0.000010% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000313%,0.000313%,0.000313% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.079924,0.079924,0.079924 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.072946,0.072946,0.072946 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.823642%,1.823642%,1.823642% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",95.248534%,95.248534%,95.248534% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999326,0.999326,0.999326 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.073147,0.073147,0.073147 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",31807.000000,31807.000000,31807.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.805491%,98.805491%,98.805491% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000098,0.000098,0.000098 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",323411254,323411254,323411254 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",36,36,36 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",322535993,322535993,322535993 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",96738,96738,96738 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",3.618013%,3.618013%,3.618013% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.137394%,50.137394%,50.137394% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",823.186645GB/s,823.186645GB/s,823.186645GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",252.823355MB/s,252.823355MB/s,252.823354MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",1712.826094GB/s,1712.826094GB/s,1712.826094GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",825.402725GB/s,825.402725GB/s,825.402725GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",825.420514GB/s,825.420514GB/s,825.420514GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",96.342773KB/s,96.342773KB/s,96.341797KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",13.380859KB/s,13.380859KB/s,13.379883KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",127841280,127841280,127841280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",127854051,127854051,127854051 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006388%,0.006388%,0.006388% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.570088%,0.570088%,0.570088% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.313352%,99.313352%,99.313352% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.106822%,0.106822%,0.106822% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.000891%,0.000891%,0.000891% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001289%,0.001289%,0.001289% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000017%,0.000017%,0.000017% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",2706145280,2706145280,2706145280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",127854051,127854051,127854051 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",323404284,323404284,323404284 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000019%,0.000019%,0.000019% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.001135%,0.001135%,0.001135% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.115041,0.115041,0.115041 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.110573,0.110573,0.110573 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.764329%,2.764329%,2.764329% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",99.128181%,99.128181%,99.128181% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.999291,0.999291,0.999291 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.111322,0.111322,0.111322 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",23871.000000,23871.000000,23871.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.408372%,98.408372%,98.408372% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000336,0.000336,0.000336 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",41638,41638,41638 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",16,16,16 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",40970,40970,40970 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",12126,12126,12126 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.987793%,99.987793%,99.987793% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.000000%,50.000000%,50.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",371.996836MB/s,371.996836MB/s,371.996835MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",110.100894MB/s,110.100894MB/s,110.100893MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",5950.678223GB/s,5950.678223GB/s,5950.678223GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",371.906039MB/s,371.906039MB/s,371.906038MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",378.062100MB/s,378.062100MB/s,378.062099MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",148.761719KB/s,148.761719KB/s,148.760742KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.487305KB/s,46.487305KB/s,46.486328KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",122219520,122219520,122219520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",87238145,87238145,87238145 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.024132%,0.024132%,0.024132% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.433498%,1.433498%,1.433498% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",55.994387%,55.994387%,55.994387% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",41.201420%,41.201420%,41.201420% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.003144%,0.003144%,0.003144% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.024579%,0.024579%,0.024579% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000951%,0.000951%,0.000951% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",1405911040,1405911040,1405911040 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",87238145,87238145,87238145 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",40960,40960,40960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000067%,0.000067%,0.000067% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.317821%,1.317821%,1.317821% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.259490,0.259490,0.259490 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.259578,0.259578,0.259578 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",6.489441%,6.489441%,6.489441% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",99.712160%,99.712160%,99.712160% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.983989,0.983989,0.983989 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",1.077637,1.077637,1.077637 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",38335.000000,38335.000000,38335.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.008902%,99.008902%,99.008902% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000086,0.000086,0.000086 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",469857030,469857030,469857030 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",51,51,51 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",267181610,267181610,267181610 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",97263,97263,97263 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",29.985909%,29.985909%,29.985909% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",43.024650%,43.024650%,43.024650% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",736.985854GB/s,736.985854GB/s,736.985854GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",274.726280MB/s,274.726280MB/s,274.726279MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",1851.167556GB/s,1851.167556GB/s,1851.167556GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",1296.038577GB/s,1296.038577GB/s,1296.038577GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",1296.039741GB/s,1296.039741GB/s,1296.039741GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",147.509766KB/s,147.509766KB/s,147.508789KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",14.460938KB/s,14.460938KB/s,14.459961KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",161264640,161264640,161264640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",161275250,161275250,161275250 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.008283%,0.008283%,0.008283% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.815171%,0.815171%,0.815171% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.168157%,99.168157%,99.168157% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.005033%,0.005033%,0.005033% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.001040%,0.001040%,0.001040% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001456%,0.001456%,0.001456% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000265%,0.000265%,0.000265% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",3775692800,3775692800,3775692800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",161275250,161275250,161275250 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",469856608,469856608,469856608 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000022%,0.000022%,0.000022% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000571%,0.000571%,0.000571% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.158749,0.158749,0.158749 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.161744,0.161744,0.161744 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",4.043609%,4.043609%,4.043609% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",89.765880%,89.765880%,89.765880% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999502,0.999502,0.999502 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.162313,0.162313,0.162313 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (4)","Mid (4)","Mid (4)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",9666.000000,9666.000000,9666.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.676417%,98.676417%,98.676417% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000319,0.000319,0.000319 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",123934,123934,123934 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167772204,167772204,167772204 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",122894,122894,122894 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",4197388,4197388,4197388 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",1287.666830GB/s,1287.666830GB/s,1287.666830GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",1287.666830GB/s,1287.666830GB/s,1287.666830GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",8.504464%,8.504464%,8.504464% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.000000%,50.000000%,50.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",99.392142%,99.392142%,99.392142% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",965.860152MB/s,965.860152MB/s,965.860151MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",32.215341GB/s,32.215341GB/s,32.215341GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",241.594717GB/s,241.594717GB/s,241.594717GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",965.750122MB/s,965.750122MB/s,965.750121MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1287.666830GB/s,1287.666830GB/s,1287.666830GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",974.033818MB/s,974.033818MB/s,974.033817MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",1287.667168GB/s,1287.667168GB/s,1287.667168GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",40.239258KB/s,40.239258KB/s,40.238281KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",15733760,15733760,15733760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",49489920,49489920,49489920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",31862323,31862323,31862323 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.007352%,0.007352%,0.007352% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.065435%,0.065435%,0.065435% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",1.124049%,1.124049%,1.124049% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.949464%,0.949464%,0.949464% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.004507%,0.004507%,0.004507% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002962%,0.002962%,0.002962% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000095%,0.000095%,0.000095% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",231997440,231997440,231997440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",734003200,734003200,734003200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",42270720,42270720,42270720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",31862323,31862323,31862323 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",22947840,22947840,22947840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",22947840,22947840,22947840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",122880,122880,122880 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",96.521966%,96.521966%,96.521966% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.324171%,1.324171%,1.324171% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.084530,0.084530,0.084530 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.084557,0.084557,0.084557 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.113926%,2.113926%,2.113926% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",96.922014%,96.922014%,96.922014% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.975616,0.975616,0.975616 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.909497,0.909497,0.909497 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",10114.000000,10114.000000,10114.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.735045%,98.735045%,98.735045% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.001162,0.001162,0.001162 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",19186766,19186766,19186766 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335544379,335544379,335544379 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",14411698,14411698,14411698 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",55518389,55518389,55518389 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",884.925054GB/s,884.925054GB/s,884.925054GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",884.925054GB/s,884.925054GB/s,884.925054GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",884.925054GB/s,884.925054GB/s,884.925054GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",10.950385%,10.950385%,10.950385% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",24.979457%,24.979457%,24.979457% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",85.308856%,85.308856%,85.308856% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",38.007714GB/s,38.007714GB/s,38.007714GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",146.417658GB/s,146.417658GB/s,146.417658GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",165.977459GB/s,165.977459GB/s,165.977459GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",50.851495GB/s,50.851495GB/s,50.851495GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",884.925054GB/s,884.925054GB/s,884.925054GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",50.600916GB/s,50.600916GB/s,50.600916GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",884.925210GB/s,884.925210GB/s,884.925210GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",13.826172KB/s,13.826172KB/s,13.825195KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",31462400,31462400,31462400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",34140160,34140160,34140160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",34179833,34179833,34179833 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.005674%,0.005674%,0.005674% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.045242%,0.045242%,0.045242% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",3.959097%,3.959097%,3.959097% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",87.338305%,87.338305%,87.338305% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.005700%,0.005700%,0.005700% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.003175%,0.003175%,0.003175% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000658%,0.000658%,0.000658% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",263454720,263454720,263454720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",796917760,796917760,796917760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",21299200,21299200,21299200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",34179833,34179833,34179833 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",24913920,24913920,24913920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",24913920,24913920,24913920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",19281780,19281780,19281780 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",8.503228%,8.503228%,8.503228% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.138921%,0.138921%,0.138921% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.063761,0.063761,0.063761 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.044474,0.044474,0.044474 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.111861%,1.111861%,1.111861% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",67.830818%,67.830818%,67.830818% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.728580,0.728580,0.728580 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.108660,0.108660,0.108660 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",31423.000000,31423.000000,31423.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.790894%,98.790894%,98.790894% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000121,0.000121,0.000121 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",308501946,308501946,308501946 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",36,36,36 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",306789769,306789769,306789769 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",96482,96482,96482 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",8.053132%,8.053132%,8.053132% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.276137%,50.276137%,50.276137% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",817.378103GB/s,817.378103GB/s,817.378103GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",263.225755MB/s,263.225755MB/s,263.225754MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",1788.031919GB/s,1788.031919GB/s,1788.031919GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",821.994586GB/s,821.994586GB/s,821.994586GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",821.939846GB/s,821.939846GB/s,821.939846GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",100.573242KB/s,100.573242KB/s,100.572266KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",13.967773KB/s,13.967773KB/s,13.966797KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",125875200,125875200,125875200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",125890459,125890459,125890459 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006699%,0.006699%,0.006699% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.589407%,0.589407%,0.589407% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.298003%,99.298003%,99.298003% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.102425%,0.102425%,0.102425% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.000934%,0.000934%,0.000934% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001358%,0.001358%,0.001358% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000017%,0.000017%,0.000017% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",2643230720,2643230720,2643230720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",125890459,125890459,125890459 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",308522492,308522492,308522492 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000020%,0.000020%,0.000020% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.001137%,0.001137%,0.001137% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.118180,0.118180,0.118180 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.114087,0.114087,0.114087 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.852169%,2.852169%,2.852169% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",98.166373%,98.166373%,98.166373% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.999333,0.999333,0.999333 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.114813,0.114813,0.114813 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (4)","Mid (4)","Mid (4)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",40127.000000,40127.000000,40127.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.053162%,99.053162%,99.053162% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000107,0.000107,0.000107 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",671089454,671089454,671089454 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",39,39,39 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",670840737,670840737,670840737 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",96877,96877,96877 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.028944%,0.028944%,0.028944% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",828.714455GB/s,828.714455GB/s,828.714455GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",122.547971MB/s,122.547971MB/s,122.547970MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",829.045998GB/s,829.045998GB/s,829.045998GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",829.020699GB/s,829.020699GB/s,829.020699GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",829.021704GB/s,829.021704GB/s,829.021704GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",50.517578KB/s,50.517578KB/s,50.516602KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",6.476562KB/s,6.476562KB/s,6.475586KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",170439680,170439680,170439680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",170457959,170457959,170457959 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003389%,0.003389%,0.003389% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.358214%,0.358214%,0.358214% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.634804%,99.634804%,99.634804% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.001114%,0.001114%,0.001114% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.000434%,0.000434%,0.000434% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001676%,0.001676%,0.001676% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000118%,0.000118%,0.000118% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",4069294080,4069294080,4069294080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",170457959,170457959,170457959 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",671088640,671088640,671088640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000009%,0.000009%,0.000009% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000242%,0.000242%,0.000242% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.078243,0.078243,0.078243 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.071756,0.071756,0.071756 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.793912%,1.793912%,1.793912% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",97.442245%,97.442245%,97.442245% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999003,0.999003,0.999003 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.071911,0.071911,0.071911 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",30399.000000,30399.000000,30399.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.750164%,98.750164%,98.750164% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000208,0.000208,0.000208 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",17236646,17236646,17236646 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",16,16,16 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",211694,211694,211694 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",69554,69554,69554 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",95.001647%,95.001647%,95.001647% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",97.936678%,97.936678%,97.936678% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",1.872134GB/s,1.872134GB/s,1.872134GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",629.869418MB/s,629.869418MB/s,629.869417MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",5935.011927GB/s,5935.011927GB/s,5935.011927GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",148.321883GB/s,148.321883GB/s,148.321883GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",152.433779GB/s,152.433779GB/s,152.433779GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",148.370117KB/s,148.370117KB/s,148.369141KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.365234KB/s,46.365234KB/s,46.364258KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",155642880,155642880,155642880 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",120657262,120657262,120657262 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.024055%,0.024055%,0.024055% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.910634%,1.910634%,1.910634% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",68.314256%,68.314256%,68.314256% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",28.874931%,28.874931%,28.874931% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.003181%,0.003181%,0.003181% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.026777%,0.026777%,0.026777% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000858%,0.000858%,0.000858% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",2475458560,2475458560,2475458560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",120657262,120657262,120657262 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",16771688,16771688,16771688 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000067%,0.000067%,0.000067% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.845240%,0.845240%,0.845240% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.358167,0.358167,0.358167 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.358241,0.358241,0.358241 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",8.956037%,8.956037%,8.956037% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",99.808005%,99.808005%,99.808005% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.996787,0.996787,0.996787 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.883130,0.883130,0.883130 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",39743.000000,39743.000000,39743.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.044014%,99.044014%,99.044014% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000108,0.000108,0.000108 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",669253998,669253998,669253998 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",54,54,54 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",667154010,667154010,667154010 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",97135,97135,97135 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.273712%,0.273712%,0.273712% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.300564%,0.300564%,0.300564% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",823.877781GB/s,823.877781GB/s,823.877781GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",122.832245MB/s,122.832245MB/s,122.832244MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",828.762002GB/s,828.762002GB/s,828.762002GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",826.468357GB/s,826.468357GB/s,826.468357GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",826.471086GB/s,826.471086GB/s,826.471086GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",69.923828KB/s,69.923828KB/s,69.922852KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",6.473633KB/s,6.473633KB/s,6.472656KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",203484160,203484160,203484160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",168491834,168491834,168491834 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003299%,0.003299%,0.003299% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.357733%,0.357733%,0.357733% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.632178%,99.632178%,99.632178% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.005178%,0.005178%,0.005178% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.000438%,0.000438%,0.000438% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.000750%,0.000750%,0.000750% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000113%,0.000113%,0.000113% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",4006379520,4006379520,4006379520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",168491834,168491834,168491834 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",669251788,669251788,669251788 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000009%,0.000009%,0.000009% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000302%,0.000302%,0.000302% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.071785,0.071785,0.071785 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.071664,0.071664,0.071664 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.791596%,1.791596%,1.791596% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",97.306519%,97.306519%,97.306519% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999233,0.999233,0.999233 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.071857,0.071857,0.071857 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",10434.000000,10434.000000,10434.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.773840%,98.773840%,98.773840% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000603,0.000603,0.000603 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",9418698,9418698,9418698 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167772205,167772205,167772205 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",5185794,5185794,5185794 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",26264382,26264382,26264382 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",1139.168061GB/s,1139.168061GB/s,1139.168061GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",1139.168061GB/s,1139.168061GB/s,1139.168061GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",13.351996%,13.351996%,13.351996% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",72.461396%,72.461396%,72.461396% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",85.416710%,85.416710%,85.416710% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",35.211390GB/s,35.211390GB/s,35.211390GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",178.334386GB/s,178.334386GB/s,178.334386GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",498.525085GB/s,498.525085GB/s,498.525085GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",63.819098GB/s,63.819098GB/s,63.819098GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1139.168061GB/s,1139.168061GB/s,1139.168061GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",63.952684GB/s,63.952684GB/s,63.952684GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",1139.168367GB/s,1139.168367GB/s,1139.168367GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",35.598633KB/s,35.598633KB/s,35.597656KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",36705280,36705280,36705280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",53422080,53422080,53422080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",35800124,35800124,35800124 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.007481%,0.007481%,0.007481% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.069688%,0.069688%,0.069688% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",3.869130%,3.869130%,3.869130% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",90.254281%,90.254281%,90.254281% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.013296%,0.013296%,0.013296% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.003650%,0.003650%,0.003650% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000670%,0.000670%,0.000670% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",305397760,305397760,305397760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",817889280,817889280,817889280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",10813440,10813440,10813440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",35800124,35800124,35800124 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",25569280,25569280,25569280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",25569280,25569280,25569280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",9399024,9399024,9399024 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",5.493894%,5.493894%,5.493894% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.287911%,0.287911%,0.287911% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.088975,0.088975,0.088975 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.089008,0.089008,0.089008 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.225196%,2.225196%,2.225196% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",91.419829%,91.419829%,91.419829% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.985513,0.985513,0.985513 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.270423,0.270423,0.270423 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",39359.000000,39359.000000,39359.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.034687%,99.034687%,99.034687% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000132,0.000132,0.000132 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",671066642,671066642,671066642 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",51,51,51 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",665959710,665959710,665959710 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",97043,97043,97043 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.006695%,0.006695%,0.006695% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.870117%,0.870117%,0.870117% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",816.472842GB/s,816.472842GB/s,816.472842GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",121.831041MB/s,121.831041MB/s,121.831040MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",822.786067GB/s,822.786067GB/s,822.786067GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",822.705876GB/s,822.705876GB/s,822.705876GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",822.733988GB/s,822.733988GB/s,822.733988GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",65.563477KB/s,65.563477KB/s,65.562500KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",6.427734KB/s,6.427734KB/s,6.426758KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",166507520,166507520,166507520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",166529426,166529426,166529426 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003275%,0.003275%,0.003275% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.356319%,0.356319%,0.356319% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.626052%,99.626052%,99.626052% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.012646%,0.012646%,0.012646% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.000441%,0.000441%,0.000441% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.000759%,0.000759%,0.000759% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000123%,0.000123%,0.000123% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",3943464960,3943464960,3943464960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",166529426,166529426,166529426 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",671043712,671043712,671043712 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000009%,0.000009%,0.000009% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000374%,0.000374%,0.000374% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.078707,0.078707,0.078707 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.071740,0.071740,0.071740 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.793509%,1.793509%,1.793509% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",95.631738%,95.631738%,95.631738% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999339,0.999339,0.999339 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.071981,0.071981,0.071981 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",31551.000000,31551.000000,31551.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.795799%,98.795799%,98.795799% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000099,0.000099,0.000099 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",325160878,325160878,325160878 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",23252,23252,23252 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",324651498,324651498,324651498 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",96455,96455,96455 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",3.087281%,3.087281%,3.087281% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.080408%,50.080408%,50.080408% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",825.873746GB/s,825.873746GB/s,825.873746GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",251.258609MB/s,251.258609MB/s,251.258608MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",1707.219607GB/s,1707.219607GB/s,1707.219607GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",827.231225GB/s,827.231225GB/s,827.231225GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",827.169546GB/s,827.169546GB/s,827.169546GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",60.569853MB/s,60.569853MB/s,60.569852MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",13.336914KB/s,13.336914KB/s,13.335938KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",161541120,161541120,161541120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",126543342,126543342,126543342 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006860%,0.006860%,0.006860% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.562169%,0.562169%,0.562169% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.330791%,99.330791%,99.330791% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.095497%,0.095497%,0.095497% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.000887%,0.000887%,0.000887% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002749%,0.002749%,0.002749% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000008%,0.000008%,0.000008% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",2664202240,2664202240,2664202240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",126543342,126543342,126543342 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",325185124,325185124,325185124 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000019%,0.000019%,0.000019% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.001020%,0.001020%,0.001020% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.108892,0.108892,0.108892 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.108903,0.108903,0.108903 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.722568%,2.722568%,2.722568% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",98.975715%,98.975715%,98.975715% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.999301,0.999301,0.999301 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.109596,0.109596,0.109596 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",31039.000000,31039.000000,31039.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.775935%,98.775935%,98.775935% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000092,0.000092,0.000092 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",222177638,222177638,222177638 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",35,35,35 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",176506026,176506026,176506026 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",95947,95947,95947 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",33.867831%,33.867831%,33.867831% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",60.287083%,60.287083%,60.287083% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",672.353771GB/s,672.353771GB/s,672.353771GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",374.256769MB/s,374.256769MB/s,374.256768MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",2556.415538GB/s,2556.415538GB/s,2556.415538GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",845.280723GB/s,845.280723GB/s,845.280723GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",846.327891GB/s,846.327891GB/s,846.327891GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",139.798828KB/s,139.798828KB/s,139.797852KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",19.970703KB/s,19.970703KB/s,19.969727KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",158919680,158919680,158919680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",123924015,123924015,123924015 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.009942%,0.009942%,0.009942% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.861903%,0.861903%,0.861903% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",98.835747%,98.835747%,98.835747% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.283452%,0.283452%,0.283452% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.001385%,0.001385%,0.001385% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002035%,0.002035%,0.002035% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000030%,0.000030%,0.000030% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",2580316160,2580316160,2580316160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",123924015,123924015,123924015 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",221902736,221902736,221902736 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000030%,0.000030%,0.000030% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.005476%,0.005476%,0.005476% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.167183,0.167183,0.167183 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.167198,0.167198,0.167198 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",4.179948%,4.179948%,4.179948% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",94.422622%,94.422622%,94.422622% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.999264,0.999264,0.999264 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.169476,0.169476,0.169476 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",39871.000000,39871.000000,39871.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.047083%,99.047083%,99.047083% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000106,0.000106,0.000106 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",671090126,671090126,671090126 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",23566,23566,23566 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",668913622,668913622,668913622 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",96866,96866,96866 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.327346%,0.327346%,0.327346% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",819.687934GB/s,819.687934GB/s,819.687934GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",121.548561MB/s,121.548561MB/s,121.548560MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",822.378301GB/s,822.378301GB/s,822.378301GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",822.353205GB/s,822.353205GB/s,822.353205GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",822.355026GB/s,822.355026GB/s,822.355026GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",29.570885MB/s,29.570885MB/s,29.570884MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",6.423828KB/s,6.423828KB/s,6.422852KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",204139520,204139520,204139520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",169150479,169150479,169150479 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003450%,0.003450%,0.003450% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.358276%,0.358276%,0.358276% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.634793%,99.634793%,99.634793% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.001127%,0.001127%,0.001127% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.000437%,0.000437%,0.000437% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001471%,0.001471%,0.001471% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000121%,0.000121%,0.000121% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",4027351040,4027351040,4027351040 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",169150479,169150479,169150479 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",671088640,671088640,671088640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000009%,0.000009%,0.000009% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000317%,0.000317%,0.000317% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.071757,0.071757,0.071757 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.071765,0.071765,0.071765 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.794125%,1.794125%,1.794125% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",96.747941%,96.747941%,96.747941% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999138,0.999138,0.999138 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.072008,0.072008,0.072008 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",14146.000000,14146.000000,14146.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.095592%,99.095592%,99.095592% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000585,0.000585,0.000585 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",33480670,33480670,33480670 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83910288,83910288,83910288 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",33486674,33486674,33486674 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",52575980,52575980,52575980 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",668.175317GB/s,668.175317GB/s,668.175317GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",668.175317GB/s,668.175317GB/s,668.175317GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",39.483294%,39.483294%,39.483294% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.487803%,50.487803%,50.487803% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",37.551723%,37.551723%,37.551723% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",266.730416GB/s,266.730416GB/s,266.730416GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",418.781902GB/s,418.781902GB/s,418.781902GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",1754.123337GB/s,1754.123337GB/s,1754.123337GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",266.901845GB/s,266.901845GB/s,266.901845GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",668.175317GB/s,668.175317GB/s,668.175317GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",266.682593GB/s,266.682593GB/s,266.682593GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",668.368141GB/s,668.368141GB/s,668.368141GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",41.760742KB/s,41.760742KB/s,41.759766KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",110105600,110105600,110105600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",54784000,54784000,54784000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",54817269,54817269,54817269 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.010258%,0.010258%,0.010258% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.107824%,0.107824%,0.107824% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",4.697665%,4.697665%,4.697665% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",92.877292%,92.877292%,92.877292% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.005895%,0.005895%,0.005895% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.006630%,0.006630%,0.006630% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001046%,0.001046%,0.001046% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",557056000,557056000,557056000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1111490560,1111490560,1111490560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",73728000,73728000,73728000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",54817269,54817269,54817269 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",34744320,34744320,34744320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",34744320,34744320,34744320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",33508196,33508196,33508196 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",1.436995%,1.436995%,1.436995% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.856396%,0.856396%,0.856396% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.163440,0.163440,0.163440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.150261,0.150261,0.150261 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",3.756531%,3.756531%,3.756531% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",95.762524%,95.762524%,95.762524% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.990117,0.990117,0.990117 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.694096,0.694096,0.694096 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (4)","Mid (4)","Mid (4)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",31295.000000,31295.000000,31295.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.785948%,98.785948%,98.785948% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000198,0.000198,0.000198 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",13420554,13420554,13420554 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",22153,22153,22153 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",259560,259560,259560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",95437,95437,95437 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",95.967336%,95.967336%,95.967336% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",96.204215%,96.204215%,96.204215% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",2.296074GB/s,2.296074GB/s,2.296074GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",864.499610MB/s,864.499610MB/s,864.499609MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",5936.646737GB/s,5936.646737GB/s,5936.646737GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",119.698864GB/s,119.698864GB/s,119.698864GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",118.718530GB/s,118.718530GB/s,118.718530GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",200.669130MB/s,200.669130MB/s,200.669129MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.377930KB/s,46.377930KB/s,46.376953KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",160230400,160230400,160230400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",125249165,125249165,125249165 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.026767%,0.026767%,0.026767% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.938159%,1.938159%,1.938159% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",68.759436%,68.759436%,68.759436% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",28.416835%,28.416835%,28.416835% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.003146%,0.003146%,0.003146% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.027045%,0.027045%,0.027045% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001012%,0.001012%,0.001012% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",2622259200,2622259200,2622259200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",125249165,125249165,125249165 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",13531376,13531376,13531376 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000066%,0.000066%,0.000066% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.827533%,0.827533%,0.827533% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.371793,0.371793,0.371793 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.371867,0.371867,0.371867 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",9.296667%,9.296667%,9.296667% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",99.787556%,99.787556%,99.787556% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.998533,0.998533,0.998533 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.895858,0.895858,0.895858 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",8710.000000,8710.000000,8710.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.531142%,98.531142%,98.531142% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000340,0.000340,0.000340 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",2966,2966,2966 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83886096,83886096,83886096 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",166,166,166 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",195,195,195 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",140.135998GB/s,140.135998GB/s,140.135998GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",140.135998GB/s,140.135998GB/s,140.135998GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",5.879481%,5.879481%,5.879481% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",98.750000%,98.750000%,98.750000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",99.999924%,99.999924%,99.999924% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",290.781250KB/s,290.781250KB/s,290.780273KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",341.581055KB/s,341.581055KB/s,341.580078KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",17.551213GB/s,17.551213GB/s,17.551213GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",4.379250MB/s,4.379250MB/s,4.379249MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",140.135998GB/s,140.135998GB/s,140.135998GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",5.073771MB/s,5.073771MB/s,5.073771MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",140.136025GB/s,140.136025GB/s,140.136025GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",8.757812KB/s,8.757812KB/s,8.756836KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",5248000,5248000,5248000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",26951680,26951680,26951680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",26960841,26960841,26960841 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.001988%,0.001988%,0.001988% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.009403%,0.009403%,0.009403% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.464406%,0.464406%,0.464406% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.068480%,0.068480%,0.068480% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.002774%,0.002774%,0.002774% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001885%,0.001885%,0.001885% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000034%,0.000034%,0.000034% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",159088640,159088640,159088640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",692060160,692060160,692060160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",491520,491520,491520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",26960841,26960841,26960841 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",2560,2560,2560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",99.143252%,99.143252%,99.143252% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.307778%,0.307778%,0.307778% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.027532,0.027532,0.027532 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.016800,0.016800,0.016800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",0.420003%,0.420003%,0.420003% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",89.792350%,89.792350%,89.792350% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.993289,0.993289,0.993289 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.211839,0.211839,0.211839 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",31551.000000,31551.000000,31551.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.795799%,98.795799%,98.795799% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000087,0.000087,0.000087 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",117412326,117412326,117412326 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",36,36,36 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",80641030,80641030,80641030 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",97099,97099,97099 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",65.030969%,65.030969%,65.030969% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",65.734770%,65.734770%,65.734770% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",528.241457GB/s,528.241457GB/s,528.241457GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",651.315074MB/s,651.315074MB/s,651.315073MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",4396.120180GB/s,4396.120180GB/s,4396.120180GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",768.616850GB/s,768.616850GB/s,768.616850GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",769.112921GB/s,769.112921GB/s,769.112921GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",247.273438KB/s,247.273438KB/s,247.272461KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",34.342773KB/s,34.342773KB/s,34.341797KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",126530560,126530560,126530560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",126541568,126541568,126541568 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","High (7)","High (7)","High (7)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.017058%,0.017058%,0.017058% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.495510%,1.495510%,1.495510% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",98.420884%,98.420884%,98.420884% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.053910%,0.053910%,0.053910% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.002404%,0.002404%,0.002404% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.007888%,0.007888%,0.007888% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000545%,0.000545%,0.000545% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",2664202240,2664202240,2664202240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",126541568,126541568,126541568 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",117336596,117336596,117336596 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000050%,0.000050%,0.000050% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.001749%,0.001749%,0.001749% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.229791,0.229791,0.229791 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.287864,0.287864,0.287864 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",7.196591%,7.196591%,7.196591% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",96.303878%,96.303878%,96.303878% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.999186,0.999186,0.999186 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.288976,0.288976,0.288976 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Mid (4)","Mid (4)","Mid (4)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","High (8)","High (8)","High (8)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",31807.000000,31807.000000,31807.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.805491%,98.805491%,98.805491% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000098,0.000098,0.000098 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",335545206,335545206,335545206 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",36,36,36 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",335436606,335436606,335436606 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",109316,109316,109316 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.022705%,50.022705%,50.022705% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",826.932738GB/s,826.932738GB/s,826.932738GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",275.958273MB/s,275.958273MB/s,275.958272MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",1654.447047GB/s,1654.447047GB/s,1654.447047GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",827.198279GB/s,827.198279GB/s,827.198279GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",827.200464GB/s,827.200464GB/s,827.200464GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",93.059570KB/s,93.059570KB/s,93.058594KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",12.924805KB/s,12.924805KB/s,12.923828KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",162851840,162851840,162851840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",127857474,127857474,127857474 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006161%,0.006161%,0.006161% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.549282%,0.549282%,0.549282% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.344563%,99.344563%,99.344563% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.094415%,0.094415%,0.094415% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.000858%,0.000858%,0.000858% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.003677%,0.003677%,0.003677% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000016%,0.000016%,0.000016% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",2706145280,2706145280,2706145280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",127857474,127857474,127857474 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000018%,0.000018%,0.000018% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.001010%,0.001010%,0.001010% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.106494,0.106494,0.106494 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.106504,0.106504,0.106504 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.662610%,2.662610%,2.662610% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",99.075737%,99.075737%,99.075737% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.999303,0.999303,0.999303 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.107216,0.107216,0.107216 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",20417.000000,20417.000000,20417.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.766041%,98.766041%,98.766041% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000718,0.000718,0.000718 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",140785502,140785502,140785502 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167772201,167772201,167772201 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",137865476,137865476,137865476 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",144271747,144271747,144271747 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",377.208945GB/s,377.208945GB/s,377.208945GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",377.208945GB/s,377.208945GB/s,377.208945GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",5.339435%,5.339435%,5.339435% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",51.051452%,51.051452%,51.051452% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",14.876707%,14.876707%,14.876707% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",309.968536GB/s,309.968536GB/s,309.968536GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",324.372014GB/s,324.372014GB/s,324.372014GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",707.312818GB/s,707.312818GB/s,707.312818GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",314.610531GB/s,314.610531GB/s,314.610531GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",377.208945GB/s,377.208945GB/s,377.208945GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",316.533748GB/s,316.533748GB/s,316.533748GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",377.209037GB/s,377.209037GB/s,377.209037GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",11.787109KB/s,11.787109KB/s,11.786133KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",157291520,157291520,157291520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",69524480,69524480,69524480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",69574421,69574421,69574421 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","High (8)","High (8)","High (8)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.007641%,0.007641%,0.007641% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.069847%,0.069847%,0.069847% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",9.241937%,9.241937%,9.241937% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",90.017473%,90.017473%,90.017473% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.004838%,0.004838%,0.004838% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.006518%,0.006518%,0.006518% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000428%,0.000428%,0.000428% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",756121600,756121600,756121600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1300234240,1300234240,1300234240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",147128320,147128320,147128320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",69574421,69574421,69574421 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",40642560,40642560,40642560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",40642560,40642560,40642560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",139930108,139930108,139930108 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.270627%,0.270627%,0.270627% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.380690%,0.380690%,0.380690% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.088260,0.088260,0.088260 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.064388,0.064388,0.064388 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",1.609693%,1.609693%,1.609693% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",81.676533%,81.676533%,81.676533% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.743495,0.743495,0.743495 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.244499,0.244499,0.244499 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",40127.000000,40127.000000,40127.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.053162%,99.053162%,99.053162% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000126,0.000126,0.000126 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",671090486,671090486,671090486 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",41,41,41 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",670780188,670780188,670780188 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",109936,109936,109936 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.037689%,0.037689%,0.037689% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",828.677853GB/s,828.677853GB/s,828.677853GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",139.073823MB/s,139.073823MB/s,139.073822MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",829.084214GB/s,829.084214GB/s,829.084214GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",829.058913GB/s,829.058913GB/s,829.058913GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",829.061193GB/s,829.061193GB/s,829.061193GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",53.111328KB/s,53.111328KB/s,53.110352KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",6.476562KB/s,6.476562KB/s,6.475586KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",170439680,170439680,170439680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",170461175,170461175,170461175 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003228%,0.003228%,0.003228% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.359202%,0.359202%,0.359202% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.634345%,99.634345%,99.634345% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.001591%,0.001591%,0.001591% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.000442%,0.000442%,0.000442% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.000761%,0.000761%,0.000761% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000117%,0.000117%,0.000117% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",4069294080,4069294080,4069294080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",170461175,170461175,170461175 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",671088640,671088640,671088640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000009%,0.000009%,0.000009% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000304%,0.000304%,0.000304% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.078839,0.078839,0.078839 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.072002,0.072002,0.072002 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.800056%,1.800056%,1.800056% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",98.155700%,98.155700%,98.155700% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999085,0.999085,0.999085 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.072197,0.072197,0.072197 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",16834.000000,16834.000000,16834.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.240005%,99.240005%,99.240005% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.001297,0.001297,0.001297 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",312781226,312781226,312781226 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335544375,335544375,335544375 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",311885298,311885298,311885298 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",313607964,313607964,313607964 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",386.446995GB/s,386.446995GB/s,386.446995GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",386.446995GB/s,386.446995GB/s,386.446995GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",386.446995GB/s,386.446995GB/s,386.446995GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.252030%,0.252030%,0.252030% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.241832%,0.241832%,0.241832% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",6.801003%,6.801003%,6.801003% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",359.198857GB/s,359.198857GB/s,359.198857GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",361.182854GB/s,361.182854GB/s,361.182854GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",362.317644GB/s,362.317644GB/s,362.317644GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",360.407003GB/s,360.407003GB/s,360.407003GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",386.446995GB/s,386.446995GB/s,386.446995GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",360.230699GB/s,360.230699GB/s,360.230699GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",386.447058GB/s,386.447058GB/s,386.447058GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",6.038086KB/s,6.038086KB/s,6.037109KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",157291520,157291520,157291520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",86190080,86190080,86190080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",68635487,68635487,68635487 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.002902%,0.002902%,0.002902% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.044444%,0.044444%,0.044444% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",10.000981%,10.000981%,10.000981% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",89.648646%,89.648646%,89.648646% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.003183%,0.003183%,0.003183% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.003258%,0.003258%,0.003258% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001075%,0.001075%,0.001075% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",777256960,777256960,777256960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1300234240,1300234240,1300234240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",105185280,105185280,105185280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",68635487,68635487,68635487 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",40642560,40642560,40642560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",40642560,40642560,40642560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",312934308,312934308,312934308 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.106084%,0.106084%,0.106084% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.189428%,0.189428%,0.189428% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.031696,0.031696,0.031696 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.031789,0.031789,0.031789 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",0.794720%,0.794720%,0.794720% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",83.381901%,83.381901%,83.381901% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.739340,0.739340,0.739340 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.121605,0.121605,0.121605 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"inst_per_warp","Instructions per warp",8902.000000,8902.000000,8902.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.562823%,98.562823%,98.562823% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"inst_replay_overhead","Instruction Replay Overhead",0.000755,0.000813,0.000784 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"gst_transactions","Global Store Transactions",67108864,67108864,67108864 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"l2_read_transactions","L2 Read Transactions",10774,10814,10794 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"l2_write_transactions","L2 Write Transactions",67108880,67108880,67108880 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"dram_read_transactions","Device Memory Read Transactions",138,138,138 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"dram_write_transactions","Device Memory Write Transactions",872,971,921 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"gld_requested_throughput","Requested Global Load Throughput",261.310771GB/s,261.311488GB/s,261.311130GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"gst_requested_throughput","Requested Global Store Throughput",261.310771GB/s,261.311488GB/s,261.311130GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"gst_throughput","Global Store Throughput",261.310771GB/s,261.311488GB/s,261.311130GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"tex_cache_hit_rate","Unified Cache Hit Rate",3.015507%,3.015507%,3.015507% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",98.750000%,98.750000%,98.750000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",99.999809%,99.999809%,99.999809% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"dram_read_throughput","Device Memory Read Throughput",563.451172KB/s,563.452148KB/s,563.451172KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"dram_write_throughput","Device Memory Write Throughput",3.476922MB/s,3.871655MB/s,3.674288MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"tex_cache_throughput","Unified cache to SM throughput",8.181911GB/s,8.181933GB/s,8.181922GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",40.829807MB/s,40.829920MB/s,40.829863MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",261.310771GB/s,261.311488GB/s,261.311130GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"l2_read_throughput","L2 Throughput (Reads)",42.959019MB/s,43.118628MB/s,43.038822MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"l2_write_throughput","L2 Throughput (Writes)",261.310834GB/s,261.311551GB/s,261.311192GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"sysmem_write_throughput","System Memory Write Throughput",20.414062KB/s,20.414062KB/s,20.413086KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"tex_cache_transactions","Unified cache to SM transactions",1049600,1049600,1049600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"inst_executed","Instructions Executed",9115648,9115648,9115648 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"inst_issued","Instructions Issued",5591161,5591484,5591322 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.007129%,0.007242%,0.007186% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.028048%,0.028117%,0.028083% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"stall_memory_dependency","Issue Stall Reasons (Data Request)",2.046133%,2.056063%,2.051098% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"stall_texture","Issue Stall Reasons (Texture)",0.000913%,0.000932%,0.000922% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"stall_other","Issue Stall Reasons (Other)",0.000944%,0.000971%,0.000958% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.006464%,0.008626%,0.007545% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000033%,0.000061%,0.000047% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"inst_integer","Integer Instructions",36044800,36044800,36044800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"inst_control","Control-Flow Instructions",2097152,2097152,2097152 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"inst_compute_ld_st","Load/Store Instructions",138412032,138412032,138412032 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"inst_misc","Misc Instructions",2162688,2162688,2162688 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"issue_slots","Issue Slots",5591161,5591484,5591322 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"cf_issued","Issued Control-Flow Instructions",67584,67584,67584 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"cf_executed","Executed Control-Flow Instructions",67584,67584,67584 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"ldst_issued","Issued Load/Store Instructions",4327424,4327424,4327424 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"ldst_executed","Executed Load/Store Instructions",4327424,4327424,4327424 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",10240,10240,10240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",97.777373%,97.789383%,97.783378% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.120744%,0.120824%,0.120784% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",67108864,67108864,67108864 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"ipc","Executed IPC",0.009050,0.009069,0.009059 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"issued_ipc","Issued IPC",0.009067,0.009078,0.009072 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"issue_slot_utilization","Issue Slot Utilization",0.226668%,0.226950%,0.226809% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"sm_efficiency","Multiprocessor Activity",80.396042%,80.408576%,80.402309% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"achieved_occupancy","Achieved Occupancy",0.210201,0.210237,0.210219 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.025161,0.025180,0.025171 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",2,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",12034.000000,12034.000000,12034.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.936866%,98.936866%,98.936866% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000697,0.000697,0.000697 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",34591838,34591838,34591838 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167772201,167772201,167772201 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",34328506,34328506,34328506 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",54572596,54572596,54572596 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",1047.193668GB/s,1047.193668GB/s,1047.193668GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",1047.193668GB/s,1047.193668GB/s,1047.193668GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",10.177320%,10.177320%,10.177320% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.109976%,50.109976%,50.109976% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",68.051851%,68.051851%,68.051851% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",214.270318GB/s,214.270318GB/s,214.270318GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",340.629083GB/s,340.629083GB/s,340.629083GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",720.073478GB/s,720.073478GB/s,720.073478GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",216.760993GB/s,216.760993GB/s,216.760993GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1047.193668GB/s,1047.193668GB/s,1047.193668GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",215.913973GB/s,215.913973GB/s,215.913973GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",1047.193923GB/s,1047.193923GB/s,1047.193923GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",32.724609KB/s,32.724609KB/s,32.723633KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",57676800,57676800,57676800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",61614080,61614080,61614080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",44001218,44001218,44001218 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","High (7)","High (7)","High (7)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006821%,0.006821%,0.006821% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.065785%,0.065785%,0.065785% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",4.343507%,4.343507%,4.343507% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",93.216692%,93.216692%,93.216692% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.011255%,0.011255%,0.011255% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.003714%,0.003714%,0.003714% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000291%,0.000291%,0.000291% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",441712640,441712640,441712640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",901775360,901775360,901775360 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",52756480,52756480,52756480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",44001218,44001218,44001218 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",28190720,28190720,28190720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",28190720,28190720,28190720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",34727540,34727540,34727540 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",1.986326%,1.986326%,1.986326% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.365609%,0.365609%,0.365609% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.096304,0.096304,0.096304 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.096388,0.096388,0.096388 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.409706%,2.409706%,2.409706% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",95.365956%,95.365956%,95.365956% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.987032,0.987032,0.987032 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.327384,0.327384,0.327384 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",11458.000000,11458.000000,11458.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.883422%,98.883422%,98.883422% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000698,0.000698,0.000698 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",78171602,78171602,78171602 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335544361,335544361,335544361 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",75361694,75361694,75361694 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",108607088,108607088,108607088 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",981.292637GB/s,981.292637GB/s,981.292637GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",981.292637GB/s,981.292637GB/s,981.292637GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",981.292637GB/s,981.292637GB/s,981.292637GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",8.525650%,8.525650%,8.525650% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",1.916428%,1.916428%,1.916428% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",68.308122%,68.308122%,68.308122% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",220.393763GB/s,220.393763GB/s,220.393763GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",317.619252GB/s,317.619252GB/s,317.619252GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",337.379237GB/s,337.379237GB/s,337.379237GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",224.899106GB/s,224.899106GB/s,224.899106GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",981.292637GB/s,981.292637GB/s,981.292637GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",228.611283GB/s,228.611283GB/s,228.611283GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",981.292757GB/s,981.292757GB/s,981.292757GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",15.332031KB/s,15.332031KB/s,15.331055KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",57676800,57676800,57676800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",58664960,58664960,58664960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",41052682,41052682,41052682 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","High (7)","High (7)","High (7)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006576%,0.006576%,0.006576% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.060411%,0.060411%,0.060411% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",11.245035%,11.245035%,11.245035% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",82.197074%,82.197074%,82.197074% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.005399%,0.005399%,0.005399% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.003997%,0.003997%,0.003997% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001259%,0.001259%,0.001259% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",378798080,378798080,378798080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",901775360,901775360,901775360 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",21299200,21299200,21299200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",41052682,41052682,41052682 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",28190720,28190720,28190720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",28190720,28190720,28190720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",76902256,76902256,76902256 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",6.290318%,6.290318%,6.290318% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.189931%,0.189931%,0.189931% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.050696,0.050696,0.050696 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.050731,0.050731,0.050731 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.268283%,1.268283%,1.268283% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",79.316195%,79.316195%,79.316195% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.592674,0.592674,0.592674 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.122073,0.122073,0.122073 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",31807.000000,31807.000000,31807.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.805491%,98.805491%,98.805491% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000099,0.000099,0.000099 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",335545734,335545734,335545734 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",23374,23374,23374 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",335372534,335372534,335372534 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",96315,96315,96315 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.017231%,50.017231%,50.017231% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",826.339366GB/s,826.339366GB/s,826.339366GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",243.010386MB/s,243.010386MB/s,243.010385MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",1653.575736GB/s,1653.575736GB/s,1653.575736GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",826.762637GB/s,826.762637GB/s,826.762637GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",826.766121GB/s,826.766121GB/s,826.766121GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",58.974456MB/s,58.974456MB/s,58.974455MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",12.917969KB/s,12.917969KB/s,12.916992KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",127841280,127841280,127841280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",127857484,127857484,127857484 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006552%,0.006552%,0.006552% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.550190%,0.550190%,0.550190% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.340329%,99.340329%,99.340329% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.099765%,0.099765%,0.099765% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.000860%,0.000860%,0.000860% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001241%,0.001241%,0.001241% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000016%,0.000016%,0.000016% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",2706145280,2706145280,2706145280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",127857484,127857484,127857484 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000018%,0.000018%,0.000018% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.001030%,0.001030%,0.001030% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.113241,0.113241,0.113241 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.106663,0.106663,0.106663 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.666575%,2.666575%,2.666575% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",98.688420%,98.688420%,98.688420% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.999305,0.999305,0.999305 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.107363,0.107363,0.107363 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",12610.000000,12610.000000,12610.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.985428%,98.985428%,98.985428% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000612,0.000612,0.000612 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",60779454,60779454,60779454 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167796291,167796291,167796291 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",60552566,60552566,60552566 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",77254252,77254252,77254252 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",872.759670GB/s,872.759670GB/s,872.759670GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",872.759670GB/s,872.759670GB/s,872.759670GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",7.308291%,7.308291%,7.308291% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.266324%,50.266324%,50.266324% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",54.257433%,54.257433%,54.257433% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",314.997658GB/s,314.997658GB/s,314.997658GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",401.880714GB/s,401.880714GB/s,401.880714GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",818.318728GB/s,818.318728GB/s,818.318728GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",315.423613GB/s,315.423613GB/s,315.423613GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",872.759670GB/s,872.759670GB/s,872.759670GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",316.177941GB/s,316.177941GB/s,316.177941GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",872.885200GB/s,872.885200GB/s,872.885200GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",27.273438KB/s,27.273438KB/s,27.272461KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",78648320,78648320,78648320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",64563200,64563200,64563200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",46948379,46948379,46948379 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006620%,0.006620%,0.006620% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.061570%,0.061570%,0.061570% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",8.462251%,8.462251%,8.462251% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",89.047026%,89.047026%,89.047026% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.006919%,0.006919%,0.006919% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.003596%,0.003596%,0.003596% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000432%,0.000432%,0.000432% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",441712640,441712640,441712640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",985661440,985661440,985661440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",63242240,63242240,63242240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",46948379,46948379,46948379 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",30812160,30812160,30812160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",30812160,30812160,30812160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",60634448,60634448,60634448 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",2.020245%,2.020245%,2.020245% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.391342%,0.391342%,0.391342% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.084558,0.084558,0.084558 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.084701,0.084701,0.084701 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.117533%,2.117533%,2.117533% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",96.731983%,96.731983%,96.731983% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.990858,0.990858,0.990858 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.332034,0.332034,0.332034 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",40127.000000,40127.000000,40127.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.053162%,99.053162%,99.053162% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000106,0.000106,0.000106 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",671089478,671089478,671089478 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",46,46,46 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",670939242,670939242,670939242 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",96794,96794,96794 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.031716%,0.031716%,0.031716% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",830.818030GB/s,830.818030GB/s,830.818030GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",122.735759MB/s,122.735759MB/s,122.735758MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",831.028388GB/s,831.028388GB/s,831.028388GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",831.003028GB/s,831.003028GB/s,831.003028GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",831.004065GB/s,831.004065GB/s,831.004065GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",59.727539KB/s,59.727539KB/s,59.726562KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",6.492188KB/s,6.492188KB/s,6.491211KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",205450240,205450240,205450240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",170457740,170457740,170457740 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003232%,0.003232%,0.003232% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.359737%,0.359737%,0.359737% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.631857%,99.631857%,99.631857% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.003328%,0.003328%,0.003328% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.000436%,0.000436%,0.000436% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001034%,0.001034%,0.001034% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000116%,0.000116%,0.000116% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",4069294080,4069294080,4069294080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",170457740,170457740,170457740 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",671088640,671088640,671088640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000009%,0.000009%,0.000009% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000252%,0.000252%,0.000252% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.072075,0.072075,0.072075 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.072069,0.072069,0.072069 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.801714%,1.801714%,1.801714% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",98.167106%,98.167106%,98.167106% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999057,0.999057,0.999057 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.072230,0.072230,0.072230 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",32195.000000,32195.000000,32195.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.819887%,98.819887%,98.819887% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000092,0.000092,0.000092 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",10342,10342,10342 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",16,16,16 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",1034,1034,1034 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",684,684,684 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.998474%,99.998474%,99.998474% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",90.000000%,90.000000%,90.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",4.707130MB/s,4.707130MB/s,4.707129MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",3.113808MB/s,3.113808MB/s,3.113807MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",2983.519890GB/s,2983.519890GB/s,2983.519890GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",46.616076MB/s,46.616076MB/s,46.616075MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",47.080415MB/s,47.080415MB/s,47.080414MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",74.584961KB/s,74.584961KB/s,74.583984KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",23.307617KB/s,23.307617KB/s,23.306641KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",164838400,164838400,164838400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",129840100,129840100,129840100 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.008057%,0.008057%,0.008057% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.985024%,0.985024%,0.985024% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",98.999326%,98.999326%,98.999326% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.002932%,0.002932%,0.002932% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.001541%,0.001541%,0.001541% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002250%,0.002250%,0.002250% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000247%,0.000247%,0.000247% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",2769715200,2769715200,2769715200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",129840100,129840100,129840100 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",10240,10240,10240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000033%,0.000033%,0.000033% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000590%,0.000590%,0.000590% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.193419,0.193419,0.193419 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.193437,0.193437,0.193437 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",4.835926%,4.835926%,4.835926% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",99.905513%,99.905513%,99.905513% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999455,0.999455,0.999455 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.193812,0.193812,0.193812 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",12802.000000,12802.000000,12802.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.000644%,99.000644%,99.000644% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.001038,0.001038,0.001038 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",147137166,147137166,147137166 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335568528,335568528,335568528 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",144166546,144166546,144166546 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",155957884,155957884,155957884 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",657.200785GB/s,657.200785GB/s,657.200785GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",657.200785GB/s,657.200785GB/s,657.200785GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",657.200785GB/s,657.200785GB/s,657.200785GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",2.103196%,2.103196%,2.103196% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",1.753844%,1.753844%,1.753844% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",53.635920%,53.635920%,53.635920% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",282.366178GB/s,282.366178GB/s,282.366178GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",305.460822GB/s,305.460822GB/s,305.460822GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",308.102980GB/s,308.102980GB/s,308.102980GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",287.761481GB/s,287.761481GB/s,287.761481GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",657.200785GB/s,657.200785GB/s,657.200785GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",288.184467GB/s,288.184467GB/s,288.184467GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",657.248199GB/s,657.248199GB/s,657.248199GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",10.268555KB/s,10.268555KB/s,10.267578KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",78648320,78648320,78648320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",65546240,65546240,65546240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",47952006,47952006,47952006 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","High (8)","High (8)","High (8)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.004634%,0.004634%,0.004634% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.051328%,0.051328%,0.051328% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",14.939660%,14.939660%,14.939660% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",80.902843%,80.902843%,80.902843% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.005142%,0.005142%,0.005142% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002795%,0.002795%,0.002795% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001229%,0.001229%,0.001229% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",462684160,462684160,462684160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",985661440,985661440,985661440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",73728000,73728000,73728000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",47952006,47952006,47952006 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",30812160,30812160,30812160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",30812160,30812160,30812160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",146921204,146921204,146921204 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",3.922099%,3.922099%,3.922099% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.170269%,0.170269%,0.170269% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.041923,0.041923,0.041923 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.041967,0.041967,0.041967 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.049166%,1.049166%,1.049166% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",75.026497%,75.026497%,75.026497% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.738834,0.738834,0.738834 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.122519,0.122519,0.122519 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",14274.000000,14274.000000,14274.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.103703%,99.103703%,99.103703% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000768,0.000768,0.000768 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",74141662,74141662,74141662 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167772197,167772197,167772197 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",72334378,72334378,72334378 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",92149465,92149465,92149465 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",568.413317GB/s,568.413317GB/s,568.413317GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",568.413317GB/s,568.413317GB/s,568.413317GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",9.368520%,9.368520%,9.368520% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",51.191014%,51.191014%,51.191014% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",46.000845%,46.000845%,46.000845% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",245.069407GB/s,245.069407GB/s,245.069407GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",312.203068GB/s,312.203068GB/s,312.203068GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",675.060201GB/s,675.060201GB/s,675.060201GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",252.625172GB/s,252.625172GB/s,252.625172GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",568.413317GB/s,568.413317GB/s,568.413317GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",251.192499GB/s,251.192499GB/s,251.192499GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",568.413443GB/s,568.413443GB/s,568.413443GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",17.762695KB/s,17.762695KB/s,17.761719KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",99619840,99619840,99619840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",55439360,55439360,55439360 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",55479662,55479662,55479662 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","High (7)","High (7)","High (7)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006509%,0.006509%,0.006509% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.096086%,0.096086%,0.096086% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",9.439891%,9.439891%,9.439891% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",89.056961%,89.056961%,89.056961% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.009350%,0.009350%,0.009350% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.004085%,0.004085%,0.004085% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000507%,0.000507%,0.000507% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",588513280,588513280,588513280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1069547520,1069547520,1069547520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",105185280,105185280,105185280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",55479662,55479662,55479662 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",33433600,33433600,33433600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",33433600,33433600,33433600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",74564528,74564528,74564528 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.997384%,0.997384%,0.997384% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.389227%,0.389227%,0.389227% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.098414,0.098414,0.098414 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.082763,0.082763,0.082763 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.069086%,2.069086%,2.069086% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",76.346522%,76.346522%,76.346522% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.738918,0.738918,0.738918 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.266575,0.266575,0.266575 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",14146.000000,14146.000000,14146.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.095592%,99.095592%,99.095592% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000969,0.000969,0.000969 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",193598302,193598302,193598302 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335568522,335568522,335568522 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",190245846,190245846,190245846 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",199579464,199579464,199579464 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",551.340709GB/s,551.340709GB/s,551.340709GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",551.340709GB/s,551.340709GB/s,551.340709GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",551.340709GB/s,551.340709GB/s,551.340709GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",1.186790%,1.186790%,1.186790% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",1.324110%,1.324110%,1.324110% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",40.810605%,40.810605%,40.810605% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",312.597393GB/s,312.597393GB/s,312.597393GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",327.933679GB/s,327.933679GB/s,327.933679GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",327.392197GB/s,327.392197GB/s,327.392197GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",316.930227GB/s,316.930227GB/s,316.930227GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",551.340709GB/s,551.340709GB/s,551.340709GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",318.105892GB/s,318.105892GB/s,318.105892GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",551.380476GB/s,551.380476GB/s,551.380476GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",8.614258KB/s,8.614258KB/s,8.613281KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",99619840,99619840,99619840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",72427520,72427520,72427520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",54837104,54837104,54837104 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","High (8)","High (8)","High (8)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.004147%,0.004147%,0.004147% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.056384%,0.056384%,0.056384% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",15.661935%,15.661935%,15.661935% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",81.627703%,81.627703%,81.627703% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.004542%,0.004542%,0.004542% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.003863%,0.003863%,0.003863% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001026%,0.001026%,0.001026% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",546570240,546570240,546570240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1069547520,1069547520,1069547520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",126156800,126156800,126156800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",54837104,54837104,54837104 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",33433600,33433600,33433600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",33433600,33433600,33433600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",192882796,192882796,192882796 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",2.471756%,2.471756%,2.471756% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.168643%,0.168643%,0.168643% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.038614,0.038614,0.038614 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.038590,0.038590,0.038590 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",0.964745%,0.964745%,0.964745% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",78.231430%,78.231430%,78.231430% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.737910,0.737910,0.737910 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.118167,0.118167,0.118167 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",23871.000000,23871.000000,23871.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.408372%,98.408372%,98.408372% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000295,0.000295,0.000295 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",20582,20582,20582 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",16,16,16 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",20578,20578,20578 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",10778,10778,10778 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.993896%,99.993896%,99.993896% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.000000%,50.000000%,50.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",186.895326MB/s,186.895326MB/s,186.895325MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",97.888902MB/s,97.888902MB/s,97.888901MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",5952.350021GB/s,5952.350021GB/s,5952.350021GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",186.005261MB/s,186.005261MB/s,186.005260MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",186.931655MB/s,186.931655MB/s,186.931654MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",148.803711KB/s,148.803711KB/s,148.802734KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.500977KB/s,46.500977KB/s,46.500000KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",87208960,87208960,87208960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",87234754,87234754,87234754 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.023671%,0.023671%,0.023671% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.325970%,1.325970%,1.325970% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",54.186400%,54.186400%,54.186400% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",43.345271%,43.345271%,43.345271% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.002925%,0.002925%,0.002925% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.028324%,0.028324%,0.028324% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000784%,0.000784%,0.000784% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",1405911040,1405911040,1405911040 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",87234754,87234754,87234754 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",20480,20480,20480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000062%,0.000062%,0.000062% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.086592%,1.086592%,1.086592% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.346988,0.346988,0.346988 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.259661,0.259661,0.259661 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",6.491515%,6.491515%,6.491515% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",99.788821%,99.788821%,99.788821% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.985428,0.985428,0.985428 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.989652,0.989652,0.989652 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",9154.000000,9154.000000,9154.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.602387%,98.602387%,98.602387% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000293,0.000293,0.000293 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",61542,61542,61542 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83886144,83886144,83886144 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",61534,61534,61534 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",1956928,1956928,1956928 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",1155.164463GB/s,1155.164463GB/s,1155.164463GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",1155.164463GB/s,1155.164463GB/s,1155.164463GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",15.727796%,15.727796%,15.727796% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.000000%,50.000000%,50.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",99.903674%,99.903674%,99.903674% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",867.698853MB/s,867.698853MB/s,867.698852MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",26.948138GB/s,26.948138GB/s,26.948138GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",433.468696GB/s,433.468696GB/s,433.468696GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",866.373347MB/s,866.373347MB/s,866.373346MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1155.164463GB/s,1155.164463GB/s,1155.164463GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",867.811662MB/s,867.811662MB/s,867.811661MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",1155.165344GB/s,1155.165344GB/s,1155.165344GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",72.197266KB/s,72.197266KB/s,72.196289KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",15733760,15733760,15733760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",29224960,29224960,29224960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",29233363,29233363,29233363 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.017385%,0.017385%,0.017385% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.112587%,0.112587%,0.112587% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",1.172169%,1.172169%,1.172169% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.836295%,0.836295%,0.836295% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.008168%,0.008168%,0.008168% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.005755%,0.005755%,0.005755% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000171%,0.000171%,0.000171% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",179568640,179568640,179568640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",734003200,734003200,734003200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",10813440,10813440,10813440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",29233363,29233363,29233363 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",22947840,22947840,22947840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",22947840,22947840,22947840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",61440,61440,61440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",95.225702%,95.225702%,95.225702% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",2.621768%,2.621768%,2.621768% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.218279,0.218279,0.218279 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.154743,0.154743,0.154743 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",3.868571%,3.868571%,3.868571% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",90.211731%,90.211731%,90.211731% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.962141,0.962141,0.962141 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",1.766204,1.766204,1.766204 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",17474.000000,17474.000000,17474.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.267841%,99.267841%,99.267841% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000786,0.000786,0.000786 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",70689458,70689458,70689458 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83910260,83910260,83910260 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",70361474,70361474,70361474 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",75642782,75642782,75642782 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",398.020913GB/s,398.020913GB/s,398.020913GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",398.020913GB/s,398.020913GB/s,398.020913GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",38.393019%,38.393019%,38.393019% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.178598%,50.178598%,50.178598% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",10.466008%,10.466008%,10.466008% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",333.849646GB/s,333.849646GB/s,333.849646GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",358.908285GB/s,358.908285GB/s,358.908285GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",1592.180824GB/s,1592.180824GB/s,1592.180824GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",337.605091GB/s,337.605091GB/s,337.605091GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",398.020913GB/s,398.020913GB/s,398.020913GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",335.405857GB/s,335.405857GB/s,335.405857GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",398.135641GB/s,398.135641GB/s,398.135641GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",24.875977KB/s,24.875977KB/s,24.875000KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",167777280,167777280,167777280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",89466880,89466880,89466880 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",71871112,71871112,71871112 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006730%,0.006730%,0.006730% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.105030%,0.105030%,0.105030% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",2.333119%,2.333119%,2.333119% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",96.115155%,96.115155%,96.115155% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.004871%,0.004871%,0.004871% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.008888%,0.008888%,0.008888% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000626%,0.000626%,0.000626% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",787742720,787742720,787742720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",157614080,157614080,157614080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",71871112,71871112,71871112 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",41953280,41953280,41953280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",41953280,41953280,41953280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",71152964,71152964,71152964 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.642688%,0.642688%,0.642688% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.782892%,0.782892%,0.782892% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.115375,0.115375,0.115375 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.115466,0.115466,0.115466 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",2.886652%,2.886652%,2.886652% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",98.767242%,98.767242%,98.767242% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.992591,0.992591,0.992591 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.609741,0.609741,0.609741 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",30399.000000,30399.000000,30399.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.750164%,98.750164%,98.750164% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000212,0.000212,0.000212 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",102502,102502,102502 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",16,16,16 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",102410,102410,102410 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",38406,38406,38406 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.969482%,99.969482%,99.969482% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.000000%,50.000000%,50.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",929.245441MB/s,929.245441MB/s,929.245440MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",348.487456MB/s,348.487456MB/s,348.487455MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",5946.771582GB/s,5946.771582GB/s,5946.771582GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",929.154704MB/s,929.154704MB/s,929.154703MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",930.080229MB/s,930.080229MB/s,930.080228MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",148.664062KB/s,148.664062KB/s,148.663086KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.457031KB/s,46.457031KB/s,46.456055KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",155642880,155642880,155642880 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",120657897,120657897,120657897 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.023792%,0.023792%,0.023792% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.886875%,1.886875%,1.886875% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",53.316906%,53.316906%,53.316906% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",43.590990%,43.590990%,43.590990% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.003123%,0.003123%,0.003123% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.024233%,0.024233%,0.024233% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001119%,0.001119%,0.001119% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",2475458560,2475458560,2475458560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",120657897,120657897,120657897 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",102400,102400,102400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000066%,0.000066%,0.000066% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.152895%,1.152895%,1.152895% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.358644,0.358644,0.358644 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.358872,0.358872,0.358872 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",8.971789%,8.971789%,8.971789% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",99.711832%,99.711832%,99.711832% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.977276,0.977276,0.977276 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",1.085256,1.085256,1.085256 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",10370.000000,10370.000000,10370.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.766273%,98.766273%,98.766273% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000468,0.000468,0.000468 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",868698,868698,868698 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83910348,83910348,83910348 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",148742,148742,148742 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",4735106,4735106,4735106 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",1222.513017GB/s,1222.513017GB/s,1222.513017GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",1222.513017GB/s,1222.513017GB/s,1222.513017GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",29.727929%,29.727929%,29.727929% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",92.086282%,92.086282%,92.086282% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",95.090060%,95.090060%,95.090060% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",2.167690GB/s,2.167690GB/s,2.167690GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",69.007024GB/s,69.007024GB/s,69.007024GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",1069.997355GB/s,1069.997355GB/s,1069.997355GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",12.421977GB/s,12.421977GB/s,12.421977GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1222.513017GB/s,1222.513017GB/s,1222.513017GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",12.659962GB/s,12.659962GB/s,12.659962GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",1222.866687GB/s,1222.866687GB/s,1222.866687GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",76.406250KB/s,76.406250KB/s,76.405273KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",36705280,36705280,36705280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",53094400,53094400,53094400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",35467463,35467463,35467463 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.018142%,0.018142%,0.018142% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.159695%,0.159695%,0.159695% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.570069%,0.570069%,0.570069% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",94.420359%,94.420359%,94.420359% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.009894%,0.009894%,0.009894% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.015748%,0.015748%,0.015748% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000386%,0.000386%,0.000386% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",263454720,263454720,263454720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",817889280,817889280,817889280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",42270720,42270720,42270720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",35467463,35467463,35467463 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",25569280,25569280,25569280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",25569280,25569280,25569280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",852368,852368,852368 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",4.219206%,4.219206%,4.219206% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.586501%,0.586501%,0.586501% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.185258,0.185258,0.185258 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.185342,0.185342,0.185342 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",4.633561%,4.633561%,4.633561% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",92.950476%,92.950476%,92.950476% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.982665,0.982665,0.982665 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.549705,0.549705,0.549705 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",12034.000000,12034.000000,12034.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.936866%,98.936866%,98.936866% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000406,0.000406,0.000406 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",4.000000,4.000000,4.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",2209350,2209350,2209350 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",83910263,83910263,83910263 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",315590,315590,315590 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",13568748,13568748,13568748 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",1234.961869GB/s,1234.961869GB/s,1234.961869GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",1234.961869GB/s,1234.961869GB/s,1234.961869GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",43.391729%,43.391729%,43.391729% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",93.308867%,93.308867%,93.308867% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",85.591750%,85.591750%,85.591750% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",4.646082GB/s,4.646082GB/s,4.646082GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",199.757652GB/s,199.757652GB/s,199.757652GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",2007.114542GB/s,2007.114542GB/s,2007.114542GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",32.139776GB/s,32.139776GB/s,32.139776GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1234.961869GB/s,1234.961869GB/s,1234.961869GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",32.525814GB/s,32.525814GB/s,32.525814GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",1235.317889GB/s,1235.317889GB/s,1235.317889GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",77.184570KB/s,77.184570KB/s,77.183594KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",68162560,68162560,68162560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",61614080,61614080,61614080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",43988434,43988434,43988434 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.019177%,0.019177%,0.019177% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.185574%,0.185574%,0.185574% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.951993%,0.951993%,0.951993% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",95.502805%,95.502805%,95.502805% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.007958%,0.007958%,0.007958% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.023470%,0.023470%,0.023470% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001237%,0.001237%,0.001237% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",389283840,389283840,389283840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",943718400,943718400,943718400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",63242240,63242240,63242240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",43988434,43988434,43988434 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",29501440,29501440,29501440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",29501440,29501440,29501440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",2183128,2183128,2183128 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",2.221456%,2.221456%,2.221456% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.086330%,1.086330%,1.086330% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",83886080,83886080,83886080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.235875,0.235875,0.235875 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.235835,0.235835,0.235835 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",5.895884%,5.895884%,5.895884% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",91.993898%,91.993898%,91.993898% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.980848,0.980848,0.980848 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.912482,0.912482,0.912482 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (4)","Mid (4)","Mid (4)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",23875.000000,23875.000000,23875.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.408639%,98.408639%,98.408639% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000296,0.000296,0.000296 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",10342,10342,10342 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",16,16,16 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",2154,2154,2154 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",741,741,741 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.996948%,99.996948%,99.996948% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",90.000000%,90.000000%,90.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",19.565827MB/s,19.565827MB/s,19.565826MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",6.730863MB/s,6.730863MB/s,6.730862MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",5953.134885GB/s,5953.134885GB/s,5953.134885GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",93.014894MB/s,93.014894MB/s,93.014893MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",93.941409MB/s,93.941409MB/s,93.941408MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",148.823242KB/s,148.823242KB/s,148.822266KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.506836KB/s,46.506836KB/s,46.505859KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",122240000,122240000,122240000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",87255675,87255675,87255675 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.019118%,0.019118%,0.019118% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.414088%,1.414088%,1.414088% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",55.461061%,55.461061%,55.461061% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",41.838476%,41.838476%,41.838476% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.003128%,0.003128%,0.003128% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.023842%,0.023842%,0.023842% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001052%,0.001052%,0.001052% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",1406566400,1406566400,1406566400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",87255675,87255675,87255675 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",10240,10240,10240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000067%,0.000067%,0.000067% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.239168%,1.239168%,1.239168% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.259666,0.259666,0.259666 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.259743,0.259743,0.259743 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",6.493582%,6.493582%,6.493582% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",99.753758%,99.753758%,99.753758% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.990125,0.990125,0.990125 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",1.040478,1.040478,1.040478 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_per_warp","Instructions per warp",31039.000000,31039.000000,31039.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.775935%,98.775935%,98.775935% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000227,0.000227,0.000227 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_transactions","L2 Read Transactions",3803174,3803174,3803174 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_transactions","L2 Write Transactions",20,20,20 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_transactions","Device Memory Read Transactions",185186,185186,185186 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_transactions","Device Memory Write Transactions",77699,77699,77699 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",98.781801%,98.781801%,98.781801% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",85.716592%,85.716592%,85.716592% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_read_throughput","Device Memory Read Throughput",1.638969GB/s,1.638969GB/s,1.638969GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_write_throughput","Device Memory Write Throughput",704.170794MB/s,704.170794MB/s,704.170794MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_throughput","Unified cache to SM throughput",5939.580379GB/s,5939.580379GB/s,5939.580379GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",36.176837GB/s,36.176837GB/s,36.176837GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_read_throughput","L2 Throughput (Reads)",33.659590GB/s,33.659590GB/s,33.659590GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_write_throughput","L2 Throughput (Writes)",185.605469KB/s,185.605469KB/s,185.604492KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.401367KB/s,46.401367KB/s,46.400391KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_executed","Instructions Executed",158919680,158919680,158919680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_issued","Instructions Issued",123937305,123937305,123937305 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.023326%,0.023326%,0.023326% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.896186%,1.896186%,1.896186% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",63.514333%,63.514333%,63.514333% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_texture","Issue Stall Reasons (Texture)",33.539528%,33.539528%,33.539528% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_other","Issue Stall Reasons (Other)",0.003059%,0.003059%,0.003059% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.022398%,0.022398%,0.022398% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001051%,0.001051%,0.001051% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_integer","Integer Instructions",2580316160,2580316160,2580316160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slots","Issue Slots",123937305,123937305,123937305 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",4087596,4087596,4087596 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000065%,0.000065%,0.000065% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.000054%,1.000054%,1.000054% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ipc","Executed IPC",0.368036,0.368036,0.368036 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issued_ipc","Issued IPC",0.368175,0.368175,0.368175 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"issue_slot_utilization","Issue Slot Utilization",9.204374%,9.204374%,9.204374% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sm_efficiency","Multiprocessor Activity",99.737695%,99.737695%,99.737695% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"achieved_occupancy","Achieved Occupancy",0.997842,0.997842,0.997842 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",1.009936,1.009936,1.009936 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",8898.000000,8898.000000,8898.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.562177%,98.562177%,98.562177% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000211,0.000211,0.000211 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",82894,82894,82894 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335544336,335544336,335544336 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",81938,81938,81938 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",12313793,12313793,12313793 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",1344.293267GB/s,1344.293267GB/s,1344.293267GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",1344.293267GB/s,1344.293267GB/s,1344.293267GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",1344.293267GB/s,1344.293267GB/s,1344.293267GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",3.006629%,3.006629%,3.006629% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",99.838912%,99.838912%,99.838912% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",336.147161MB/s,336.147161MB/s,336.147160MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",49.332824GB/s,49.332824GB/s,49.332824GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",42.091214GB/s,42.091214GB/s,42.091214GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",336.073317MB/s,336.073317MB/s,336.073316MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1344.293267GB/s,1344.293267GB/s,1344.293267GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",340.069110MB/s,340.069110MB/s,340.069109MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",1344.293331GB/s,1344.293331GB/s,1344.293331GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",21.003906KB/s,21.003906KB/s,21.002930KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",5248000,5248000,5248000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",45557760,45557760,45557760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",27920128,27920128,27920128 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003840%,0.003840%,0.003840% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.025262%,0.025262%,0.025262% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.530059%,0.530059%,0.530059% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.079188%,0.079188%,0.079188% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.001058%,0.001058%,0.001058% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.003935%,0.003935%,0.003935% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000084%,0.000084%,0.000084% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",179404800,179404800,179404800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",692060160,692060160,692060160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",10977280,10977280,10977280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",27920128,27920128,27920128 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",21637120,21637120,21637120 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",81920,81920,81920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",98.683784%,98.683784%,98.683784% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.672790%,0.672790%,0.672790% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.038387,0.038387,0.038387 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.038403,0.038403,0.038403 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",0.960084%,0.960084%,0.960084% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",97.421120%,97.421120%,97.421120% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.982967,0.982967,0.982967 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.457697,0.457697,0.457697 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",32195.000000,32195.000000,32195.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.819887%,98.819887%,98.819887% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000089,0.000089,0.000089 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",11358,11358,11358 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",16,16,16 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",2058,2058,2058 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",1513,1513,1513 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.998474%,99.998474%,99.998474% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",80.000000%,80.000000%,80.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",9.369230MB/s,9.369230MB/s,9.369229MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",6.888068MB/s,6.888068MB/s,6.888067MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",2983.676558GB/s,2983.676558GB/s,2983.676558GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",46.618523MB/s,46.618523MB/s,46.618522MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",51.708319MB/s,51.708319MB/s,51.708318MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",74.588867KB/s,74.588867KB/s,74.587891KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",23.308594KB/s,23.308594KB/s,23.307617KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",164838400,164838400,164838400 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",129839325,129839325,129839325 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.008066%,0.008066%,0.008066% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.985024%,0.985024%,0.985024% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",98.999320%,98.999320%,98.999320% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.002913%,0.002913%,0.002913% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.001541%,0.001541%,0.001541% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002301%,0.002301%,0.002301% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000231%,0.000231%,0.000231% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",2769715200,2769715200,2769715200 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",129839325,129839325,129839325 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",10240,10240,10240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000033%,0.000033%,0.000033% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000572%,0.000572%,0.000572% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.193420,0.193420,0.193420 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.193438,0.193438,0.193438 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",4.835943%,4.835943%,4.835943% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",99.903537%,99.903537%,99.903537% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999456,0.999456,0.999456 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.193802,0.193802,0.193802 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",31807.000000,31807.000000,31807.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.805491%,98.805491%,98.805491% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000101,0.000101,0.000101 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",335545510,335545510,335545510 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",23380,23380,23380 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",335435234,335435234,335435234 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",96780,96780,96780 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.013047%,50.013047%,50.013047% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",832.091642GB/s,832.091642GB/s,832.091642GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",245.837457MB/s,245.837457MB/s,245.837456MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",1664.775291GB/s,1664.775291GB/s,1664.775291GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",832.362244GB/s,832.362244GB/s,832.362244GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",832.365196GB/s,832.365196GB/s,832.365196GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",59.389127MB/s,59.389127MB/s,59.389126MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",13.004883KB/s,13.004883KB/s,13.003906KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",127841280,127841280,127841280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",127853991,127853991,127853991 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006589%,0.006589%,0.006589% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.551315%,0.551315%,0.551315% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.361225%,99.361225%,99.361225% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.077958%,0.077958%,0.077958% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.000861%,0.000861%,0.000861% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001248%,0.001248%,0.001248% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000008%,0.000008%,0.000008% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",2706145280,2706145280,2706145280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",127853991,127853991,127853991 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000018%,0.000018%,0.000018% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000778%,0.000778%,0.000778% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.113996,0.113996,0.113996 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.107016,0.107016,0.107016 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.675389%,2.675389%,2.675389% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",99.123239%,99.123239%,99.123239% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.999320,0.999320,0.999320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.107481,0.107481,0.107481 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",18369.000000,18369.000000,18369.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.628464%,98.628464%,98.628464% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.001132,0.001132,0.001132 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",97329862,97329862,97329862 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167795359,167795359,167795359 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",95714550,95714550,95714550 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",105137554,105137554,105137554 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",491.893739GB/s,491.893739GB/s,491.893739GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",491.893739GB/s,491.893739GB/s,491.893739GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",6.332340%,6.332340%,6.332340% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.675336%,50.675336%,50.675336% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",37.932298%,37.932298%,37.932298% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",280.626940GB/s,280.626940GB/s,280.626940GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",308.254388GB/s,308.254388GB/s,308.254388GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",676.413936GB/s,676.413936GB/s,676.413936GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",285.614051GB/s,285.614051GB/s,285.614051GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",491.893739GB/s,491.893739GB/s,491.893739GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",285.362898GB/s,285.362898GB/s,285.362898GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",491.961756GB/s,491.961756GB/s,491.961756GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",15.371094KB/s,15.371094KB/s,15.370117KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",115348480,115348480,115348480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",94049280,94049280,94049280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",59105527,59105527,59105527 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","High (8)","High (8)","High (8)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.011005%,0.011005%,0.011005% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.087473%,0.087473%,0.087473% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",6.782480%,6.782480%,6.782480% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",91.861254%,91.861254%,91.861254% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.014169%,0.014169%,0.014169% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.004258%,0.004258%,0.004258% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000207%,0.000207%,0.000207% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",630292480,630292480,630292480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1132462080,1132462080,1132462080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",105185280,105185280,105185280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",59105527,59105527,59105527 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",35399680,35399680,35399680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",35399680,35399680,35399680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",97415524,97415524,97415524 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.844599%,0.844599%,0.844599% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.394555%,0.394555%,0.394555% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.074745,0.074745,0.074745 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.075042,0.075042,0.075042 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",1.876038%,1.876038%,1.876038% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",77.223417%,77.223417%,77.223417% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.740780,0.740780,0.740780 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.261242,0.261242,0.261242 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",9218.000000,9218.000000,9218.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.612090%,98.612090%,98.612090% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000264,0.000264,0.000264 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",8.000000,8.000000,8.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",82022,82022,82022 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",167787088,167787088,167787088 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",82520,82520,82520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",118756,118756,118756 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",1301.670460GB/s,1301.670460GB/s,1301.670460GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",1301.670460GB/s,1301.670460GB/s,1301.670460GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",5.836397%,5.836397%,5.836397% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.000000%,50.000000%,50.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",99.857717%,99.857717%,99.857717% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",655.602089MB/s,655.602089MB/s,655.602088MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",943.488629MB/s,943.488629MB/s,943.488628MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",162.867703GB/s,162.867703GB/s,162.867703GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",650.835229MB/s,650.835229MB/s,650.835228MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1301.670460GB/s,1301.670460GB/s,1301.670460GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",651.645596MB/s,651.645596MB/s,651.645595MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",1301.786280GB/s,1301.786280GB/s,1301.786280GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",40.676758KB/s,40.676758KB/s,40.675781KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",10490880,10490880,10490880 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",29552640,29552640,29552640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",29560444,29560444,29560444 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.008932%,0.008932%,0.008932% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.050366%,0.050366%,0.050366% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",0.937622%,0.937622%,0.937622% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.418828%,0.418828%,0.418828% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.004424%,0.004424%,0.004424% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002910%,0.002910%,0.002910% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000080%,0.000080%,0.000080% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",211025920,211025920,211025920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",713031680,713031680,713031680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",10813440,10813440,10813440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",29560444,29560444,29560444 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",22292480,22292480,22292480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",22292480,22292480,22292480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",81920,81920,81920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",97.237144%,97.237144%,97.237144% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.339694%,1.339694%,1.339694% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",167772160,167772160,167772160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.115724,0.115724,0.115724 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.078743,0.078743,0.078743 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",1.968580%,1.968580%,1.968580% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",97.350031%,97.350031%,97.350031% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.974266,0.974266,0.974266 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.912485,0.912485,0.912485 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",40127.000000,40127.000000,40127.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.053162%,99.053162%,99.053162% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000109,0.000109,0.000109 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",671090142,671090142,671090142 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",42,42,42 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",670953462,670953462,670953462 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",97282,97282,97282 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.024573%,0.024573%,0.024573% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",831.132351GB/s,831.132351GB/s,831.132351GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",123.398601MB/s,123.398601MB/s,123.398600MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",831.325170GB/s,831.325170GB/s,831.325170GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",831.299801GB/s,831.299801GB/s,831.299801GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",831.301661GB/s,831.301661GB/s,831.301661GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",54.553711KB/s,54.553711KB/s,54.552734KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",6.494141KB/s,6.494141KB/s,6.493164KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",205450240,205450240,205450240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",170458274,170458274,170458274 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003234%,0.003234%,0.003234% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.359704%,0.359704%,0.359704% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.634344%,99.634344%,99.634344% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.001100%,0.001100%,0.001100% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.000436%,0.000436%,0.000436% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.000743%,0.000743%,0.000743% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000122%,0.000122%,0.000122% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",4069294080,4069294080,4069294080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",170458274,170458274,170458274 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",671088640,671088640,671088640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000009%,0.000009%,0.000009% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000308%,0.000308%,0.000308% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.072080,0.072080,0.072080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.072122,0.072122,0.072122 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.803060%,1.803060%,1.803060% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",98.063945%,98.063945%,98.063945% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999048,0.999048,0.999048 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.072319,0.072319,0.072319 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",37567.000000,37567.000000,37567.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.988640%,98.988640%,98.988640% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000079,0.000079,0.000079 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",133674010,133674010,133674010 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",41,41,41 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",264638,264638,264638 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",83164,83164,83164 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",80.080603%,80.080603%,80.080603% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",99.796180%,99.796180%,99.796180% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",1.174543GB/s,1.174543GB/s,1.174543GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",377.965555MB/s,377.965555MB/s,377.965554MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",2978.585273GB/s,2978.585273GB/s,2978.585273GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",593.298109GB/s,593.298109GB/s,593.298109GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",593.285690GB/s,593.285690GB/s,593.285690GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",190.809570KB/s,190.809570KB/s,190.808594KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",23.268555KB/s,23.268555KB/s,23.267578KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",157332480,157332480,157332480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",157344924,157344924,157344924 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.010976%,0.010976%,0.010976% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.176651%,1.176651%,1.176651% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",98.803321%,98.803321%,98.803321% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.004109%,0.004109%,0.004109% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.001539%,0.001539%,0.001539% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002333%,0.002333%,0.002333% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000196%,0.000196%,0.000196% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",3649863680,3649863680,3649863680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",157344924,157344924,157344924 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",133676808,133676808,133676808 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000033%,0.000033%,0.000033% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000843%,0.000843%,0.000843% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.282588,0.282588,0.282588 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.234053,0.234053,0.234053 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",5.851326%,5.851326%,5.851326% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",99.888367%,99.888367%,99.888367% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999239,0.999239,0.999239 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.234591,0.234591,0.234591 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",31423.000000,31423.000000,31423.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.790894%,98.790894%,98.790894% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000131,0.000131,0.000131 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",327775678,327775678,327775678 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",23161,23161,23161 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",325994762,325994762,325994762 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",96067,96067,96067 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",2.340145%,2.340145%,2.340145% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.279766%,50.279766%,50.279766% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",816.828254GB/s,816.828254GB/s,816.828254GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",246.487167MB/s,246.487167MB/s,246.487166MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",1681.563493GB/s,1681.563493GB/s,1681.563493GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",821.081176GB/s,821.081176GB/s,821.081176GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",821.290603GB/s,821.290603GB/s,821.290603GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",59.426122MB/s,59.426122MB/s,59.426121MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",13.136719KB/s,13.136719KB/s,13.135742KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",160885760,160885760,160885760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",125891634,125891634,125891634 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.006746%,0.006746%,0.006746% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.555996%,0.555996%,0.555996% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.285557%,99.285557%,99.285557% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.146578%,0.146578%,0.146578% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.000881%,0.000881%,0.000881% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002644%,0.002644%,0.002644% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000010%,0.000010%,0.000010% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",2643230720,2643230720,2643230720 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",125891634,125891634,125891634 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",327692096,327692096,327692096 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000019%,0.000019%,0.000019% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.001571%,0.001571%,0.001571% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.107672,0.107672,0.107672 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.107668,0.107668,0.107668 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",2.691690%,2.691690%,2.691690% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",98.150862%,98.150862%,98.150862% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.999370,0.999370,0.999370 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.108671,0.108671,0.108671 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",9858.000000,9858.000000,9858.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.702196%,98.702196%,98.702196% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.001348,0.001348,0.001348 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",26325122,26325122,26325122 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335544359,335544359,335544359 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",25884182,25884182,25884182 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",57125279,57125279,57125279 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",1249.843770GB/s,1249.843770GB/s,1249.843770GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",1249.843770GB/s,1249.843770GB/s,1249.843770GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",1249.843770GB/s,1249.843770GB/s,1249.843770GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",6.725749%,6.725749%,6.725749% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",1.435369%,1.435369%,1.435369% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",83.667059%,83.667059%,83.667059% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",96.414040GB/s,96.414040GB/s,96.414040GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",212.781650GB/s,212.781650GB/s,212.781650GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",195.364373GB/s,195.364373GB/s,195.364373GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",98.092141GB/s,98.092141GB/s,98.092141GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",1249.843770GB/s,1249.843770GB/s,1249.843770GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",98.056465GB/s,98.056465GB/s,98.056465GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",1249.843915GB/s,1249.843915GB/s,1249.843915GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",19.528320KB/s,19.528320KB/s,19.527344KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",26219520,26219520,26219520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",50472960,50472960,50472960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",32866293,32866293,32866293 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Mid (4)","Mid (4)","Mid (4)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.004733%,0.004733%,0.004733% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.031513%,0.031513%,0.031513% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",4.629798%,4.629798%,4.629798% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",77.470354%,77.470354%,77.470354% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.003280%,0.003280%,0.003280% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.002780%,0.002780%,0.002780% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000028%,0.000028%,0.000028% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",252968960,252968960,252968960 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",775946240,775946240,775946240 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",10813440,10813440,10813440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",32866293,32866293,32866293 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",24258560,24258560,24258560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",24258560,24258560,24258560 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",26334700,26334700,26334700 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",17.686804%,17.686804%,17.686804% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.170710%,0.170710%,0.170710% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.042258,0.042258,0.042258 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.042315,0.042315,0.042315 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.057872%,1.057872%,1.057872% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",96.851851%,96.851851%,96.851851% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.985618,0.985618,0.985618 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.149710,0.149710,0.149710 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (3)","Low (3)","Low (3)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",14850.000000,14850.000000,14850.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.138468%,99.138468%,99.138468% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.001153,0.001153,0.001153 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",16.000000,16.000000,16.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",225134834,225134834,225134834 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",335544359,335544359,335544359 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",225224842,225224842,225224842 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",233054591,233054591,233054591 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",492.648575GB/s,492.648575GB/s,492.648575GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",492.648575GB/s,492.648575GB/s,492.648575GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",492.648575GB/s,492.648575GB/s,492.648575GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.656153%,0.656153%,0.656153% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.862630%,0.862630%,0.862630% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",30.991045%,30.991045%,30.991045% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",330.676727GB/s,330.676727GB/s,330.676727GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",342.172420GB/s,342.172420GB/s,342.172420GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",338.725964GB/s,338.725964GB/s,338.725964GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",333.241006GB/s,333.241006GB/s,333.241006GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",492.648575GB/s,492.648575GB/s,492.648575GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",330.544576GB/s,330.544576GB/s,330.544576GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",492.648632GB/s,492.648632GB/s,492.648632GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",7.697266KB/s,7.697266KB/s,7.696289KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",115348480,115348480,115348480 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",76032000,76032000,76032000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",58453953,58453953,58453953 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","High (9)","High (9)","High (9)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003093%,0.003093%,0.003093% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.051715%,0.051715%,0.051715% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",16.139009%,16.139009%,16.139009% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",81.736898%,81.736898%,81.736898% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.004828%,0.004828%,0.004828% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.003663%,0.003663%,0.003663% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000770%,0.000770%,0.000770% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",609484800,609484800,609484800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",10485760,10485760,10485760 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1132462080,1132462080,1132462080 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",115671040,115671040,115671040 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",58453953,58453953,58453953 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",337920,337920,337920 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",35399680,35399680,35399680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",35399680,35399680,35399680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",226971380,226971380,226971380 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",1.890916%,1.890916%,1.890916% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.169108%,0.169108%,0.169108% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",335544320,335544320,335544320 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.036015,0.036015,0.036015 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.036056,0.036056,0.036056 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",0.901405%,0.901405%,0.901405% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",79.959865%,79.959865%,79.959865% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.739977,0.739977,0.739977 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.115945,0.115945,0.115945 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_per_warp","Instructions per warp",39743.000000,39743.000000,39743.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",99.044014%,99.044014%,99.044014% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000129,0.000129,0.000129 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_transactions","L2 Read Transactions",671089302,671089302,671089302 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_transactions","L2 Write Transactions",38,38,38 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_transactions","Device Memory Read Transactions",668957018,668957018,668957018 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_transactions","Device Memory Write Transactions",109303,109303,109303 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",0.302625%,0.302625%,0.302625% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_read_throughput","Device Memory Read Throughput",821.655547GB/s,821.655547GB/s,821.655547GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_write_throughput","Device Memory Write Throughput",137.474964MB/s,137.474964MB/s,137.474963MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_throughput","Unified cache to SM throughput",824.298895GB/s,824.298895GB/s,824.298895GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",824.273741GB/s,824.273741GB/s,824.273741GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_read_throughput","L2 Throughput (Reads)",824.274554GB/s,824.274554GB/s,824.274554GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_write_throughput","L2 Throughput (Writes)",48.940430KB/s,48.940430KB/s,48.939453KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_throughput","System Memory Write Throughput",6.439453KB/s,6.439453KB/s,6.438477KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_executed","Instructions Executed",203484160,203484160,203484160 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_issued","Instructions Issued",168492027,168492027,168492027 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"dram_utilization","Device Memory Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.003309%,0.003309%,0.003309% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",0.357870%,0.357870%,0.357870% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",99.634736%,99.634736%,99.634736% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_texture","Issue Stall Reasons (Texture)",0.001715%,0.001715%,0.001715% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_other","Issue Stall Reasons (Other)",0.000438%,0.000438%,0.000438% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.001520%,0.001520%,0.001520% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.000114%,0.000114%,0.000114% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_integer","Integer Instructions",4006379520,4006379520,4006379520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slots","Issue Slots",168492027,168492027,168492027 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",671088640,671088640,671088640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000009%,0.000009%,0.000009% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",0.000289%,0.000289%,0.000289% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ipc","Executed IPC",0.071679,0.071679,0.071679 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issued_ipc","Issued IPC",0.071689,0.071689,0.071689 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"issue_slot_utilization","Issue Slot Utilization",1.792219%,1.792219%,1.792219% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sm_efficiency","Multiprocessor Activity",97.197229%,97.197229%,97.197229% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"achieved_occupancy","Achieved Occupancy",0.999210,0.999210,0.999210 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",0.071862,0.071862,0.071862 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"l2_utilization","L2 Cache Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_utilization","Unified Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Low (2)","Low (2)","Low (2)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int4*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_per_warp","Instructions per warp",30015.000000,30015.000000,30015.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"branch_efficiency","Branch Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_execution_efficiency","Warp Execution Efficiency",100.000000%,100.000000%,100.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"warp_nonpred_execution_efficiency","Warp Non-Predicated Execution Efficiency",98.734175%,98.734175%,98.734175% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_replay_overhead","Instruction Replay Overhead",0.000219,0.000219,0.000219 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions_per_request","Shared Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions_per_request","Shared Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions_per_request","Local Memory Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions_per_request","Local Memory Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions_per_request","Global Load Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions_per_request","Global Store Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_transactions","Shared Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_transactions","Shared Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_transactions","Local Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_transactions","Local Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_transactions","Global Load Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_transactions","Global Store Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_transactions","System Memory Read Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_transactions","System Memory Write Transactions",5,5,5 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_transactions","L2 Read Transactions",164750,164750,164750 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_transactions","L2 Write Transactions",16,16,16 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_transactions","Device Memory Read Transactions",163850,163850,163850 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_transactions","Device Memory Write Transactions",51810,51810,51810 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"global_hit_rate","Global Hit Rate in unified l1/tex",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_hit_rate","Local Hit Rate",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_requested_throughput","Requested Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_requested_throughput","Requested Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_throughput","Global Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_throughput","Global Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_memory_overhead","Local Memory Overhead",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_hit_rate","Unified Cache Hit Rate",99.951172%,99.951172%,99.951172% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_hit_rate","L2 Hit Rate (Texture Reads)",50.000000%,50.000000%,50.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_hit_rate","L2 Hit Rate (Texture Writes)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_read_throughput","Device Memory Read Throughput",1.449961GB/s,1.449961GB/s,1.449961GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_write_throughput","Device Memory Write Throughput",469.486981MB/s,469.486981MB/s,469.486980MB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_throughput","Unified cache to SM throughput",5938.860804GB/s,5938.860804GB/s,5938.860804GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_throughput","L2 Throughput (Texture Reads)",1.449873GB/s,1.449873GB/s,1.449873GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_throughput","L2 Throughput (Texture Writes)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_read_throughput","L2 Throughput (Reads)",1.457926GB/s,1.457926GB/s,1.457926GB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_write_throughput","L2 Throughput (Writes)",148.466797KB/s,148.466797KB/s,148.465820KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_throughput","System Memory Read Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_throughput","System Memory Write Throughput",46.395508KB/s,46.395508KB/s,46.394531KB/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_load_throughput","Local Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"local_store_throughput","Local Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_load_throughput","Shared Memory Load Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_store_throughput","Shared Memory Store Throughput",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gld_efficiency","Global Memory Load Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"gst_efficiency","Global Memory Store Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_cache_transactions","Unified cache to SM transactions",335549440,335549440,335549440 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp","Floating Point Operations(Double Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_add","Floating Point Operations(Double Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_fma","Floating Point Operations(Double Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_dp_mul","Floating Point Operations(Double Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp","Floating Point Operations(Single Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_add","Floating Point Operations(Single Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_fma","Floating Point Operations(Single Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_mul","Floating Point Operation(Single Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_sp_special","Floating Point Operations(Single Precision Special)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_executed","Instructions Executed",153676800,153676800,153676800 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_issued","Instructions Issued",118695254,118695254,118695254 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"dram_utilization","Device Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_utilization","System Memory Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_inst_fetch","Issue Stall Reasons (Instructions Fetch)",0.024107%,0.024107%,0.024107% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_exec_dependency","Issue Stall Reasons (Execution Dependency)",1.881256%,1.881256%,1.881256% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_dependency","Issue Stall Reasons (Data Request)",57.363802%,57.363802%,57.363802% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_texture","Issue Stall Reasons (Texture)",39.471717%,39.471717%,39.471717% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sync","Issue Stall Reasons (Synchronization)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_other","Issue Stall Reasons (Other)",0.003197%,0.003197%,0.003197% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_constant_memory_dependency","Issue Stall Reasons (Immediate constant)",0.024306%,0.024306%,0.024306% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_pipe_busy","Issue Stall Reasons (Pipe Busy)",0.001177%,0.001177%,0.001177% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_efficiency","Shared Memory Efficiency",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_32","FP Instructions(Single)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_64","FP Instructions(Double)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_integer","Integer Instructions",2412544000,2412544000,2412544000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_bit_convert","Bit-Convert Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_control","Control-Flow Instructions",20971520,20971520,20971520 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_compute_ld_st","Load/Store Instructions",1342177280,1342177280,1342177280 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_misc","Misc Instructions",327680,327680,327680 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_inter_thread_communication","Inter-Thread Instructions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slots","Issue Slots",118695254,118695254,118695254 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_issued","Issued Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_executed","Executed Control-Flow Instructions",665600,665600,665600 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_issued","Issued Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_executed","Executed Load/Store Instructions",42608640,42608640,42608640 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions","Atomic Transactions",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"atomic_transactions_per_request","Atomic Transactions Per Request",0.000000,0.000000,0.000000 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_throughput","L2 Throughput (Atomic requests)",0.000000B/s,0.000000B/s,0.000000B/s "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_atomic_transactions","L2 Transactions (Atomic requests)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_read_transactions","L2 Transactions (Texture Reads)",163840,163840,163840 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_memory_throttle","Issue Stall Reasons (Memory Throttle)",0.000067%,0.000067%,0.000067% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_not_selected","Issue Stall Reasons (Not Selected)",1.230370%,1.230370%,1.230370% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_tex_write_transactions","L2 Transactions (Texture Writes)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp","Floating Point Operations(Half Precision)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_add","Floating Point Operations(Half Precision Add)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_mul","Floating Point Operation(Half Precision Mul)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_count_hp_fma","Floating Point Operations(Half Precision FMA)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"inst_fp_16","HP Instructions(Half)",0,0,0 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ipc","Executed IPC",0.352526,0.352526,0.352526 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issued_ipc","Issued IPC",0.352603,0.352603,0.352603 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"issue_slot_utilization","Issue Slot Utilization",8.815073%,8.815073%,8.815073% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sm_efficiency","Multiprocessor Activity",99.740031%,99.740031%,99.740031% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"achieved_occupancy","Achieved Occupancy",0.992738,0.992738,0.992738 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"eligible_warps_per_cycle","Eligible Warps Per Active Cycle",1.117853,1.117853,1.117853 "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"shared_utilization","Shared Memory Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"l2_utilization","L2 Cache Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_utilization","Unified Cache Utilization","Mid (5)","Mid (5)","Mid (5)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"ldst_fu_utilization","Load/Store Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"cf_fu_utilization","Control-Flow Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"tex_fu_utilization","Texture Function Unit Utilization","Max (10)","Max (10)","Max (10)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"special_fu_utilization","Special Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"half_precision_fu_utilization","Half-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"single_precision_fu_utilization","Single-Precision Function Unit Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"double_precision_fu_utilization","Double-Precision Function Unit Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_hp_efficiency","FLOP Efficiency(Peak Half)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_sp_efficiency","FLOP Efficiency(Peak Single)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"flop_dp_efficiency","FLOP Efficiency(Peak Double)",0.000000%,0.000000%,0.000000% "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_read_utilization","System Memory Read Utilization","Idle (0)","Idle (0)","Idle (0)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"sysmem_write_utilization","System Memory Write Utilization","Low (1)","Low (1)","Low (1)" "Tesla V100-PCIE-16GB (0)","void benchmark_func(int2*)",1,"stall_sleeping","Issue Stall Reasons (Sleeping)",0.000000%,0.000000%,0.000000% ==22725== Warning: One or more events or metrics can't be profiled. Rerun with "--print-gpu-trace" for detail.