diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d661721 --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ +Copyright (c) 2007-2011, Kenneth Hoste and Lieven Eeckhout (Ghent University, Belgium) +kehoste@elis.ugent.be, leeckhou@elis.ugent.be + +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + * Neither the name of the organization nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/makefile b/Makefile similarity index 71% rename from makefile rename to Makefile index a5ad582..2738f18 100644 --- a/makefile +++ b/Makefile @@ -5,7 +5,7 @@ CXXFLAGS ?= -Wall -Werror -Wno-unknown-pragmas $(DBG) $(OPT) CXX=g++ -all: mica_v0-23 +all: mica mica_all.o: mica.h mica_all.h mica_all.cpp $(CXX) -g -c $(CXXFLAGS) $(PIN_CXXFLAGS) mica_all.cpp -o mica_all.o @@ -37,12 +37,12 @@ mica_memfootprint.o: mica.h mica_memfootprint.h mica_memfootprint.cpp mica_memreusedist.o: mica.h mica_memreusedist.h mica_memreusedist.cpp $(CXX) -g -c $(CXXFLAGS) $(PIN_CXXFLAGS) mica_memreusedist.cpp -o mica_memreusedist.o -mica_v0-23.o: mica.h mica_v0-23.cpp - $(CXX) -g -c $(CXXFLAGS) $(PIN_CXXFLAGS) mica_v0-23.cpp -o mica_v0-23.o +mica.o: mica.h mica.cpp + $(CXX) -g -c $(CXXFLAGS) $(PIN_CXXFLAGS) mica.cpp -o mica.o -mica_v0-23: mica.h mica_v0-23.o mica_all.o mica_init.o mica_utils.o mica_ilp.o mica_itypes.o mica_ppm.o mica_reg.o mica_stride.o mica_memfootprint.o mica_memreusedist.o - $(CXX) -g $(PIN_LDFLAGS) $(LINK_DEBUG) mica_v0-23.o mica_all.o mica_init.o mica_utils.o mica_ilp.o mica_itypes.o mica_ppm.o mica_reg.o mica_stride.o mica_memfootprint.o mica_memreusedist.o -o mica_v0-23 $(PIN_LPATHS) $(PIN_LIBS) $(DBG) +mica: mica.h mica.o mica_all.o mica_init.o mica_utils.o mica_ilp.o mica_itypes.o mica_ppm.o mica_reg.o mica_stride.o mica_memfootprint.o mica_memreusedist.o + $(CXX) -g $(PIN_LDFLAGS) $(LINK_DEBUG) mica.o mica_all.o mica_init.o mica_utils.o mica_ilp.o mica_itypes.o mica_ppm.o mica_reg.o mica_stride.o mica_memfootprint.o mica_memreusedist.o -o mica.so $(PIN_LPATHS) $(PIN_LIBS) $(DBG) clean: - rm -f *.o mica_v0-23 *pin*out mica*log* + rm -f *.o mica.so *pin*out mica*log* diff --git a/README b/README index 4f21b47..6b5251a 100644 --- a/README +++ b/README @@ -1,6 +1,6 @@ MICA: Microarchitecture-Independent Characterization of Applications ==================================================================== -version 0.23 +version 0.3 Kenneth Hoste & Lieven 
Eeckhout (Ghent University, Belgium) diff --git a/RELEASE_NOTES b/RELEASE_NOTES index 9826470..20a377a 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -1,13 +1,29 @@ -X Xth 2009 +February 27th 2011 ------------------ MICA v0.3 -- increases flexibility of itypes analysis significantly +- increased flexibility of itypes analysis significantly - instruction groups used in itypes analysis can be specified by the user now, using a itypes.spec file; specify the filename in mica.conf using an entry like: itypes_spec_file: - - by default, the old instruction groups are used + - by default, the old instruction groups are used + (except for SYSCALL, which was added to the group formerly known as 'other') +- made block size in ilp, memfootprint and memreusedist flexible + - size can be set by specifying 'block_size: ' in the mica.conf file + - default block size is 2^6 (64) bytes, which is a change compared to MICA v0.23 for ilp +- made page size in memfootprint flexible + - size can be set by specifying 'page_size: ' in the mica.conf file + - default page size is 4096 (2^12) bytes +- possibly expensive assert statements and other sanity checks were removed +- bug fixes: + - memory read size wasn't being used 100% correctly + in ilp, memfootprint, memreusedist and stride analysis, + the size was being added to the start address of the read, + while (size-1) should be added; otherwise, e.g. 
for memfootprint, + we count an extra block being touched if the access is near a block boundary + - a small problem with an assert statement was fixed in memfootprint (>= 0 instead of >) + - fprintf statements were fixed for 64-bit systems September 22th 2009 diff --git a/itypes_default.spec b/itypes_default.spec new file mode 100644 index 0000000..bd0196c --- /dev/null +++ b/itypes_default.spec @@ -0,0 +1,30 @@ +0, 0, SPECIAL, mem_read +1, 0, SPECIAL, mem_write +2, 0, CATEGORY, COND_BR +2, 1, CATEGORY, UNCOND_BR +2, 2, OPCODE, LEAVE +2, 3, OPCODE, RET_NEAR +2, 4, OPCODE, CALL_NEAR +3, 0, CATEGORY, LOGICAL +3, 1, CATEGORY, DATAXFER +3, 2, CATEGORY, BINARY +3, 3, CATEGORY, FLAGOP +3, 4, CATEGORY, BITBYTE +4, 0, CATEGORY, X87_ALU +4, 1, CATEGORY, FCMOV +5, 0, CATEGORY, POP +5, 1, CATEGORY, PUSH +6, 0, CATEGORY, SHIFT +7, 0, CATEGORY, STRINGOP +8, 0, CATEGORY, MMX +8, 1, CATEGORY, SSE +9, 0, CATEGORY, INTERRUPT +9, 1, CATEGORY, ROTATE +9, 2, CATEGORY, SEMAPHORE +9, 3, CATEGORY, CMOV +9, 4, CATEGORY, SYSTEM +9, 5, CATEGORY, MISC +9, 6, CATEGORY, PREFETCH +9, 7, CATEGORY, SYSCALL +10, 0, CATEGORY, WIDENOP +10, 1, CATEGORY, NOP diff --git a/mica.conf.example b/mica.conf.example index b5a8d07..6d61866 100644 --- a/mica.conf.example +++ b/mica.conf.example @@ -1,3 +1,6 @@ analysis_type: ilp_one interval_size: full ilp_size: 32 +block_size: 6 +page_size: 12 +itypes_spec_file: itypes_default.spec diff --git a/mica.cpp b/mica.cpp index db11386..2d63ee8 100644 --- a/mica.cpp +++ b/mica.cpp @@ -1,25 +1,3 @@ -/* Copyright (c) 2007, Kenneth Hoste and Lieven Eeckhout (Ghent University, Belgium) - -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of the organization nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - /* * This file is part of MICA, a Pin tool to collect * microarchitecture-independent program characteristics using the Pin @@ -63,7 +41,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ /* global */ INT64 interval_size; // interval size chosen INT64 interval_ins_count; +INT64 interval_ins_count_for_hpc_alignment; INT64 total_ins_count; +INT64 total_ins_count_for_hpc_alignment; ins_buffer_entry* ins_buffer[MAX_MEM_TABLE_ENTRIES]; @@ -71,12 +51,20 @@ ins_buffer_entry* ins_buffer[MAX_MEM_TABLE_ENTRIES]; UINT32 _ilp_win_size; char* _itypes_spec_file; +/* ILP, MEMFOOTPRINT, MEMREUSEDIST */ +UINT32 _block_size; + +/* MEMFOOTPRINT */ +UINT32 _page_size; + /********************************************** * MAIN * **********************************************/ -FILE* log; +//FILE* log; +ofstream log; +// find buffer entry for instruction at given address in a hash table ins_buffer_entry* findInsBufferEntry(ADDRINT a){ ins_buffer_entry* e; @@ -129,10 +117,26 @@ ins_buffer_entry* findInsBufferEntry(ADDRINT a){ /* ALL */ VOID Instruction_all(INS ins, VOID* v){ - if(interval_size == -1) - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count,IARG_END); - else - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count, IARG_END); + if(interval_size == -1) { + if(INS_HasRealRep(ins)){ + INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); + INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); + } + else{ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_no_rep, IARG_END); + } + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_always, IARG_END); + } + else{ + if(INS_HasRealRep(ins)){ + INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); + INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); + } + else{ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_no_rep, 
IARG_END); + } + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_always, IARG_END); + } ADDRINT insAddr = INS_Address(ins); ins_buffer_entry* e = findInsBufferEntry(insAddr); @@ -159,10 +163,27 @@ VOID Fini_all(INT32 code, VOID* v){ /* ILP */ VOID Instruction_ilp_all_only(INS ins, VOID* v){ - if(interval_size == -1) - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count, IARG_END); - else - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count, IARG_END); + if(interval_size == -1){ + if(INS_HasRealRep(ins)){ + INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); + INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); + } + else{ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_no_rep, IARG_END); + } + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_always, IARG_END); + } + else{ + if(INS_HasRealRep(ins)){ + INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); + INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); + } + else{ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_no_rep, IARG_END); + } + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_always, IARG_END); + } + ADDRINT insAddr = INS_Address(ins); ins_buffer_entry* e = findInsBufferEntry(insAddr); @@ -175,10 +196,26 @@ VOID Fini_ilp_all_only(INT32 code, VOID* v){ /* ILP_ONE */ VOID Instruction_ilp_one_only(INS ins, VOID* v){ - if(interval_size == -1) - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count, IARG_END); - else - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count, IARG_END); + if(interval_size == -1){ + 
if(INS_HasRealRep(ins)){ + INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); + INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); + } + else{ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_no_rep, IARG_END); + } + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_always, IARG_END); + } + else{ + if(INS_HasRealRep(ins)){ + INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); + INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); + } + else{ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_no_rep, IARG_END); + } + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_always, IARG_END); + } ADDRINT insAddr = INS_Address(ins); @@ -192,10 +229,27 @@ VOID Fini_ilp_one_only(INT32 code, VOID* v){ /* ITYPES */ VOID Instruction_itypes_only(INS ins, VOID* v){ - if(interval_size == -1) - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count, IARG_END); - else - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count, IARG_END); + if(interval_size == -1){ + if(INS_HasRealRep(ins)){ + INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); + INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); + } + else{ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_no_rep, IARG_END); + } + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_always, IARG_END); + } + else{ + if(INS_HasRealRep(ins)){ + INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); + 
INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); + } + else{ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_no_rep, IARG_END); + } + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_always, IARG_END); + } + instrument_itypes(ins, v); } @@ -205,10 +259,27 @@ VOID Fini_itypes_only(INT32 code, VOID* v){ /* PPM */ VOID Instruction_ppm_only(INS ins, VOID* v){ - if(interval_size == -1) - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count, IARG_END); - else - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count, IARG_END); + if(interval_size == -1){ + if(INS_HasRealRep(ins)){ + INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); + INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); + } + else{ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_no_rep, IARG_END); + } + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_always, IARG_END); + } + else{ + if(INS_HasRealRep(ins)){ + INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); + INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); + } + else{ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_no_rep, IARG_END); + } + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_always, IARG_END); + } + instrument_ppm(ins, v); } @@ -218,10 +289,26 @@ VOID Fini_ppm_only(INT32 code, VOID* v){ /* REG */ VOID Instruction_reg_only(INS ins, VOID* v){ - if(interval_size == -1) - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count, IARG_END); - 
else - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count, IARG_END); + if(interval_size == -1){ + if(INS_HasRealRep(ins)){ + INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); + INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); + } + else{ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_no_rep, IARG_END); + } + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_always, IARG_END); + } + else{ + if(INS_HasRealRep(ins)){ + INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); + INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); + } + else{ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_no_rep, IARG_END); + } + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_always, IARG_END); + } ADDRINT insAddr = INS_Address(ins); @@ -236,10 +323,27 @@ VOID Fini_reg_only(INT32 code, VOID* v){ /* STRIDE */ VOID Instruction_stride_only(INS ins, VOID* v){ - if(interval_size == -1) - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count, IARG_END); - else - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count, IARG_END); + if(interval_size == -1){ + if(INS_HasRealRep(ins)){ + INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); + INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); + } + else{ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_no_rep, IARG_END); + } + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_always, IARG_END); + } + else{ + 
if(INS_HasRealRep(ins)){ + INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); + INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); + } + else{ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_no_rep, IARG_END); + } + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_always, IARG_END); + } + instrument_stride(ins, v); } @@ -249,10 +353,27 @@ VOID Fini_stride_only(INT32 code, VOID* v){ /* MEMFOOTPRINT */ VOID Instruction_memfootprint_only(INS ins, VOID* v){ - if(interval_size == -1) - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count, IARG_END); - else - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count, IARG_END); + if(interval_size == -1){ + if(INS_HasRealRep(ins)){ + INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); + INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); + } + else{ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_no_rep, IARG_END); + } + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_always, IARG_END); + } + else{ + if(INS_HasRealRep(ins)){ + INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); + INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); + } + else{ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_no_rep, IARG_END); + } + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_always, IARG_END); + } + instrument_memfootprint(ins, v); } @@ -262,10 +383,27 @@ VOID Fini_memfootprint_only(INT32 code, VOID* v){ /* 
MEMREUSEDIST */ VOID Instruction_memreusedist_only(INS ins, VOID* v){ - if(interval_size == -1) - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count, IARG_END); - else - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count, IARG_END); + if(interval_size == -1){ + if(INS_HasRealRep(ins)){ + INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); + INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); + } + else{ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_no_rep, IARG_END); + } + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_always, IARG_END); + } + else{ + if(INS_HasRealRep(ins)){ + INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); + INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); + } + else{ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_no_rep, IARG_END); + } + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_always, IARG_END); + } + instrument_memreusedist(ins, v); } @@ -276,12 +414,28 @@ VOID Fini_memreusedist_only(INT32 code, VOID* v){ /* MY TYPE */ VOID Instruction_custom(INS ins, VOID* v){ - if(interval_size == -1) - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count, IARG_END); - else - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count, IARG_END); - - fprintf(stderr,"Please choose a subset of characteristics you want to use, and remove this message (along with the exit call)\n"); + if(interval_size == -1){ + if(INS_HasRealRep(ins)){ + INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); + INS_InsertThenCall(ins, IPOINT_BEFORE, 
(AFUNPTR)all_instr_full_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); + } + else{ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_no_rep, IARG_END); + } + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_always, IARG_END); + } + else{ + if(INS_HasRealRep(ins)){ + INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); + INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); + } + else{ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_no_rep, IARG_END); + } + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_always, IARG_END); + } + + cerr << "Please choose a subset of characteristics you want to use, and remove this message (along with the exit call)" << endl; exit(1); // Choose subset of characteristics, and make the same adjustments in Fini_custom and init_custom below @@ -330,12 +484,14 @@ int main(int argc, char* argv[]){ setup_mica_log(&log); - read_config(log, &interval_size, &mode, &_ilp_win_size, &_itypes_spec_file); + read_config(&log, &interval_size, &mode, &_ilp_win_size, &_block_size, &_page_size, &_itypes_spec_file); - DEBUG_MSG("interval_size: %lld, mode: %d\n", interval_size, mode); + cerr << "interval_size: " << interval_size << ", mode: " << mode << endl; interval_ins_count = 0; + interval_ins_count_for_hpc_alignment = 0; total_ins_count = 0; + total_ins_count_for_hpc_alignment = 0; for(i=0; i < MAX_MEM_TABLE_ENTRIES; i++){ ins_buffer[i] = (ins_buffer_entry*)NULL; @@ -403,8 +559,8 @@ int main(int argc, char* argv[]){ PIN_AddFiniFunction(Fini_custom, 0); break; default: - ERROR("FATAL ERROR: Unknown mode while trying to allocate memory for Pin tool!\n"); - LOG_MSG("FATAL ERROR: Unknown mode while trying to allocate memory for Pin tool!\n"); + cerr << "FATAL 
ERROR: Unknown mode while trying to allocate memory for Pin tool!" << endl; + log << "FATAL ERROR: Unknown mode while trying to allocate memory for Pin tool!" << endl; exit(1); } diff --git a/mica.h b/mica.h index 53be9b6..541bfda 100644 --- a/mica.h +++ b/mica.h @@ -8,7 +8,10 @@ */ /* standard library includes */ -#include +//#include +#include +#include +using namespace std; #include #include #include @@ -19,9 +22,9 @@ #ifndef MICA #define MICA -#define LOG_MSG(str, a...) fprintf(log, str, ##a); fflush(log); -#define ERROR(str, a...) fprintf(stderr, str, ##a); -#define DEBUG_MSG(str, a...) fprintf(stderr, str, ##a); +//#define LOG_MSG(str, a...) fprintf(log, str, ##a); fflush(log); +//#define ERROR(str, a...) fprintf(stderr, str, ##a); +//#define DEBUG_MSG(str, a...) fprintf(stderr, str, ##a); /* *** defines *** */ diff --git a/mica_all.cpp b/mica_all.cpp index b12fc56..5ba77a9 100644 --- a/mica_all.cpp +++ b/mica_all.cpp @@ -20,7 +20,9 @@ #define PROGRESS_THRESHOLD 10000000 // 10M extern INT64 total_ins_count; +extern INT64 total_ins_count_for_hpc_alignment; extern INT64 interval_ins_count; +extern INT64 interval_ins_count_for_hpc_alignment; // one count for REP prefixed instructions extern INT64 interval_size; @@ -44,26 +46,53 @@ void init_all(){ init_memreusedist(); } +ADDRINT returnArg(BOOL arg){ -VOID all_instr_full_count(){ - total_ins_count++; + return arg; +} + +VOID all_instr_full_count_always(){ - if(total_ins_count % PROGRESS_THRESHOLD == 0){ + total_ins_count++; + + /*if(total_ins_count % PROGRESS_THRESHOLD == 0){ FILE* f = fopen("mica_progress.txt","w"); - fprintf(f,"%lld*10^7 instructions analyzed\n", total_ins_count/PROGRESS_THRESHOLD); + fprintf(f,"%lld*10^7 instructions analyzed\n", (long long)total_ins_count/PROGRESS_THRESHOLD); fclose(f); - } + }*/ +} + +VOID all_instr_full_count_for_hpc_alignment_no_rep(){ + total_ins_count_for_hpc_alignment++; +} + +VOID all_instr_full_count_for_hpc_alignment_with_rep(UINT32 repCnt){ + if(repCnt > 0){ + 
total_ins_count_for_hpc_alignment++; + } } -VOID all_instr_intervals_count(){ - total_ins_count++; +VOID all_instr_intervals_count_always(){ + total_ins_count++; interval_ins_count++; - if(total_ins_count % PROGRESS_THRESHOLD == 0){ + /*if(total_ins_count % PROGRESS_THRESHOLD == 0){ FILE* f = fopen("mica_progress.txt","w"); - fprintf(f,"%lld*10^7 instructions analyzed\n", total_ins_count/PROGRESS_THRESHOLD); + fprintf(f,"%lld*10^7 instructions analyzed\n", (long long)total_ins_count/PROGRESS_THRESHOLD); fclose(f); - } + }*/ +} + +VOID all_instr_intervals_count_for_hpc_alignment_no_rep(){ + total_ins_count_for_hpc_alignment++; + interval_ins_count_for_hpc_alignment++; +} + +VOID all_instr_intervals_count_for_hpc_alignment_with_rep(UINT32 repCnt){ + if(repCnt > 0){ + total_ins_count_for_hpc_alignment++; + interval_ins_count_for_hpc_alignment++; + } } ADDRINT all_buffer_instruction_2reads_write(void* _e, ADDRINT read1_addr, ADDRINT read2_addr, ADDRINT read_size, UINT32 stride_index_memread1, UINT32 stride_index_memread2, ADDRINT write_addr, ADDRINT write_size, UINT32 stride_index_memwrite){ @@ -78,7 +107,12 @@ ADDRINT all_buffer_instruction_2reads_write(void* _e, ADDRINT read1_addr, ADDRIN memOp(write_addr, write_size); memreusedist_memRead(read1_addr, read_size); // memreusedist memreusedist_memRead(read2_addr, read_size); - return ilp_buffer_instruction_2reads_write(_e, read1_addr, read2_addr, read_size, write_addr, write_size); + //return ilp_buffer_instruction_2reads_write(_e, read1_addr, read2_addr, read_size, write_addr, write_size); + ilp_buffer_instruction_only(_e); + ilp_buffer_instruction_read(read1_addr, read_size); + ilp_buffer_instruction_read2(read2_addr); + ilp_buffer_instruction_write(write_addr, write_size); + return ilp_buffer_instruction_next(); } ADDRINT all_buffer_instruction_read_write(void* _e, ADDRINT read1_addr, ADDRINT read_size, UINT32 stride_index_memread1, ADDRINT write_addr, ADDRINT write_size, UINT32 stride_index_memwrite){ @@ -90,7 
+124,11 @@ ADDRINT all_buffer_instruction_read_write(void* _e, ADDRINT read1_addr, ADDRINT memOp(read1_addr, read_size); // memfootprint memOp(write_addr, write_size); memreusedist_memRead(read1_addr, read_size); // memreusedist - return ilp_buffer_instruction_read_write(_e, read1_addr, read_size, write_addr, write_size); + //return ilp_buffer_instruction_read_write(_e, read1_addr, read_size, write_addr, write_size); + ilp_buffer_instruction_only(_e); + ilp_buffer_instruction_read(read1_addr, read_size); + ilp_buffer_instruction_write(write_addr, write_size); + return ilp_buffer_instruction_next(); } ADDRINT all_buffer_instruction_2reads(void* _e, ADDRINT read1_addr, ADDRINT read2_addr, ADDRINT read_size, UINT32 stride_index_memread1, UINT32 stride_index_memread2){ @@ -102,7 +140,11 @@ ADDRINT all_buffer_instruction_2reads(void* _e, ADDRINT read1_addr, ADDRINT read memOp(read2_addr, read_size); memreusedist_memRead(read1_addr, read_size); // memreusedist memreusedist_memRead(read2_addr, read_size); - return ilp_buffer_instruction_2reads(_e, read1_addr, read2_addr, read_size); + //return ilp_buffer_instruction_2reads(_e, read1_addr, read2_addr, read_size); + ilp_buffer_instruction_only(_e); + ilp_buffer_instruction_read(read1_addr, read_size); + ilp_buffer_instruction_read2(read2_addr); + return ilp_buffer_instruction_next(); } ADDRINT all_buffer_instruction_read(void* _e, ADDRINT read1_addr, ADDRINT read_size, UINT32 stride_index_memread1){ @@ -111,7 +153,10 @@ ADDRINT all_buffer_instruction_read(void* _e, ADDRINT read1_addr, ADDRINT read_s readMem_stride(stride_index_memread1, read1_addr, read_size); memOp(read1_addr, read_size); // memfootprint memreusedist_memRead(read1_addr, read_size); // memreusedist - return ilp_buffer_instruction_read(_e, read1_addr, read_size); + //return ilp_buffer_instruction_read(_e, read1_addr, read_size); + ilp_buffer_instruction_only(_e); + ilp_buffer_instruction_read(read1_addr, read_size); + return ilp_buffer_instruction_next(); } 
ADDRINT all_buffer_instruction_write(void* _e, ADDRINT write_addr, ADDRINT write_size, UINT32 stride_index_memwrite){ @@ -119,12 +164,17 @@ ADDRINT all_buffer_instruction_write(void* _e, ADDRINT write_addr, ADDRINT write //itypes_count_mem_write(); writeMem_stride(stride_index_memwrite, write_addr, write_size); memOp(write_addr, write_size); // memfootprint - return ilp_buffer_instruction_write(_e, write_addr, write_size); + //return ilp_buffer_instruction_write(_e, write_addr, write_size); + ilp_buffer_instruction_only(_e); + ilp_buffer_instruction_write(write_addr, write_size); + return ilp_buffer_instruction_next(); } ADDRINT all_buffer_instruction(void* _e){ - return ilp_buffer_instruction(_e); + //return ilp_buffer_instruction(_e); + ilp_buffer_instruction_only(_e); + return ilp_buffer_instruction_next(); } VOID all_instr_full(VOID* _e, ADDRINT instrAddr, ADDRINT size){ @@ -135,7 +185,7 @@ VOID all_instr_full(VOID* _e, ADDRINT instrAddr, ADDRINT size){ ADDRINT all_instr_intervals(VOID* _e, ADDRINT instrAddr, ADDRINT size){ reg_instr_intervals(_e); instrMem(instrAddr, size); - return (ADDRINT)(total_ins_count % interval_size == 0); + return (ADDRINT)(interval_ins_count_for_hpc_alignment == interval_size); }; VOID all_instr_interval(){ @@ -161,6 +211,20 @@ VOID all_instr_interval(){ memreusedist_instr_interval_reset(); interval_ins_count = 0; + interval_ins_count_for_hpc_alignment = 0; +} + +VOID all_instr_interval_for_ilp(){ + + // save these, because empty_ilp_buffer_all resets them + INT64 interval_ins_count_backup = interval_ins_count; + INT64 interval_ins_count_for_hpc_alignment_backup = interval_ins_count_for_hpc_alignment; + + empty_ilp_buffer_all(); + + // restore + interval_ins_count = interval_ins_count_backup; + interval_ins_count_for_hpc_alignment = interval_ins_count_for_hpc_alignment_backup; } VOID instrument_all(INS ins, VOID* v, ins_buffer_entry* e){ @@ -189,7 +253,7 @@ VOID instrument_all(INS ins, VOID* v, ins_buffer_entry* e){ regReadCnt = 0; 
for(i=0; i < maxNumRegsCons; i++){ reg = INS_RegR(ins, i); - assert((UINT32)reg < MAX_NUM_REGS); + //assert((UINT32)reg < MAX_NUM_REGS); // only consider valid general-purpose registers (any bit-width) and floating-point registers, // i.e. exlude branch, segment and pin registers, among others if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ @@ -198,17 +262,18 @@ VOID instrument_all(INS ins, VOID* v, ins_buffer_entry* e){ } e->regReadCnt = regReadCnt; - if((e->regsRead = (REG*)malloc(regReadCnt*sizeof(REG))) == (REG*)NULL){ - fprintf(stderr,"ERROR: Could not allocate regsRead memory for ins 0x%x\n", e->insAddr); + e->regsRead = (REG*)malloc(regReadCnt*sizeof(REG)); + /*if((e->regsRead = (REG*)malloc(regReadCnt*sizeof(REG))) == (REG*)NULL){ + fprintf(stderr,"ERROR: Could not allocate regsRead memory for ins 0x%x\n", (unsigned int)e->insAddr); exit(1); - } + }*/ regReadCnt = 0; for(i=0; i < maxNumRegsCons; i++){ reg = INS_RegR(ins, i); - assert((UINT32)reg < MAX_NUM_REGS); + //assert((UINT32)reg < MAX_NUM_REGS); // only consider valid general-purpose registers (any bit-width) and floating-point registers, // i.e. exlude branch, segment and pin registers, among others if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ @@ -229,7 +294,7 @@ VOID instrument_all(INS ins, VOID* v, ins_buffer_entry* e){ reg = INS_RegW(ins, i); - assert((UINT32)reg < MAX_NUM_REGS); + //assert((UINT32)reg < MAX_NUM_REGS); // only consider valid general-purpose registers (any bit-width) and floating-point registers, // i.e. 
exlude branch, segment and pin registers, among others */ if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ @@ -238,17 +303,18 @@ VOID instrument_all(INS ins, VOID* v, ins_buffer_entry* e){ } e->regWriteCnt = regWriteCnt; - if((e->regsWritten = (REG*)malloc(regWriteCnt*sizeof(REG))) == (REG*)NULL){ - fprintf(stderr,"ERROR: Could not allocate regsRead memory for ins 0x%x\n", e->insAddr); + e->regsWritten = (REG*)malloc(regWriteCnt*sizeof(REG)); + /*if((e->regsWritten = (REG*)malloc(regWriteCnt*sizeof(REG))) == (REG*)NULL){ + fprintf(stderr,"ERROR: Could not allocate regsRead memory for ins 0x%x\n", (unsigned int)e->insAddr); exit(1); - } + }*/ regWriteCnt = 0; for(i=0; i < maxNumRegsProd; i++){ reg = INS_RegW(ins, i); - assert((UINT32)reg < MAX_NUM_REGS); + //assert((UINT32)reg < MAX_NUM_REGS); // only consider valid general-purpose registers (any bit-width) and floating-point registers, // i.e. exlude branch, segment and pin registers, among others if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ @@ -266,10 +332,10 @@ VOID instrument_all(INS ins, VOID* v, ins_buffer_entry* e){ if(INS_OperandIsReg(ins,i)) regOpCnt++; } - if(regOpCnt >= MAX_NUM_OPER){ + /*if(regOpCnt >= MAX_NUM_OPER){ fprintf(stderr,"BOOM! -> MAX_NUM_OPER is exceeded! 
(%u)\n", regOpCnt); exit(1); - } + }*/ e->regOpCnt = regOpCnt; e->setRegOpCnt = true; } @@ -319,7 +385,8 @@ VOID instrument_all(INS ins, VOID* v, ins_buffer_entry* e){ } } /* InsertIfCall returns true if ILP buffer is full */ - INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)empty_ilp_buffer_all, IARG_END); + //INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)empty_ilp_buffer_all, IARG_END); + INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_interval_for_ilp, IARG_END); // wrapper for empty_ilp_buffer_all /* +++ ITYPES +++ */ @@ -361,7 +428,7 @@ VOID instrument_all(INS ins, VOID* v, ins_buffer_entry* e){ } } else{ - fprintf(stderr, "ERROR! Unknown identifier type specified (%d).\n", group_identifiers[i][j].type); + cerr << "ERROR! Unknown identifier type specified (" << group_identifiers[i][j].type << ")" << endl; } } } diff --git a/mica_all.h b/mica_all.h index 9d192d9..f375e1b 100644 --- a/mica_all.h +++ b/mica_all.h @@ -11,6 +11,11 @@ #include "mica_utils.h" VOID init_all(); -VOID all_instr_full_count(); -VOID all_instr_intervals_count(); +ADDRINT returnArg(BOOL arg); +VOID all_instr_full_count_always(); +VOID all_instr_full_count_for_hpc_alignment_no_rep(); +VOID all_instr_full_count_for_hpc_alignment_with_rep(UINT32 repCnt); +VOID all_instr_intervals_count_always(); +VOID all_instr_intervals_count_for_hpc_alignment_no_rep(); +VOID all_instr_intervals_count_for_hpc_alignment_with_rep(UINT32 repCnt); VOID instrument_all(INS ins, VOID* v, ins_buffer_entry* e); diff --git a/mica_ilp.cpp b/mica_ilp.cpp index 3535049..6b4e369 100644 --- a/mica_ilp.cpp +++ b/mica_ilp.cpp @@ -13,6 +13,9 @@ #include "mica_utils.h" #include "mica_ilp.h" +#include +using namespace std; + #define ILP_WIN_SIZE_CNT 4 const UINT32 win_sizes[ILP_WIN_SIZE_CNT] = {32, 64, 128, 256}; @@ -20,9 +23,13 @@ const UINT32 win_sizes[ILP_WIN_SIZE_CNT] = {32, 64, 128, 256}; extern UINT32 _ilp_win_size; UINT32 win_size; +extern UINT32 _block_size; +UINT32 ilp_block_size; + /* buffer settings */ 
-#define ILP_BUFFER_SIZE 256 +//#define ILP_BUFFER_SIZE 256 +#define ILP_BUFFER_SIZE 200 /* buffer variables */ @@ -50,9 +57,11 @@ VOID fini_ilp_buffering_one(); extern INT64 interval_size; extern INT64 interval_ins_count; +extern INT64 interval_ins_count_for_hpc_alignment; extern INT64 total_ins_count; -FILE* output_file_ilp_one; -FILE* output_file_ilp_all; +extern INT64 total_ins_count_for_hpc_alignment; +ofstream output_file_ilp_one; +ofstream output_file_ilp_all; INT32 size_pow_all_times_all; INT64 index_all_times_all; @@ -103,10 +112,11 @@ void init_ilp_one(){ init_ilp_buffering(); win_size = _ilp_win_size; + ilp_block_size = _block_size; size_pow_times = 10; if((all_times = (UINT64*)malloc((1 << size_pow_times) * sizeof(UINT64))) == (UINT64*)NULL){ - fprintf(stderr,"Could not allocate memory\n"); + cerr << "Could not allocate memory" << endl; exit(1); } index_all_times = 1; // don't use first element of all_times @@ -120,7 +130,7 @@ void init_ilp_one(){ } if((executionProfile = (UINT64*)malloc(win_size*sizeof(UINT64))) == (UINT64*)NULL){ - fprintf(stderr,"Not enough memory (in main)\n"); + cerr << "Not enough memory (in main)" << endl; exit(1); } @@ -130,9 +140,13 @@ void init_ilp_one(){ issueTime = 0; if(interval_size != -1){ + if(interval_size % ILP_BUFFER_SIZE != 0){ + cerr << "ERROR! Interval size is not a multiple of ILP buffer size. (" << interval_size << " vs " << ILP_BUFFER_SIZE << ")" << endl; + exit(-1); + } sprintf(filename,"ilp-win%d_phases_int_pin.out", win_size); - output_file_ilp_one = fopen(filename,"w"); - fclose(output_file_ilp_one); + output_file_ilp_one.open(filename, ios::out|ios::trunc); + output_file_ilp_one.close(); } } @@ -144,7 +158,7 @@ void increase_size_all_times_one(){ ptr = (UINT64*)realloc(all_times, (1 << size_pow_times)*sizeof(UINT64)); if(ptr == (UINT64*)NULL){ - fprintf(stderr,"Could not allocate memory (realloc)!\n"); + cerr << "Could not allocate memory (realloc)!" 
<< endl; exit(1); } all_times = ptr; @@ -174,7 +188,7 @@ VOID ilp_instr_one(){ windowHead = (windowHead + 1) % win_size_const; reordered++; } - assert(reordered != 0); + //assert(reordered != 0); } /* reset issue times */ @@ -196,18 +210,17 @@ VOID ilp_instr_intervals_one(){ ilp_instr_one(); - if(interval_ins_count == interval_size){ + if(interval_ins_count_for_hpc_alignment == interval_size){ char filename[100]; sprintf(filename,"ilp-win%d_phases_int_pin.out", win_size); - output_file_ilp_one = fopen(filename,"a"); + output_file_ilp_one.open(filename, ios::out|ios::app); - fprintf(output_file_ilp_one,"%lld",(long long)interval_size); - fprintf(output_file_ilp_one," %lld",(long long)cpuClock_interval); - fprintf(output_file_ilp_one,"\n"); + output_file_ilp_one << interval_size << " " << cpuClock_interval << endl; /* reset */ interval_ins_count = 0; + interval_ins_count_for_hpc_alignment = 0; cpuClock_interval = 0; @@ -215,10 +228,11 @@ VOID ilp_instr_intervals_one(){ size_pow_times = 10; free(all_times); - if((all_times = (UINT64*)malloc((1 << size_pow_times) * sizeof(UINT64))) == (UINT64*)NULL){ + all_times = (UINT64*)malloc((1 << size_pow_times) * sizeof(UINT64)); + /*if((all_times = (UINT64*)malloc((1 << size_pow_times) * sizeof(UINT64))) == (UINT64*)NULL){ fprintf(stderr,"Could not allocate memory for all_times\n"); exit(1); - } + }*/ index_all_times = 1; nlist* np; @@ -234,7 +248,7 @@ VOID ilp_instr_intervals_one(){ memAddressesTable[i] = (nlist*) NULL; } - fclose(output_file_ilp_one); + output_file_ilp_one.close(); } } @@ -288,22 +302,24 @@ VOID readMem_ilp_one(ADDRINT effAddr, ADDRINT size){ ADDRINT a; ADDRINT upperMemAddr, indexInChunk; memNode* chunk = (memNode*)NULL; - ADDRINT shiftedAddr = effAddr >> 5; - ADDRINT shiftedEndAddr = (effAddr+size) >> 5; - - for(a = shiftedAddr; a <= shiftedEndAddr; a++){ - upperMemAddr = a >> LOG_MAX_MEM_ENTRIES; - indexInChunk = a ^ (upperMemAddr << LOG_MAX_MEM_ENTRIES); - - chunk = lookup(memAddressesTable, upperMemAddr); 
- if(chunk == (memNode*)NULL) - chunk = install(memAddressesTable, upperMemAddr); - - assert(indexInChunk < MAX_MEM_ENTRIES); - assert(chunk->timeAvailable[indexInChunk] < (1 << size_pow_times)); - if(all_times[chunk->timeAvailable[indexInChunk]] > issueTime) - issueTime = all_times[chunk->timeAvailable[indexInChunk]]; - } + ADDRINT shiftedAddr = effAddr >> ilp_block_size; + ADDRINT shiftedEndAddr = (effAddr + size - 1) >> ilp_block_size; + + if(size > 0){ + for(a = shiftedAddr; a <= shiftedEndAddr; a++){ + upperMemAddr = a >> LOG_MAX_MEM_ENTRIES; + indexInChunk = a ^ (upperMemAddr << LOG_MAX_MEM_ENTRIES); + + chunk = lookup(memAddressesTable, upperMemAddr); + if(chunk == (memNode*)NULL) + chunk = install(memAddressesTable, upperMemAddr); + + //assert(indexInChunk < MAX_MEM_ENTRIES); + //assert(chunk->timeAvailable[indexInChunk] < (1 << size_pow_times)); + if(all_times[chunk->timeAvailable[indexInChunk]] > issueTime) + issueTime = all_times[chunk->timeAvailable[indexInChunk]]; + } + } } VOID writeMem_ilp_one(ADDRINT effAddr, ADDRINT size){ @@ -311,27 +327,29 @@ VOID writeMem_ilp_one(ADDRINT effAddr, ADDRINT size){ ADDRINT a; ADDRINT upperMemAddr, indexInChunk; memNode* chunk = (memNode*)NULL; - ADDRINT shiftedAddr = effAddr >> 5; - ADDRINT shiftedEndAddr = (effAddr+size) >> 5; - - for(a = shiftedAddr; a <= shiftedEndAddr; a++){ - upperMemAddr = a >> LOG_MAX_MEM_ENTRIES; - indexInChunk = a ^ (upperMemAddr << LOG_MAX_MEM_ENTRIES); - - chunk = lookup(memAddressesTable,upperMemAddr); - if(chunk == (memNode*)NULL) - chunk = install(memAddressesTable,upperMemAddr); - - assert(indexInChunk < MAX_MEM_ENTRIES); - if(chunk->timeAvailable[indexInChunk] == 0){ - index_all_times++; - if(index_all_times >= (1 << size_pow_times)) - increase_size_all_times_one(); - chunk->timeAvailable[indexInChunk] = index_all_times; - } - assert(chunk->timeAvailable[indexInChunk] < (1 << size_pow_times)); - all_times[chunk->timeAvailable[indexInChunk]] = issueTime + 1; - } + ADDRINT shiftedAddr 
= effAddr >> ilp_block_size; + ADDRINT shiftedEndAddr = (effAddr + size - 1) >> ilp_block_size; + + if(size > 0){ + for(a = shiftedAddr; a <= shiftedEndAddr; a++){ + upperMemAddr = a >> LOG_MAX_MEM_ENTRIES; + indexInChunk = a ^ (upperMemAddr << LOG_MAX_MEM_ENTRIES); + + chunk = lookup(memAddressesTable,upperMemAddr); + if(chunk == (memNode*)NULL) + chunk = install(memAddressesTable,upperMemAddr); + + //assert(indexInChunk < MAX_MEM_ENTRIES); + if(chunk->timeAvailable[indexInChunk] == 0){ + index_all_times++; + if(index_all_times >= (1 << size_pow_times)) + increase_size_all_times_one(); + chunk->timeAvailable[indexInChunk] = index_all_times; + } + //assert(chunk->timeAvailable[indexInChunk] < (1 << size_pow_times)); + all_times[chunk->timeAvailable[indexInChunk]] = issueTime + 1; + } + } } /* instrumenting (instruction level) */ @@ -405,19 +423,18 @@ VOID fini_ilp_one(INT32 code, VOID* v){ if(interval_size == -1){ sprintf(filename,"ilp-win%d_full_int_pin.out", win_size); - output_file_ilp_one = fopen(filename,"w"); - fprintf(output_file_ilp_one,"%lld",(long long)total_ins_count); + output_file_ilp_one.open(filename, ios::out|ios::trunc); + output_file_ilp_one << total_ins_count; } else{ sprintf(filename,"ilp-win%d_phases_int_pin.out", win_size); - output_file_ilp_one = fopen(filename,"a"); - fprintf(output_file_ilp_one,"%lld",(long long)interval_ins_count); + output_file_ilp_one.open(filename, ios::out|ios::app); + output_file_ilp_one << interval_ins_count; } - fprintf(output_file_ilp_one," %lld",(long long)cpuClock_interval); + output_file_ilp_one << " " << cpuClock_interval << endl; - fprintf(output_file_ilp_one,"\n"); - fprintf(output_file_ilp_one,"number of instructions: %lld\n", total_ins_count); - fclose(output_file_ilp_one); + output_file_ilp_one << "number of instructions: " << total_ins_count_for_hpc_alignment << endl; + output_file_ilp_one.close(); } /*************************************** @@ -435,12 +452,14 @@ void init_ilp_all(){ size_pow_all_times_all 
= 10; for(i=0; i < ILP_WIN_SIZE_CNT; i++){ if((all_times_all[i] = (UINT64*)malloc((1 << size_pow_all_times_all) * sizeof(UINT64))) == (UINT64*)NULL){ - fprintf(stderr,"Could not allocate memory\n"); + cerr << "Could not allocate memory" << endl; exit(1); } } index_all_times_all = 1; // don't use first element of all_times_all + ilp_block_size = _block_size; + for(j=0; j < ILP_WIN_SIZE_CNT; j++){ windowHead_all[j] = 0; windowTail_all[j] = 0; @@ -451,7 +470,7 @@ void init_ilp_all(){ } if((executionProfile_all[j] = (UINT64*)malloc(win_sizes[j]*sizeof(UINT64))) == (UINT64*)NULL){ - fprintf(stderr,"Not enough memory (in main)\n"); + cerr << "Not enough memory (in main)" << endl; exit(1); } @@ -462,8 +481,12 @@ void init_ilp_all(){ } if(interval_size != -1){ - output_file_ilp_all = fopen("ilp_phases_int_pin.out","w"); - fclose(output_file_ilp_all); + if(interval_size % ILP_BUFFER_SIZE != 0){ + cerr << "ERROR! Interval size is not a multiple of ILP buffer size. (" << interval_size << " vs " << ILP_BUFFER_SIZE << ")" << endl; + exit(-1); + } + output_file_ilp_all.open("ilp_phases_int_pin.out", ios::out|ios::trunc); + output_file_ilp_all.close(); } } @@ -476,7 +499,7 @@ void increase_size_all_times_all(){ for(i=0; i < ILP_WIN_SIZE_CNT; i++){ ptr = (UINT64*)realloc(all_times_all[i],(1 << size_pow_all_times_all)*sizeof(UINT64)); if(ptr == (UINT64*)NULL){ - fprintf(stderr,"Could not allocate memory (realloc)!\n"); + cerr << "Could not allocate memory (realloc)!" 
<< endl; exit(1); } all_times_all[i] = ptr; @@ -497,21 +520,21 @@ VOID ilp_instr_all(){ windowTail_all[i] = (windowTail_all[i] + 1) % win_sizes[i]; /* if instruction window (issue buffer) full */ - if(windowHead_all[i] == windowTail_all[i]){ - cpuClock_all[i]++; - cpuClock_interval_all[i]++; - reordered = 0; - /* remove all instructions which are done from beginning of window, - * until an instruction comes along which is not ready yet: - * -> check executionProfile_all to see which instructions are done - * -> commit maximum win_size instructions (i.e. stop when issue buffer is empty) - */ - while((executionProfile_all[i][windowHead_all[i]] < cpuClock_all[i]) && (reordered < win_sizes[i])) { - windowHead_all[i] = (windowHead_all[i] + 1) % win_sizes[i]; - reordered++; - } - assert(reordered != 0); - } + if(windowHead_all[i] == windowTail_all[i]){ + cpuClock_all[i]++; + cpuClock_interval_all[i]++; + reordered = 0; + /* remove all instructions which are done from beginning of window, + * until an instruction comes along which is not ready yet: + * -> check executionProfile_all to see which instructions are done + * -> commit maximum win_size instructions (i.e. 
stop when issue buffer is empty) + */ + while((executionProfile_all[i][windowHead_all[i]] < cpuClock_all[i]) && (reordered < win_sizes[i])) { + windowHead_all[i] = (windowHead_all[i] + 1) % win_sizes[i]; + reordered++; + } + //assert(reordered != 0); + } /* reset issue times */ issueTime_all[i] = 0; @@ -533,17 +556,18 @@ VOID ilp_instr_intervals_all(){ /* counting instructions is done in all_instr_intervals() */ - if(interval_ins_count == interval_size){ + if(interval_ins_count_for_hpc_alignment == interval_size){ - output_file_ilp_all = fopen("ilp_phases_int_pin.out","a"); + output_file_ilp_all.open("ilp_phases_int_pin.out", ios::out|ios::app); - fprintf(output_file_ilp_all,"%lld",(long long)interval_ins_count); + output_file_ilp_all << interval_ins_count; for(i = 0; i < ILP_WIN_SIZE_CNT; i++) - fprintf(output_file_ilp_all," %lld",(long long)cpuClock_interval_all[i]); - fprintf(output_file_ilp_all,"\n"); + output_file_ilp_all << " " << cpuClock_interval_all[i]; + output_file_ilp_all << endl; /* reset */ interval_ins_count = 0; + interval_ins_count_for_hpc_alignment = 0; for(i = 0; i < ILP_WIN_SIZE_CNT; i++) cpuClock_interval_all[i] = 0; @@ -552,10 +576,11 @@ VOID ilp_instr_intervals_all(){ size_pow_all_times_all = 10; for(i = 0; i < ILP_WIN_SIZE_CNT; i++){ free(all_times_all[i]); - if((all_times_all[i] = (UINT64*)malloc((1 << size_pow_all_times_all) * sizeof(UINT64))) == (UINT64*)NULL){ + all_times_all[i] = (UINT64*)malloc((1 << size_pow_all_times_all) * sizeof(UINT64)); + /*if((all_times_all[i] = (UINT64*)malloc((1 << size_pow_all_times_all) * sizeof(UINT64))) == (UINT64*)NULL){ fprintf(stderr,"Could not allocate memory for all_times_all[%d]\n", i); exit(1); - } + }*/ } index_all_times_all = 1; @@ -572,7 +597,7 @@ VOID ilp_instr_intervals_all(){ memAddressesTable_all[i] = (nlist*) NULL; } - fclose(output_file_ilp_all); + output_file_ilp_all.close(); } ilp_instr_all(); @@ -591,7 +616,6 @@ VOID checkIssueTime_all(){ VOID readRegOp_ilp_all(UINT32 regId){ int i; - 
for(i=0; i < ILP_WIN_SIZE_CNT; i++){ if(timeAvailable_all[i][regId] > issueTime_all[i]) @@ -602,9 +626,9 @@ VOID readRegOp_ilp_all(UINT32 regId){ VOID writeRegOp_ilp_all(UINT32 regId){ int i; - - for(i=0; i < ILP_WIN_SIZE_CNT; i++) + for(i=0; i < ILP_WIN_SIZE_CNT; i++){ timeAvailable_all[i][regId] = issueTime_all[i] + 1; + } } /* memory access stuff */ @@ -615,24 +639,26 @@ VOID readMem_ilp_all(ADDRINT effAddr, ADDRINT size){ ADDRINT a; ADDRINT upperMemAddr, indexInChunk; memNode* chunk = (memNode*)NULL; - ADDRINT shiftedAddr = effAddr >> 5; - ADDRINT shiftedEndAddr = (effAddr+size) >> 5; - - for(a = shiftedAddr; a <= shiftedEndAddr; a++){ - upperMemAddr = a >> LOG_MAX_MEM_ENTRIES; - indexInChunk = a ^ (upperMemAddr << LOG_MAX_MEM_ENTRIES); - - chunk = lookup(memAddressesTable_all,upperMemAddr); - if(chunk == (memNode*)NULL) - chunk = install(memAddressesTable_all,upperMemAddr); - - assert(indexInChunk < MAX_MEM_ENTRIES); - for(i=0; i < ILP_WIN_SIZE_CNT; i++){ - - if(all_times_all[i][chunk->timeAvailable[indexInChunk]] > issueTime_all[i]) - issueTime_all[i] = all_times_all[i][chunk->timeAvailable[indexInChunk]]; - } - } + ADDRINT shiftedAddr = effAddr >> ilp_block_size; + ADDRINT shiftedEndAddr = (effAddr + size - 1) >> ilp_block_size; + + if(size > 0){ + for(a = shiftedAddr; a <= shiftedEndAddr; a++){ + upperMemAddr = a >> LOG_MAX_MEM_ENTRIES; + indexInChunk = a ^ (upperMemAddr << LOG_MAX_MEM_ENTRIES); + + chunk = lookup(memAddressesTable_all,upperMemAddr); + if(chunk == (memNode*)NULL) + chunk = install(memAddressesTable_all,upperMemAddr); + + //assert(indexInChunk < MAX_MEM_ENTRIES); + for(i=0; i < ILP_WIN_SIZE_CNT; i++){ + + if(all_times_all[i][chunk->timeAvailable[indexInChunk]] > issueTime_all[i]) + issueTime_all[i] = all_times_all[i][chunk->timeAvailable[indexInChunk]]; + } + } + } } VOID writeMem_ilp_all(ADDRINT effAddr, ADDRINT size){ @@ -641,27 +667,30 @@ VOID writeMem_ilp_all(ADDRINT effAddr, ADDRINT size){ ADDRINT a; ADDRINT upperMemAddr, indexInChunk; 
memNode* chunk = (memNode*)NULL; - ADDRINT shiftedAddr = effAddr >> 5; - ADDRINT shiftedEndAddr = (effAddr+size) >> 5; - - for(a = shiftedAddr; a <= shiftedEndAddr; a++){ - upperMemAddr = a >> LOG_MAX_MEM_ENTRIES; - indexInChunk = a ^ (upperMemAddr << LOG_MAX_MEM_ENTRIES); - - chunk = lookup(memAddressesTable_all,upperMemAddr); - if(chunk == (memNode*)NULL) - chunk = install(memAddressesTable_all,upperMemAddr); - - assert(indexInChunk < MAX_MEM_ENTRIES); - if(chunk->timeAvailable[indexInChunk] == 0){ - index_all_times_all++; - if(index_all_times_all >= (1 << size_pow_all_times_all)) - increase_size_all_times_all(); - chunk->timeAvailable[indexInChunk] = index_all_times_all; - } - for(i=0; i < ILP_WIN_SIZE_CNT; i++) - all_times_all[i][chunk->timeAvailable[indexInChunk]] = issueTime_all[i] + 1; - } + ADDRINT shiftedAddr = effAddr >> ilp_block_size; + ADDRINT shiftedEndAddr = (effAddr + size - 1) >> ilp_block_size; + + if(size > 0){ + for(a = shiftedAddr; a <= shiftedEndAddr; a++){ + upperMemAddr = a >> LOG_MAX_MEM_ENTRIES; + indexInChunk = a ^ (upperMemAddr << LOG_MAX_MEM_ENTRIES); + + chunk = lookup(memAddressesTable_all,upperMemAddr); + if(chunk == (memNode*)NULL) + chunk = install(memAddressesTable_all,upperMemAddr); + + //assert(indexInChunk < MAX_MEM_ENTRIES); + if(chunk->timeAvailable[indexInChunk] == 0){ + index_all_times_all++; + if(index_all_times_all >= (1 << size_pow_all_times_all)) + increase_size_all_times_all(); + chunk->timeAvailable[indexInChunk] = index_all_times_all; + } + for(i=0; i < ILP_WIN_SIZE_CNT; i++){ + all_times_all[i][chunk->timeAvailable[indexInChunk]] = issueTime_all[i] + 1; + } + } + } } /* instrumenting (instruction level) */ @@ -732,25 +761,33 @@ VOID fini_ilp_all(INT32 code, VOID* v){ fini_ilp_buffering_all(); if(interval_size == -1){ - output_file_ilp_all = fopen("ilp_full_int_pin.out","w"); - fprintf(output_file_ilp_all,"%lld",(long long)total_ins_count); + output_file_ilp_all.open("ilp_full_int_pin.out", ios::out|ios::trunc); + 
output_file_ilp_all << total_ins_count; } else{ - output_file_ilp_all = fopen("ilp_phases_int_pin.out","a"); - fprintf(output_file_ilp_all,"%lld",(long long)interval_ins_count); + output_file_ilp_all.open("ilp_phases_int_pin.out", ios::out|ios::app); + output_file_ilp_all << interval_ins_count; } for(i = 0; i < ILP_WIN_SIZE_CNT; i++) - fprintf(output_file_ilp_all," %lld",(long long)cpuClock_interval_all[i]); + output_file_ilp_all << " " << cpuClock_interval_all[i]; - fprintf(output_file_ilp_all,"\n"); - fprintf(output_file_ilp_all,"number of instructions: %lld\n", total_ins_count); - fclose(output_file_ilp_all); + output_file_ilp_all << endl; + output_file_ilp_all << "number of instructions: " << total_ins_count_for_hpc_alignment << endl; + output_file_ilp_all.close(); } /************************** ILP (BUFFERING) ***************************/ +/* + * notes + * + * using PIN_FAST_ANALYSIS_CALL for buffering functions was tested + * during the preparation of MICA v0.3, but showed to slightly slowdown + * things instead of speeding them up, so it was dropped in the end + */ + /* initializing */ void init_ilp_buffering(){ @@ -759,7 +796,7 @@ void init_ilp_buffering(){ ilp_buffer_index = 0; for(i=0; i < ILP_BUFFER_SIZE; i++){ if((ilp_buffer[i] = (ilp_buffer_entry*)malloc(sizeof(ilp_buffer_entry))) == (ilp_buffer_entry*)NULL){ - fprintf(stderr,"Could not allocate memory for ilp_buffer[%d]\n",i); + cerr << "Could not allocate memory for ilp_buffer[" << i << "]" << endl; exit(1); } ilp_buffer[i]->e = (ins_buffer_entry*)NULL; @@ -771,74 +808,27 @@ void init_ilp_buffering(){ } } -ADDRINT ilp_buffer_instruction_2reads_write(void* _e, ADDRINT read1_addr, ADDRINT read2_addr, ADDRINT read_size, ADDRINT write_addr, ADDRINT write_size){ - +VOID ilp_buffer_instruction_only(void* _e){ ilp_buffer[ilp_buffer_index]->e = (ins_buffer_entry*)_e; - ilp_buffer[ilp_buffer_index]->mem_read1_addr = read1_addr; - ilp_buffer[ilp_buffer_index]->mem_read2_addr = read2_addr; - 
ilp_buffer[ilp_buffer_index]->mem_read_size = read_size; - ilp_buffer[ilp_buffer_index]->mem_write_addr = write_addr; - ilp_buffer[ilp_buffer_index]->mem_write_size = write_size; - - ilp_buffer_index++; - - return (ADDRINT)(ilp_buffer_index == ILP_BUFFER_SIZE); } -ADDRINT ilp_buffer_instruction_read_write(void* _e, ADDRINT read1_addr, ADDRINT read_size, ADDRINT write_addr, ADDRINT write_size){ - - ilp_buffer[ilp_buffer_index]->e = (ins_buffer_entry*)_e; +VOID ilp_buffer_instruction_read(ADDRINT read1_addr, ADDRINT read_size){ ilp_buffer[ilp_buffer_index]->mem_read1_addr = read1_addr; ilp_buffer[ilp_buffer_index]->mem_read_size = read_size; - ilp_buffer[ilp_buffer_index]->mem_write_addr = write_addr; - ilp_buffer[ilp_buffer_index]->mem_write_size = write_size; - - ilp_buffer_index++; - - return (ADDRINT)(ilp_buffer_index == ILP_BUFFER_SIZE); } -ADDRINT ilp_buffer_instruction_2reads(void* _e, ADDRINT read1_addr, ADDRINT read2_addr, ADDRINT read_size){ - - ilp_buffer[ilp_buffer_index]->e = (ins_buffer_entry*)_e; - ilp_buffer[ilp_buffer_index]->mem_read1_addr = read1_addr; +VOID ilp_buffer_instruction_read2(ADDRINT read2_addr){ ilp_buffer[ilp_buffer_index]->mem_read2_addr = read2_addr; - ilp_buffer[ilp_buffer_index]->mem_read_size = read_size; - - ilp_buffer_index++; - - return (ADDRINT)(ilp_buffer_index == ILP_BUFFER_SIZE); } -ADDRINT ilp_buffer_instruction_read(void* _e, ADDRINT read1_addr, ADDRINT read_size){ - - ilp_buffer[ilp_buffer_index]->e = (ins_buffer_entry*)_e; - ilp_buffer[ilp_buffer_index]->mem_read1_addr = read1_addr; - ilp_buffer[ilp_buffer_index]->mem_read_size = read_size; - - ilp_buffer_index++; - - return (ADDRINT)(ilp_buffer_index == ILP_BUFFER_SIZE); -} - -ADDRINT ilp_buffer_instruction_write(void* _e, ADDRINT write_addr, ADDRINT write_size){ - - ilp_buffer[ilp_buffer_index]->e = (ins_buffer_entry*)_e; +VOID ilp_buffer_instruction_write(ADDRINT write_addr, ADDRINT write_size){ ilp_buffer[ilp_buffer_index]->mem_write_addr = write_addr; 
ilp_buffer[ilp_buffer_index]->mem_write_size = write_size; - - ilp_buffer_index++; - - return (ADDRINT)(ilp_buffer_index == ILP_BUFFER_SIZE); } -ADDRINT ilp_buffer_instruction(void* _e){ - - ilp_buffer[ilp_buffer_index]->e = (ins_buffer_entry*)_e; - - ilp_buffer_index++; - - return (ADDRINT)(ilp_buffer_index == ILP_BUFFER_SIZE); +ADDRINT ilp_buffer_instruction_next(){ + ilp_buffer_index++; + return (ADDRINT)(ilp_buffer_index == ILP_BUFFER_SIZE || interval_ins_count_for_hpc_alignment == interval_size); } /* empty buffer for one given window size */ @@ -955,7 +945,7 @@ VOID instrument_ilp_buffering_common(INS ins, ins_buffer_entry* e){ regReadCnt = 0; for(i=0; i < maxNumRegsCons; i++){ reg = INS_RegR(ins, i); - assert((UINT32)reg < MAX_NUM_REGS); + //assert((UINT32)reg < MAX_NUM_REGS); // only consider valid general-purpose registers (any bit-width) and floating-point registers, // i.e. exlude branch, segment and pin registers, among others if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ @@ -964,17 +954,18 @@ VOID instrument_ilp_buffering_common(INS ins, ins_buffer_entry* e){ } e->regReadCnt = regReadCnt; - if((e->regsRead = (REG*)malloc(regReadCnt*sizeof(REG))) == (REG*)NULL){ - fprintf(stderr,"ERROR: Could not allocate regsRead memory for ins 0x%x\n", e->insAddr); + e->regsRead = (REG*)malloc(regReadCnt*sizeof(REG)); + /*if((e->regsRead = (REG*)malloc(regReadCnt*sizeof(REG))) == (REG*)NULL){ + fprintf(stderr,"ERROR: Could not allocate regsRead memory for ins 0x%x\n", (unsigned int)e->insAddr); exit(1); - } + }*/ regReadCnt = 0; for(i=0; i < maxNumRegsCons; i++){ reg = INS_RegR(ins, i); - assert((UINT32)reg < MAX_NUM_REGS); + //assert((UINT32)reg < MAX_NUM_REGS); // only consider valid general-purpose registers (any bit-width) and floating-point registers, // i.e. 
exlude branch, segment and pin registers, among others if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ @@ -995,7 +986,7 @@ VOID instrument_ilp_buffering_common(INS ins, ins_buffer_entry* e){ reg = INS_RegW(ins, i); - assert((UINT32)reg < MAX_NUM_REGS); + //assert((UINT32)reg < MAX_NUM_REGS); // only consider valid general-purpose registers (any bit-width) and floating-point registers, // i.e. exlude branch, segment and pin registers, among others */ if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ @@ -1004,17 +995,18 @@ VOID instrument_ilp_buffering_common(INS ins, ins_buffer_entry* e){ } e->regWriteCnt = regWriteCnt; - if((e->regsWritten = (REG*)malloc(regWriteCnt*sizeof(REG))) == (REG*)NULL){ - fprintf(stderr,"ERROR: Could not allocate regsRead memory for ins 0x%x\n", e->insAddr); + e->regsWritten = (REG*)malloc(regWriteCnt*sizeof(REG)); + /*if((e->regsWritten = (REG*)malloc(regWriteCnt*sizeof(REG))) == (REG*)NULL){ + fprintf(stderr,"ERROR: Could not allocate regsRead memory for ins 0x%x\n", (unsigned int)e->insAddr); exit(1); - } + }*/ regWriteCnt = 0; for(i=0; i < maxNumRegsProd; i++){ reg = INS_RegW(ins, i); - assert((UINT32)reg < MAX_NUM_REGS); + //assert((UINT32)reg < MAX_NUM_REGS); // only consider valid general-purpose registers (any bit-width) and floating-point registers, // i.e. 
exlude branch, segment and pin registers, among others if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ @@ -1026,38 +1018,22 @@ VOID instrument_ilp_buffering_common(INS ins, ins_buffer_entry* e){ } // buffer memory operations (and instruction register buffer) with one single InsertCall - if(INS_IsMemoryRead(ins)){ - - if(INS_IsMemoryWrite(ins)){ - if(INS_HasMemoryRead2(ins)){ - - INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_buffer_instruction_2reads_write, IARG_PTR, (void*)e, IARG_MEMORYREAD_EA, IARG_MEMORYREAD2_EA, IARG_MEMORYREAD_SIZE, IARG_MEMORYWRITE_EA, IARG_MEMORYWRITE_SIZE, IARG_END); - } - else{ - INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_buffer_instruction_read_write, IARG_PTR, (void*)e, IARG_MEMORYREAD_EA, IARG_MEMORYREAD_SIZE, IARG_MEMORYWRITE_EA, IARG_MEMORYWRITE_SIZE, IARG_END); - - } - } - else{ - if(INS_HasMemoryRead2(ins)){ - - INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_buffer_instruction_2reads, IARG_PTR, (void*)e, IARG_MEMORYREAD_EA, IARG_MEMORYREAD2_EA, IARG_MEMORYREAD_SIZE, IARG_END); - } - else{ - - INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_buffer_instruction_read, IARG_PTR, (void*)e, IARG_MEMORYREAD_EA, IARG_MEMORYREAD_SIZE, IARG_END); - } - } - } - else{ - if(INS_IsMemoryWrite(ins)){ - - INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_buffer_instruction_write, IARG_PTR, (void*)e, IARG_MEMORYWRITE_EA, IARG_MEMORYWRITE_SIZE, IARG_END); - } - else{ - INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_buffer_instruction, IARG_PTR, (void*)e, IARG_END); - } - } + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_buffer_instruction_only, IARG_PTR, (void*)e, IARG_END); + + if(INS_IsMemoryRead(ins)){ + + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_buffer_instruction_read, IARG_MEMORYREAD_EA, IARG_MEMORYREAD_SIZE, IARG_END); + + if(INS_HasMemoryRead2(ins)){ + INS_InsertCall(ins, IPOINT_BEFORE, 
(AFUNPTR)ilp_buffer_instruction_read2, IARG_MEMORYREAD2_EA, IARG_END); + } + } + + if(INS_IsMemoryWrite(ins)){ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_buffer_instruction_write, IARG_MEMORYWRITE_EA, IARG_MEMORYWRITE_SIZE, IARG_END); + } + + INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_buffer_instruction_next, IARG_END); } diff --git a/mica_ilp.h b/mica_ilp.h index bbac38b..d47cf15 100644 --- a/mica_ilp.h +++ b/mica_ilp.h @@ -20,10 +20,19 @@ VOID fini_ilp_all(INT32 code, VOID* v); VOID fini_ilp_one(INT32 code, VOID* v); /* support for fast instrumentation of all characteristics in a single run (avoid multiple InsertCalls!) */ -ADDRINT ilp_buffer_instruction_2reads_write(void* _e, ADDRINT read1_addr, ADDRINT read2_addr, ADDRINT read_size, ADDRINT write_addr, ADDRINT write_size); +//void ilp_buffer_instruction_only(void* _e); +VOID PIN_FAST_ANALYSIS_CALL ilp_buffer_instruction_only(void* _e); +//void ilp_buffer_instruction_read(ADDRINT read1_addr, ADDRINT read_size); +VOID PIN_FAST_ANALYSIS_CALL ilp_buffer_instruction_read(ADDRINT read1_addr, ADDRINT read_size); +//void ilp_buffer_instruction_read2(ADDRINT read2_addr); +VOID PIN_FAST_ANALYSIS_CALL ilp_buffer_instruction_read2(ADDRINT read2_addr); +//void ilp_buffer_instruction_write(ADDRINT write_addr, ADDRINT write_size); +VOID PIN_FAST_ANALYSIS_CALL ilp_buffer_instruction_write(ADDRINT write_addr, ADDRINT write_size); +ADDRINT ilp_buffer_instruction_next(); +/*ADDRINT ilp_buffer_instruction_2reads_write(void* _e, ADDRINT read1_addr, ADDRINT read2_addr, ADDRINT read_size, ADDRINT write_addr, ADDRINT write_size); ADDRINT ilp_buffer_instruction_read_write(void* _e, ADDRINT read1_addr, ADDRINT read_size, ADDRINT write_addr, ADDRINT write_size); ADDRINT ilp_buffer_instruction_2reads(void* _e, ADDRINT read1_addr, ADDRINT read2_addr, ADDRINT read_size); ADDRINT ilp_buffer_instruction_read(void* _e, ADDRINT read1_addr, ADDRINT read_size); ADDRINT ilp_buffer_instruction_write(void* _e, ADDRINT write_addr, 
ADDRINT write_size); -ADDRINT ilp_buffer_instruction(void* _e); +ADDRINT ilp_buffer_instruction(void* _e);*/ VOID empty_ilp_buffer_all(); diff --git a/mica_init.cpp b/mica_init.cpp index b92e642..d53d279 100644 --- a/mica_init.cpp +++ b/mica_init.cpp @@ -13,14 +13,16 @@ /* * Setup MICA log file. */ -void setup_mica_log(FILE* *log){ +void setup_mica_log(ofstream *log){ char name[20]; sprintf(name, "mica.log"); - *log = fopen(name,"w"); - if( *log == (FILE*)NULL ){ - fprintf(stderr,"Could not create mica.log, aborting!\n"); + //*log = fopen(name,"w"); + (*log).open(name, ios::out|ios::trunc); + //if( *log == (FILE*)NULL ){ + if( ! (*log).is_open() ){ + cerr << "Could not create mica.log, aborting!" << endl; exit(1); } } @@ -28,90 +30,90 @@ void setup_mica_log(FILE* *log){ /* * Read mica.conf config file for MICA. * - * analysis_type: 'all' | 'ilp' | 'ilp_one' | 'itypes' | 'ppm' | 'reg' | 'stride' | 'workingset' | 'custom' + * analysis_type: 'all' | 'ilp' | 'ilp_one' | 'itypes' | 'ppm' | 'reg' | 'stride' | 'memfootprint' | 'memreusedist' | 'custom' * interval_size: 'full' | * ilp_size: * itypes_spec_file: */ -void read_config(FILE* log, INT64* interval_size, MODE* mode, UINT32* _ilp_win_size, char** _itypes_spec_file){ +void read_config(ofstream* log, INT64* interval_size, MODE* mode, UINT32* _ilp_win_size, UINT32* _block_size, UINT32* _page_size, char** _itypes_spec_file){ char* string; FILE* config_file = fopen("mica.conf","r"); /* a config file named 'mica.conf' is required */ if(config_file == (FILE*)NULL){ - ERROR("ERROR: No config file 'mica.conf' found, please create one!\n"); - LOG_MSG("ERROR: No config file 'mica.conf' found, please create one!\n"); + cerr << "ERROR: No config file 'mica.conf' found, please create one!" << endl; + (*log) << "ERROR: No config file 'mica.conf' found, please create one!" << endl; exit(1); } - LOG_MSG("Reading config file ...\n"); + (*log) << "Reading config file ..." 
<< endl; if((string = (char*)malloc(100*sizeof(char))) == (char*)NULL){ - ERROR("ERROR: Could not allocate memory for string\n"); - LOG_MSG("ERROR: Could not allocate memory for string\n"); + cerr << "ERROR: Could not allocate memory for string" << endl; + (*log) << "ERROR: Could not allocate memory for string" << endl; exit(1); } fscanf(config_file,"analysis_type: %s\n",string); - DEBUG_MSG("Analysis type: %s\n",string); + cerr << "Analysis type: " << string << endl; // figure out mode we are running in if(strcmp(string,"all") == 0){ *mode = MODE_ALL; - LOG_MSG("Measuring ALL characteristics...\n"); + (*log) << "Measuring ALL characteristics..." << endl; } else{ if(strcmp(string,"ilp") == 0){ *mode = MODE_ILP; - LOG_MSG("Measuring ILP characteristics...\n"); + (*log) << "Measuring ILP characteristics..." << endl; } else{ if(strcmp(string,"ilp_one") == 0){ *mode = MODE_ILP_ONE; - LOG_MSG("Measuring ILP characteristics for a given window size...\n"); + (*log) << "Measuring ILP characteristics for a given window size..." << endl; } else{ if(strcmp(string,"itypes") == 0){ *mode = MODE_ITYPES; - LOG_MSG("Measuring ITYPES characteristics...\n"); + (*log) << "Measuring ITYPES characteristics..." << endl; } else{ if(strcmp(string,"ppm") == 0){ *mode = MODE_PPM; - LOG_MSG("Measuring PPM characteristics...\n"); + (*log) << "Measuring PPM characteristics..." << endl; } else{ if(strcmp(string,"reg") == 0){ *mode = MODE_REG; - LOG_MSG("Measuring REG characteristics...\n"); + (*log) << "Measuring REG characteristics..." << endl; } else{ if(strcmp(string,"stride") == 0){ *mode = MODE_STRIDE; - LOG_MSG("Measuring STRIDE characteristics...\n"); + (*log) << "Measuring STRIDE characteristics..." << endl; } else{ if(strcmp(string,"memfootprint") == 0){ *mode = MODE_MEMFOOTPRINT; - LOG_MSG("Measuring MEMFOOTPRINT characteristics...\n"); + (*log) << "Measuring MEMFOOTPRINT characteristics..." 
<< endl; } else{ if(strcmp(string,"memreusedist") == 0){ *mode = MODE_MEMREUSEDIST; - LOG_MSG("Measuring MEMREUSEDIST characteristics...\n"); + (*log) << "Measuring MEMREUSEDIST characteristics..." << endl; } else{ if(strcmp(string,"custom") == 0){ *mode = MODE_CUSTOM; - LOG_MSG("Measuring CUSTOM characteristics...\n"); + (*log) << "Measuring CUSTOM characteristics..." << endl; } else{ - LOG_MSG("\nERROR: Unknown set of characteristics chosen!\n"); - LOG_MSG(" Available characteristics include: 'all', 'ilp', 'ilp_one', 'itypes', 'ppm', 'reg', 'stride', 'memfootprint', 'memreusedist', 'custom'\n"); - ERROR("\nERROR: Unknown set of characteristics chosen!\n"); - ERROR(" Available characteristics include: 'all', 'ilp', 'ilp_one', 'itypes', 'ppm', 'reg', 'stride', 'memfootprint', 'memreusedist', 'custom'\n"); + (*log) << endl << "ERROR: Unknown set of characteristics chosen!" << endl; + (*log) << " Available characteristics include: 'all', 'ilp', 'ilp_one', 'itypes', 'ppm', 'reg', 'stride', 'memfootprint', 'memreusedist', 'custom'" << endl; + cerr << endl << "ERROR: Unknown set of characteristics chosen!" << endl; + cerr << endl << " Available characteristics include: 'all', 'ilp', 'ilp_one', 'itypes', 'ppm', 'reg', 'stride', 'memfootprint', 'memreusedist', 'custom'" << endl; } } } @@ -125,40 +127,59 @@ void read_config(FILE* log, INT64* interval_size, MODE* mode, UINT32* _ilp_win_s fscanf(config_file,"interval_size: %s\n", string); - LOG_MSG("Interval size: %s\n", string); + (*log) << "Interval size: " << string << endl; if(strcmp(string, "full") == 0){ *interval_size = -1; - LOG_MSG("Returning data for full execution...\n"); + (*log) << "Returning data for full execution..." << endl; } else{ *interval_size = (INT64) atoll(string); - LOG_MSG("Returning data for each interval of %lld instructions...\n", (INT64)*interval_size); + (*log) << "Returning data for each interval of " << *interval_size << " instructions..." 
<< endl; } // read window size for ILP_ONE if(*mode == MODE_ILP_ONE){ if(fscanf(config_file,"ilp_size: %s\n", string) == 1){ *_ilp_win_size = (UINT32)atoi(string); - LOG_MSG("ILP window size: %d\n", *_ilp_win_size); + (*log) << "ILP window size: " << *_ilp_win_size << endl; } else{ - fprintf(stderr, "ERROR! ILP_ONE mode was specified, but no window size was found along with it!\n"); + cerr << "ERROR! ILP_ONE mode was specified, but no window size was found along with it!" << endl; exit(-1); } } + // read block size + *_block_size = 6; // default block size = 64 bytes (2^6) + if(*mode == MODE_ILP_ONE || *mode == MODE_ILP || *mode == MODE_MEMFOOTPRINT || *mode == MODE_MEMREUSEDIST || *mode == MODE_ALL){ + if(fscanf(config_file,"block_size: %s\n", string) == 1){ + *_block_size = (UINT32)atoi(string); + (*log) << "block size: 2^" << *_block_size << endl; + } + } + + // read page size + *_page_size = 12; // default page size = 4KB (2^12) + if(*mode == MODE_MEMFOOTPRINT || *mode == MODE_ALL){ + if(fscanf(config_file,"page_size: %s\n", string) == 1){ + *_page_size = (UINT32)atoi(string); + (*log) << "page size: 2^" << *_page_size << endl; + } + } + // possibly read itypes specification filename *_itypes_spec_file = NULL; if(*mode == MODE_ITYPES || *mode == MODE_ALL){ if(fscanf(config_file,"itypes_spec_file: %s\n", string) == 1){ *_itypes_spec_file = (char*)malloc((strlen(string)+1)*sizeof(char)); strcpy(*_itypes_spec_file, string); - fprintf(stdout,"ITYPES spec file: %s\n", *_itypes_spec_file); - LOG_MSG("ITYPES spec file: %s\n", *_itypes_spec_file); + (*log) << "ITYPES spec file: " << *_itypes_spec_file << endl; } } - DEBUG_MSG("All done reading config\n"); + cerr << "All done reading config" << endl; + + (*log).close(); } diff --git a/mica_init.h b/mica_init.h index 3075264..f64454c 100644 --- a/mica_init.h +++ b/mica_init.h @@ -16,8 +16,8 @@ #include "mica_memfootprint.h" #include "mica_memreusedist.h" -enum MODE { MODE_ALL, MODE_ILP, MODE_ILP_ONE, MODE_ITYPES, 
MODE_PPM, MODE_REG, MODE_STRIDE, MODE_MEMFOOTPRINT, MODE_MEMREUSEDIST, MODE_MYTYPE }; +enum MODE { MODE_ALL, MODE_ILP, MODE_ILP_ONE, MODE_ITYPES, MODE_PPM, MODE_REG, MODE_STRIDE, MODE_MEMFOOTPRINT, MODE_MEMREUSEDIST, MODE_CUSTOM }; -void setup_mica_log(FILE* *log); +void setup_mica_log(ofstream *log); -void read_config(FILE* log, INT64* interval_size, MODE* mode, UINT32* _win_size); +void read_config(ofstream *log, INT64* interval_size, MODE* mode, UINT32* _ilp_win_size, UINT32* _block_size, UINT32* _page_size, char** _itypes_spec_file); diff --git a/mica_itypes.cpp b/mica_itypes.cpp index c08e817..49619ff 100644 --- a/mica_itypes.cpp +++ b/mica_itypes.cpp @@ -16,10 +16,12 @@ extern INT64 interval_size; extern INT64 interval_ins_count; +extern INT64 interval_ins_count_for_hpc_alignment; extern INT64 total_ins_count; +extern INT64 total_ins_count_for_hpc_alignment; extern char* _itypes_spec_file; -FILE* output_file_itypes; +ofstream output_file_itypes; identifier** group_identifiers; INT64* group_ids_cnt; @@ -32,18 +34,18 @@ identifier* other_group_identifiers; /* counter functions */ ADDRINT itypes_instr_intervals(){ - return (ADDRINT)(total_ins_count % interval_size == 0); + return (ADDRINT)(interval_ins_count_for_hpc_alignment == interval_size); }; VOID itypes_instr_interval_output(){ int i; - output_file_itypes = fopen("itypes_phases_int_pin.out","a"); - fprintf(output_file_itypes, "%lld", (long long)interval_size); + output_file_itypes.open("itypes_phases_int_pin.out", ios::out|ios::app); + output_file_itypes << interval_size; for(i=0; i < number_of_groups+1; i++){ - fprintf(output_file_itypes, " %lld", group_counts[i]); + output_file_itypes << " " << group_counts[i]; } - fprintf(output_file_itypes, "\n"); - fclose(output_file_itypes); + output_file_itypes << endl; + output_file_itypes.close(); } VOID itypes_instr_interval_reset(){ @@ -58,6 +60,7 @@ VOID itypes_instr_interval(){ itypes_instr_interval_output(); itypes_instr_interval_reset(); interval_ins_count = 
0; + interval_ins_count_for_hpc_alignment = 0; } VOID itypes_count(UINT32 gid){ @@ -173,7 +176,7 @@ VOID init_itypes_default_groups(){ strcpy(group_identifiers[8][1].str, "SSE"); // other (interrupts, rotate instructions, semaphore, conditional move, system) - group_ids_cnt[9] = 7; + group_ids_cnt[9] = 8; group_identifiers[9] = (identifier*)malloc(group_ids_cnt[9]*sizeof(identifier)); group_identifiers[9][0].type = identifier_type::ID_TYPE_CATEGORY; group_identifiers[9][0].str = (char*)malloc(20*sizeof(char)); @@ -196,6 +199,9 @@ VOID init_itypes_default_groups(){ group_identifiers[9][6].type = identifier_type::ID_TYPE_CATEGORY; group_identifiers[9][6].str = (char*)malloc(20*sizeof(char)); strcpy(group_identifiers[9][6].str, "PREFETCH"); + group_identifiers[9][7].type = identifier_type::ID_TYPE_CATEGORY; + group_identifiers[9][7].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[9][7].str, "SYSCALL"); // [!] NOP instructions group_ids_cnt[10] = 2; @@ -215,21 +221,22 @@ VOID init_itypes(){ int gid, sgid; char type[100]; char str[100]; + string line; /* try and open instruction groups specification file */ if(_itypes_spec_file != NULL){ - FILE* f = fopen(_itypes_spec_file, "r"); - if(f != NULL){ + ifstream f(_itypes_spec_file); + if(f){ // count number of groups number_of_groups = 0; - while( feof(f) == 0){ - fscanf(f, "%d, %d, %[^,], %[^\n]\n", &gid, &sgid, type, str); + while( getline(f,line)){ + sscanf(line.c_str(), "%d, %d, %[^,], %[^\n]\n", &gid, &sgid, type, str); if(gid > number_of_groups) number_of_groups++; } - fclose(f); + f.close(); number_of_groups++; - fprintf(stderr, "==> found %lld groups\n", (long long)number_of_groups); + cerr << "==> found " << number_of_groups << " groups" << endl; group_identifiers = (identifier**)malloc((number_of_groups+1)*sizeof(identifier*)); group_ids_cnt = (INT64*)malloc((number_of_groups+1)*sizeof(INT64)); @@ -239,10 +246,10 @@ VOID init_itypes(){ } // count number of subgroups per group - f = 
fopen(_itypes_spec_file, "r"); + f.open(_itypes_spec_file); i=0; - while( feof(f) == 0){ - fscanf(f, "%d, %d, %[^,], %[^\n]\n", &gid, &sgid, type, str); + while( getline(f,line)){ + sscanf(line.c_str(), "%d, %d, %[^,], %[^\n]\n", &gid, &sgid, type, str); if(gid == i){ group_ids_cnt[i]++; } @@ -253,13 +260,13 @@ VOID init_itypes(){ } } group_identifiers[i] = (identifier*)malloc(group_ids_cnt[i]*sizeof(identifier)); - fclose(f); + f.close(); // save subgroup types and identifiers - f = fopen(_itypes_spec_file, "r"); + f.open(_itypes_spec_file); i=0; - while( feof(f) == 0){ - fscanf(f, "%d, %d, %[^,], %[^\n]\n", &gid, &sgid, type, str); + while( getline(f,line)){ + sscanf(line.c_str(), "%d, %d, %[^,], %[^\n]\n", &gid, &sgid, type, str); if(strcmp(type, "CATEGORY") == 0){ group_identifiers[gid][sgid].type = identifier_type::ID_TYPE_CATEGORY; } @@ -272,8 +279,8 @@ VOID init_itypes(){ group_identifiers[gid][sgid].type = identifier_type::ID_TYPE_SPECIAL; } else{ - fprintf(stderr, "ERROR! Unknown subgroup type found (\"%s\").\n", type); - fprintf(stderr, " Known subgroup types: {CATEGORY, OPCODE, SPECIAL}.\n"); + cerr << "ERROR! Unknown subgroup type found (\"" << type << "\")." << endl; + cerr << " Known subgroup types: {CATEGORY, OPCODE, SPECIAL}." 
<< endl; exit(-1); } } @@ -281,54 +288,54 @@ VOID init_itypes(){ group_identifiers[gid][sgid].str = (char*)malloc(20*sizeof(char)); strcpy(group_identifiers[gid][sgid].str, str); } - fclose(f); + f.close(); // print out groups read for(i=0; i < number_of_groups; i++){ - fprintf(stderr, " group %d (#: %lld): ", i, (long long)group_ids_cnt[i]); + cerr << " group " << i << " (#: " << group_ids_cnt[i] << "): "; for(j=0; j < group_ids_cnt[i]; j++){ - fprintf(stderr, "%s ", group_identifiers[i][j].str); + cerr << group_identifiers[i][j].str << " "; switch(group_identifiers[i][j].type){ case identifier_type::ID_TYPE_CATEGORY: - fprintf(stderr, "[CAT]; "); + cerr << "[CAT]; "; break; case identifier_type::ID_TYPE_OPCODE: - fprintf(stderr, "[OPCODE]; "); + cerr << "[OPCODE]; "; break; case identifier_type::ID_TYPE_SPECIAL: - fprintf(stderr, "[SPECIAL]; "); + cerr << "[SPECIAL]; "; break; default: - fprintf(stderr, "ERROR! Unknown subgroup type found for [%d][%d] (\"%d\").\n", i, j, group_identifiers[i][j].type); - fprintf(stderr, " Known subgroup types: {CATEGORY, OPCODE, SPECIAL}.\n"); + cerr << "ERROR! Unknown subgroup type found for [" << i << "][" << j << "] (\"" << group_identifiers[i][j].type << "\")." << endl; + cerr << " Known subgroup types: {CATEGORY, OPCODE, SPECIAL}." << endl; exit(-1); break; } } - fprintf(stderr, "\n"); + cerr << endl; } - - // allocate space for identifiers of 'other' group - other_ids_cnt = 0; - other_ids_max_cnt = 2; - other_group_identifiers = (identifier*)malloc(other_ids_max_cnt*sizeof(identifier)); } else{ - fprintf(stderr, "ERROR! Failed to open file \"%s\" containing instruction groups specification.\n", _itypes_spec_file); + cerr << "ERROR! Failed to open file \"" << _itypes_spec_file << "\" containing instruction groups specification." 
<< endl; exit(-1); } } - else{ - // if no specification file was found, just use defaults (compatible with MICA v0.23 and older) + else{ + // if no specification file was found, just use defaults (compatible with MICA v0.23 and older) init_itypes_default_groups(); - } + } - // (initializing total instruction counts is done in mica.cpp) - - if(interval_size != -1){ - output_file_itypes = fopen("itypes_phases_int_pin.out","w"); - fclose(output_file_itypes); - } + // allocate space for identifiers of 'other' group + other_ids_cnt = 0; + other_ids_max_cnt = 2; + other_group_identifiers = (identifier*)malloc(other_ids_max_cnt*sizeof(identifier)); + + // (initializing total instruction counts is done in mica.cpp) + + if(interval_size != -1){ + output_file_itypes.open("itypes_phases_int_pin.out", ios::out|ios::trunc); + output_file_itypes.close(); + } } /* instrumenting (instruction level) */ @@ -379,7 +386,7 @@ VOID instrument_itypes(INS ins, VOID* v){ } } else{ - fprintf(stderr, "ERROR! Unknown identifier type specified (%d).\n", group_identifiers[i][j].type); + cerr << "ERROR! Unknown identifier type specified (" << group_identifiers[i][j].type << ")." 
<< endl; } } } @@ -405,7 +412,7 @@ VOID instrument_itypes(INS ins, VOID* v){ } // prepare for (possible) next category - if(other_ids_cnt == other_ids_max_cnt){ + if(other_ids_cnt >= other_ids_max_cnt){ other_ids_max_cnt *= 2; other_group_identifiers = (identifier*)realloc(other_group_identifiers, other_ids_max_cnt*sizeof(identifier)); } @@ -424,26 +431,28 @@ VOID fini_itypes(INT32 code, VOID* v){ int i; if(interval_size == -1){ - output_file_itypes = fopen("itypes_full_int_pin.out","w"); - fprintf(output_file_itypes, "%lld", (long long)total_ins_count); + output_file_itypes.open("itypes_full_int_pin.out", ios::out|ios::trunc); + output_file_itypes << total_ins_count; for(i=0; i < number_of_groups+1; i++){ - fprintf(output_file_itypes, " %lld", group_counts[i]); + output_file_itypes << " " << group_counts[i]; } - fprintf(output_file_itypes, "\n"); + output_file_itypes << endl; } else{ - output_file_itypes = fopen("itypes_phases_int_pin.out","a"); - fprintf(output_file_itypes, "%lld", (long long)interval_ins_count); + output_file_itypes.open("itypes_phases_int_pin.out", ios::out|ios::app); + output_file_itypes << interval_ins_count; for(i=0; i < number_of_groups+1; i++){ - fprintf(output_file_itypes, " %lld", group_counts[i]); + output_file_itypes << " " << group_counts[i]; } + output_file_itypes << endl; } - fprintf(output_file_itypes,"number of instructions: %lld\n", total_ins_count); - fclose(output_file_itypes); + output_file_itypes << "number of instructions: " << total_ins_count_for_hpc_alignment << endl; + output_file_itypes.close(); // print instruction categories in 'other' group of instructions - FILE* output_file_other_group_categories = fopen("itypes_other_group_categories.txt", "w"); + ofstream output_file_other_group_categories; + output_file_other_group_categories.open("itypes_other_group_categories.txt", ios::out|ios::trunc); for(i=0; i < other_ids_cnt; i++){ - fprintf(output_file_other_group_categories, "%s\n", other_group_identifiers[i].str); + 
output_file_other_group_categories << other_group_identifiers[i].str << endl; } } diff --git a/mica_itypes.h b/mica_itypes.h index 2688270..6bde0ea 100644 --- a/mica_itypes.h +++ b/mica_itypes.h @@ -9,23 +9,28 @@ #include "mica.h" -void init_itypes(); +#ifndef MICA_ITYPES_H +#define MICA_ITYPES_H + +typedef struct identifier_type{ + // type of identifier + // SPECIAL includes stuff like memory reads/writes + enum {ID_TYPE_CATEGORY = 1, ID_TYPE_OPCODE, ID_TYPE_SPECIAL} type; + // string identifier for category/opcode + char* str; +} identifier; + +VOID init_itypes(); +VOID init_itypes_default_groups(); + VOID instrument_itypes(INS ins, VOID* v); VOID instrument_itypes_bbl(TRACE trace, VOID* v); VOID fini_itypes(INT32 code, VOID* v); -VOID itypes_count_mem_read(); -VOID itypes_count_mem_write(); -VOID itypes_count_control(); -VOID itypes_count_arith(); -VOID itypes_count_fp(); -VOID itypes_count_stack(); -VOID itypes_count_shift(); -VOID itypes_count_string(); -VOID itypes_count_sse(); -VOID itypes_count_other(); -VOID itypes_count_nop(); +VOID itypes_count(UINT32 gid); VOID itypes_instr_interval_output(); VOID itypes_instr_interval_reset(); + +#endif diff --git a/mica_memfootprint.cpp b/mica_memfootprint.cpp index 7eb1639..7b826f5 100644 --- a/mica_memfootprint.cpp +++ b/mica_memfootprint.cpp @@ -17,9 +17,16 @@ extern INT64 interval_size; extern INT64 interval_ins_count; +extern INT64 interval_ins_count_for_hpc_alignment; extern INT64 total_ins_count; +extern INT64 total_ins_count_for_hpc_alignment; -FILE* output_file_memfootprint; +extern UINT32 _block_size; +UINT32 memfootprint_block_size; +extern UINT32 _page_size; +UINT32 page_size; + +ofstream output_file_memfootprint; nlist* DmemCacheWorkingSetTable[MAX_MEM_TABLE_ENTRIES]; nlist* DmemPageWorkingSetTable[MAX_MEM_TABLE_ENTRIES]; @@ -37,10 +44,13 @@ void init_memfootprint(){ ImemCacheWorkingSetTable[i] = (nlist*) NULL; ImemPageWorkingSetTable[i] = (nlist*) NULL; } + + memfootprint_block_size = _block_size; + 
page_size = _page_size; if(interval_size != -1){ - output_file_memfootprint = fopen("memfootprint_phases_int_pin.out","w"); - fclose(output_file_memfootprint); + output_file_memfootprint.open("memfootprint_phases_int_pin.out", ios::out|ios::trunc); + output_file_memfootprint.close(); } } @@ -52,41 +62,45 @@ VOID memOp(ADDRINT effMemAddr, ADDRINT size){ /* D-stream (64-byte) cache block memory footprint */ - addr = effMemAddr >> 6; - endAddr = (effMemAddr + size) >> 6; + addr = effMemAddr >> memfootprint_block_size; + endAddr = (effMemAddr + size - 1) >> memfootprint_block_size; - for(a = addr; a <= endAddr; a++){ + if(size > 0){ + for(a = addr; a <= endAddr; a++){ - upperAddr = a >> LOG_MAX_MEM_BLOCK; - indexInChunk = a ^ (upperAddr << LOG_MAX_MEM_BLOCK); + upperAddr = a >> LOG_MAX_MEM_BLOCK; + indexInChunk = a ^ (upperAddr << LOG_MAX_MEM_BLOCK); - chunk = lookup(DmemCacheWorkingSetTable, upperAddr); - if(chunk == (memNode*)NULL) - chunk = install(DmemCacheWorkingSetTable, upperAddr); + chunk = lookup(DmemCacheWorkingSetTable, upperAddr); + if(chunk == (memNode*)NULL) + chunk = install(DmemCacheWorkingSetTable, upperAddr); - //assert(indexInChunk >= 0 && indexInChunk < MAX_MEM_BLOCK); - chunk->numReferenced[indexInChunk] = true; + //assert(indexInChunk >= 0 && indexInChunk < MAX_MEM_BLOCK); + chunk->numReferenced[indexInChunk] = true; - } + } + } /* D-stream (4KB) page block memory footprint */ - addr = effMemAddr >> 12; - endAddr = (effMemAddr + size) >> 12; + addr = effMemAddr >> page_size; + endAddr = (effMemAddr + size - 1) >> page_size; - for(a = addr; a <= endAddr; a++){ + if(size > 0){ + for(a = addr; a <= endAddr; a++){ - upperAddr = a >> LOG_MAX_MEM_BLOCK; - indexInChunk = a ^ (upperAddr << LOG_MAX_MEM_BLOCK); + upperAddr = a >> LOG_MAX_MEM_BLOCK; + indexInChunk = a ^ (upperAddr << LOG_MAX_MEM_BLOCK); - chunk = lookup(DmemPageWorkingSetTable, upperAddr); - if(chunk == (memNode*)NULL) - chunk = install(DmemPageWorkingSetTable, upperAddr); + chunk = 
lookup(DmemPageWorkingSetTable, upperAddr); + if(chunk == (memNode*)NULL) + chunk = install(DmemPageWorkingSetTable, upperAddr); - //assert(indexInChunk >= 0 && indexInChunk < MAX_MEM_BLOCK); - chunk->numReferenced[indexInChunk] = true; + //assert(indexInChunk >= 0 && indexInChunk < MAX_MEM_BLOCK); + chunk->numReferenced[indexInChunk] = true; - } + } + } } VOID instrMem(ADDRINT instrAddr, ADDRINT size){ @@ -98,40 +112,44 @@ VOID instrMem(ADDRINT instrAddr, ADDRINT size){ /* I-stream (64-byte) cache block memory footprint */ - addr = instrAddr >> 6; - endAddr = (instrAddr + size) >> 6; + addr = instrAddr >> memfootprint_block_size; + endAddr = (instrAddr + size - 1) >> memfootprint_block_size; + + if(size > 0){ + for(a = addr; a <= endAddr; a++){ - for(a = addr; a <= endAddr; a++){ + upperAddr = a >> LOG_MAX_MEM_BLOCK; + indexInChunk = a ^ (upperAddr << LOG_MAX_MEM_BLOCK); - upperAddr = a >> LOG_MAX_MEM_BLOCK; - indexInChunk = a ^ (upperAddr << LOG_MAX_MEM_BLOCK); + chunk = lookup(ImemCacheWorkingSetTable, upperAddr); + if(chunk == (memNode*)NULL) + chunk = install(ImemCacheWorkingSetTable, upperAddr); - chunk = lookup(ImemCacheWorkingSetTable, upperAddr); - if(chunk == (memNode*)NULL) - chunk = install(ImemCacheWorkingSetTable, upperAddr); + //assert(indexInChunk >= 0 && indexInChunk < MAX_MEM_BLOCK); + chunk->numReferenced[indexInChunk] = true; - assert(indexInChunk >= 0 && indexInChunk < MAX_MEM_BLOCK); - chunk->numReferenced[indexInChunk] = true; - - } + } + } /* I-stream (4KB) page block memory footprint */ - addr = instrAddr >> 12; - endAddr = (instrAddr + size) >> 12; + addr = instrAddr >> page_size; + endAddr = (instrAddr + size - 1) >> page_size; - for(a = addr; a <= endAddr; a++){ + if(size > 0){ + for(a = addr; a <= endAddr; a++){ - upperAddr = a >> LOG_MAX_MEM_BLOCK; - indexInChunk = a ^ (upperAddr << LOG_MAX_MEM_BLOCK); + upperAddr = a >> LOG_MAX_MEM_BLOCK; + indexInChunk = a ^ (upperAddr << LOG_MAX_MEM_BLOCK); - chunk = lookup(ImemPageWorkingSetTable, 
upperAddr); - if(chunk == (memNode*)NULL) - chunk = install(ImemPageWorkingSetTable, upperAddr); + chunk = lookup(ImemPageWorkingSetTable, upperAddr); + if(chunk == (memNode*)NULL) + chunk = install(ImemPageWorkingSetTable, upperAddr); - assert(indexInChunk > 0 && indexInChunk < MAX_MEM_BLOCK); - chunk->numReferenced[indexInChunk] = true; - } + //assert(indexInChunk >= 0 && indexInChunk < MAX_MEM_BLOCK); + chunk->numReferenced[indexInChunk] = true; + } + } } VOID memfootprint_instr_full(ADDRINT instrAddr, ADDRINT size){ @@ -146,12 +164,12 @@ ADDRINT memfootprint_instr_intervals(ADDRINT instrAddr, ADDRINT size){ /* counting instructions is done in all_instr_intervals() */ instrMem(instrAddr, size); - return (ADDRINT)(total_ins_count%interval_size == 0); + return (ADDRINT)(interval_ins_count_for_hpc_alignment == interval_size); } VOID memfootprint_instr_interval_output(){ - output_file_memfootprint = fopen("memfootprint_phases_int_pin.out","a"); + output_file_memfootprint.open("memfootprint_phases_int_pin.out", ios::out|ios::app); int i,j; nlist* np; @@ -196,8 +214,8 @@ VOID memfootprint_instr_interval_output(){ } } } - fprintf(output_file_memfootprint, "%lld %lld %lld %lld\n", DmemCacheWorkingSetSize, DmemPageWorkingSetSize, ImemCacheWorkingSetSize, ImemPageWorkingSetSize); - fclose(output_file_memfootprint); + output_file_memfootprint << DmemCacheWorkingSetSize << " " << DmemPageWorkingSetSize << " " << ImemCacheWorkingSetSize << " " << ImemPageWorkingSetSize << endl; + output_file_memfootprint.close(); } VOID memfootprint_instr_interval_reset(){ @@ -250,6 +268,7 @@ VOID memfootprint_instr_interval(){ memfootprint_instr_interval_output(); memfootprint_instr_interval_reset(); interval_ins_count = 0; + interval_ins_count_for_hpc_alignment = 0; } /* instrumenting (instruction level) */ @@ -288,10 +307,10 @@ VOID fini_memfootprint(INT32 code, VOID* v){ long long ImemPageWorkingSetSize = 0L; if(interval_size == -1){ - output_file_memfootprint = 
fopen("memfootprint_full_int_pin.out","w"); + output_file_memfootprint.open("memfootprint_full_int_pin.out", ios::out|ios::trunc); } else{ - output_file_memfootprint = fopen("memfootprint_phases_int_pin.out","a"); + output_file_memfootprint.open("memfootprint_phases_int_pin.out", ios::out|ios::app); } for (i = 0; i < MAX_MEM_TABLE_ENTRIES; i++) { @@ -330,7 +349,7 @@ VOID fini_memfootprint(INT32 code, VOID* v){ } } } - fprintf(output_file_memfootprint,"%lld %lld %lld %lld\n", DmemCacheWorkingSetSize, DmemPageWorkingSetSize, ImemCacheWorkingSetSize, ImemPageWorkingSetSize); - fprintf(output_file_memfootprint,"number of instructions: %lld\n", total_ins_count); - fclose(output_file_memfootprint); + output_file_memfootprint << DmemCacheWorkingSetSize << " " << DmemPageWorkingSetSize << " " << ImemCacheWorkingSetSize << " " << ImemPageWorkingSetSize << endl; + output_file_memfootprint << "number of instructions: " << total_ins_count_for_hpc_alignment << endl; + output_file_memfootprint.close(); } diff --git a/mica_memreusedist.cpp b/mica_memreusedist.cpp index 6fb1869..1eb703e 100644 --- a/mica_memreusedist.cpp +++ b/mica_memreusedist.cpp @@ -16,9 +16,14 @@ extern INT64 interval_size; extern INT64 interval_ins_count; +extern INT64 interval_ins_count_for_hpc_alignment; extern INT64 total_ins_count; +extern INT64 total_ins_count_for_hpc_alignment; -FILE* output_file_memreusedist; +extern UINT32 _block_size; +UINT32 memreusedist_block_size; + +ofstream output_file_memreusedist; typedef struct stack_entry_type { struct stack_entry_type* prev; @@ -46,31 +51,6 @@ INT64 cold_refs; INT64 buckets[BUCKET_CNT]; stack_entry* borderline_stack_entries[BUCKET_CNT]; -/*VOID print_stack(){ - - stack_entry* e = stack_top; - int index; - - fprintf(stderr,"borderline_stack_entries: "); - for(index=0; index < BUCKET_CNT; index++) - fprintf(stderr,"[%d] 0x%x, ", index, (unsigned int)borderline_stack_entries[index]); - fprintf(stderr,"\n"); - - index = 0; - while(e != (stack_entry*)NULL){ - - 
fprintf(stderr," 0x%x [a: 0x%x] (bucket: %d)", (unsigned int)e, (unsigned int)e->block_addr, (int)e->bucket); - if(borderline_stack_entries[index] == e){ - fprintf(stderr," *\n"); - index++; - } - else - fprintf(stderr,"\n"); - - e = e->prev; - } -}*/ - /* initializing */ void init_memreusedist(){ @@ -94,15 +74,23 @@ void init_memreusedist(){ stack_top->next = NULL; stack_top->prev = NULL; stack_top->bucket = 0; - stack_size = 0; - borderline_stack_entries[0] = stack_top; + //borderline_stack_entries[0] = stack_top; // NO! First bucket contains two entries + // dummy entry as first borderline entry + borderline_stack_entries[0] = (stack_entry*)malloc(sizeof(stack_entry)); + borderline_stack_entries[0]->block_addr = 0; + borderline_stack_entries[0]->bucket = -1; + borderline_stack_entries[0]->next = NULL; + borderline_stack_entries[0]->prev = NULL; + borderline_stack_entries[0]->bucket = 0; - //print_stack(); + stack_size = 0; + + memreusedist_block_size = _block_size; if(interval_size != -1){ - output_file_memreusedist = fopen("memreusedist_phases_int_pin.out","w"); - fclose(output_file_memreusedist); + output_file_memreusedist.open("memreusedist_phases_int_pin.out", ios::out|ios::trunc); + output_file_memreusedist.close(); } } @@ -115,18 +103,18 @@ ADDRINT memreusedist_instr_intervals(){ /* counting instructions is done in all_instr_intervals() */ - return (ADDRINT)(total_ins_count % interval_size == 0); + return (ADDRINT)(interval_ins_count_for_hpc_alignment == interval_size); } VOID memreusedist_instr_interval_output(){ int i; - output_file_memreusedist = fopen("memreusedist_phases_int_pin.out","a"); - fprintf(output_file_memreusedist, "%lld %lld", mem_ref_cnt, cold_refs); + output_file_memreusedist.open("memreusedist_phases_int_pin.out", ios::out|ios::app); + output_file_memreusedist << mem_ref_cnt << " " << cold_refs; for(i=0; i < BUCKET_CNT; i++){ - fprintf(output_file_memreusedist, " %lld", buckets[i]); + output_file_memreusedist << " " << buckets[i]; } - 
fprintf(output_file_memreusedist, "\n"); - fclose(output_file_memreusedist); + output_file_memreusedist << endl; + output_file_memreusedist.close(); } VOID memreusedist_instr_interval_reset(){ @@ -143,6 +131,7 @@ VOID memreusedist_instr_interval(){ memreusedist_instr_interval_output(); memreusedist_instr_interval_reset(); interval_ins_count = 0; + interval_ins_count_for_hpc_alignment = 0; } /* hash table support */ @@ -168,20 +157,22 @@ stack_entry** install(block_fast** table, ADDRINT key){ b = table[index]; if(b == (block_fast*)NULL) { - if((b = (block_fast*)malloc(sizeof(block_fast))) == (block_fast*)NULL){ - fprintf(stderr,"Not enough memory (in install)\n"); + b = (block_fast*)malloc(sizeof(block_fast)); + /*if((b = (block_fast*)malloc(sizeof(block_fast))) == (block_fast*)NULL){ + cerr << "Not enough memory (in install)" << endl; exit(1); - } + }*/ table[index] = b; } else{ while(b->next != (block_fast*)NULL){ b = b->next; } - if((b->next = (block_fast*)malloc(sizeof(block_fast))) == (block_fast*)NULL){ - fprintf(stderr,"Not enough memory (in install (2))\n"); + b->next = (block_fast*)malloc(sizeof(block_fast)); + /*if((b->next = (block_fast*)malloc(sizeof(block_fast))) == (block_fast*)NULL){ + cerr << "Not enough memory (in install (2))" << endl; exit(1); - } + }*/ b = b->next; } b->next = (block_fast*)NULL; @@ -192,6 +183,60 @@ stack_entry** install(block_fast** table, ADDRINT key){ return b->stack_entries; } +VOID print_stack(stack_entry* top){ + + stack_entry* e; + int i; + + e = top; + i = 0; + while(e != NULL){ + + fprintf(stderr, "[%d] 0x%llx, b: %d (next: 0x%llx, prev: 0x%llx)\n", i, (unsigned long long)e, e->bucket, (unsigned long long)e->next, (unsigned long long)e->prev); + i++; + e = e->prev; + } + fprintf(stderr, "------------------------\n"); +} + +VOID stack_sanity_check(stack_entry* top){ + + int cnt=0; + int bucket = 0; + stack_entry *e; + + e = top; + + if(top->next != NULL){ + fprintf(stderr, "ERROR! 
top->next != NULL\n"); + print_stack(top); + exit(-1); + } + + while(e != NULL){ + + cnt++; + + if(cnt > (2 << bucket)){ + fprintf(stderr, "ERROR @ [%d]! Bucket too big (cnt: %d, bucket: %d, max. size: %d)\n", cnt-1, cnt, bucket, 2 << bucket); + print_stack(top); + exit(-1); + } + + if(e->bucket != bucket){ + fprintf(stderr, "ERROR @ [%d]! Bucket doesn't match @ 0x%llx (b: %d != %d)!\n", cnt-1, (unsigned long long)e, e->bucket, bucket); + print_stack(top); + exit(-1); + } + + if(e == borderline_stack_entries[bucket]) + bucket++; + + e = e->prev; + } + //fprintf(stderr, "STACK ok!\n"); +} + /* stack support */ VOID move_to_top_fast(stack_entry *e, ADDRINT a, stack_entry** top){ @@ -201,34 +246,37 @@ VOID move_to_top_fast(stack_entry *e, ADDRINT a, stack_entry** top){ if(e != (stack_entry*)NULL){ /* check to see if we already are at top of stack */ - if(e->next != (stack_entry*)NULL){ + if(e != *top){ - /* avoid referencing prev for bottom of stack */ - if(e->prev != (stack_entry*)NULL){ + // if entry touched is borderline entry, new borderline entry is the one above the touched one (i.e. ->next) + if(e->bucket > 0 && e == borderline_stack_entries[e->bucket]){ + borderline_stack_entries[e->bucket] = borderline_stack_entries[e->bucket]->next; + } + + // take entry out of stack, update entries above and below accordingly + if(e->prev != (stack_entry*)NULL){ // avoid referencing prev for bottom of stack e->prev->next = e->next; } e->next->prev = e->prev; // adjust all borderline entries above the entry touched (start with i=2 to avoid problems with too small stacks) - for(i=2; i < BUCKET_CNT && i < e->bucket; i++){ + for(i=2; i < BUCKET_CNT && i <= e->bucket; i++){ borderline_stack_entries[i-1]->bucket++; borderline_stack_entries[i-1] = borderline_stack_entries[i-1]->next; } - // if entry touched is borderline entry, new borderline entry is the one above the touched one (i.e. 
->next) - if(e == borderline_stack_entries[e->bucket-1]){ - borderline_stack_entries[e->bucket-1] = borderline_stack_entries[e->bucket-1]->next; - } // place new entry on top of LRU stack e->prev = *top; - e->next = (stack_entry*)NULL; - (*top)->next = e; - borderline_stack_entries[0]->bucket++; - (*top)->bucket = 1; - borderline_stack_entries[0] = (*top); + e->next = (stack_entry*)NULL; // e will be the next top + (*top)->next = e; // current top will slide down + if(e != borderline_stack_entries[0]){ + borderline_stack_entries[0]->bucket++; // borderline stack entry for first bucket moves to next bucket, unless it's the same as the borderline entry + } + //(*top)->bucket = 1; // current top slides into next bucket (INCORRECT, because first bucket contains top *and* previous top) + borderline_stack_entries[0] = (*top); // current top is borderline stack entry for first bucket - *top = e; - e->bucket = 0; + *top = e; // set new top of stack + e->bucket = 0; // set bucket for new top of stack } /* else: if top of stack was referenced again, nothing to do! 
*/ @@ -246,7 +294,7 @@ VOID move_to_top_fast(stack_entry *e, ADDRINT a, stack_entry** top){ // adjust top of stack (*top)->next = e; borderline_stack_entries[0]->bucket++; - (*top)->bucket = 1; + //(*top)->bucket = 1; // current top slides into next bucket (INCORRECT, because first bucket contains top *and* previous top) borderline_stack_entries[0] = (*top); // set new entry as top of stack @@ -256,7 +304,7 @@ VOID move_to_top_fast(stack_entry *e, ADDRINT a, stack_entry** top){ stack_size++; // adjust bucket for borderline entries (except for very last bucket = overflow bucket) - for(i=2; i < BUCKET_CNT-1 && (1 << i) <= stack_size; i++){ + for(i=2; i < BUCKET_CNT && (1 << i) <= stack_size; i++){ borderline_stack_entries[i-1]->bucket++; borderline_stack_entries[i-1] = borderline_stack_entries[i-1]->next; } @@ -269,6 +317,7 @@ VOID move_to_top_fast(stack_entry *e, ADDRINT a, stack_entry** top){ } } } + //stack_sanity_check(*top); } /* determine reuse distance (= number of unique cache blocks referenced since last time this cache was referenced) @@ -288,39 +337,41 @@ VOID memreusedist_memRead(ADDRINT effMemAddr, ADDRINT size){ stack_entry** chunk; stack_entry* entry_for_addr; - /* D-stream (64-byte) cache memory footprint */ + /* D-stream (64-byte) cache memory footprint */ - addr = effMemAddr >> 6; - endAddr = (effMemAddr + size) >> 6; + addr = effMemAddr >> memreusedist_block_size; + endAddr = (effMemAddr + size - 1) >> memreusedist_block_size; - for(a = addr; a <= endAddr; a++){ + if(size > 0){ + for(a = addr; a <= endAddr; a++){ - upperAddr = a >> LOG_MAX_MEM_ENTRIES; - indexInChunk = a ^ (upperAddr << LOG_MAX_MEM_ENTRIES); + upperAddr = a >> LOG_MAX_MEM_ENTRIES; + indexInChunk = a ^ (upperAddr << LOG_MAX_MEM_ENTRIES); - chunk = lookup(hashTableCacheBlocks_fast, upperAddr); - if(chunk == (stack_entry**)NULL) - chunk = install(hashTableCacheBlocks_fast, upperAddr); + chunk = lookup(hashTableCacheBlocks_fast, upperAddr); + if(chunk == (stack_entry**)NULL) + chunk = 
install(hashTableCacheBlocks_fast, upperAddr); - entry_for_addr = chunk[indexInChunk]; + entry_for_addr = chunk[indexInChunk]; - /* determine reuse distance for this access (if it has been accessed before) */ - INT64 b = det_reuse_dist_bucket(entry_for_addr); + /* determine reuse distance for this access (if it has been accessed before) */ + INT64 b = det_reuse_dist_bucket(entry_for_addr); - if(b < 0) - cold_refs++; - else - buckets[b]++; + if(b < 0) + cold_refs++; + else + buckets[b]++; - /* adjust LRU stack */ - move_to_top_fast(entry_for_addr, a, &stack_top); + /* adjust LRU stack */ + move_to_top_fast(entry_for_addr, a, &stack_top); - /* update hash table for new cache blocks */ - if(chunk[indexInChunk] == (stack_entry*)NULL) - chunk[indexInChunk] = stack_top; + /* update hash table for new cache blocks */ + if(chunk[indexInChunk] == (stack_entry*)NULL) + chunk[indexInChunk] = stack_top; - mem_ref_cnt++; - } + mem_ref_cnt++; + } + } } VOID instrument_memreusedist(INS ins, VOID *v){ @@ -346,15 +397,15 @@ VOID fini_memreusedist(INT32 code, VOID* v){ int i; if(interval_size == -1){ - output_file_memreusedist = fopen("memreusedist_full_int_pin.out","w"); + output_file_memreusedist.open("memreusedist_full_int_pin.out", ios::out|ios::trunc); } else{ - output_file_memreusedist = fopen("memreusedist_phases_int_pin.out","a"); + output_file_memreusedist.open("memreusedist_phases_int_pin.out", ios::out|ios::app); } - fprintf(output_file_memreusedist, "%lld %lld", mem_ref_cnt, cold_refs); + output_file_memreusedist << mem_ref_cnt << " " << cold_refs; for(i=0; i < BUCKET_CNT; i++){ - fprintf(output_file_memreusedist, " %lld", buckets[i]); + output_file_memreusedist << " " << buckets[i]; } - fprintf(output_file_memreusedist,"\nnumber of instructions: %lld\n", total_ins_count); - fclose(output_file_memreusedist); + output_file_memreusedist << endl << "number of instructions: " << total_ins_count_for_hpc_alignment << endl; + output_file_memreusedist.close(); } diff --git 
a/mica_ppm.cpp b/mica_ppm.cpp index 7b5bb59..92c82f8 100644 --- a/mica_ppm.cpp +++ b/mica_ppm.cpp @@ -16,9 +16,11 @@ extern INT64 interval_size; extern INT64 interval_ins_count; +extern INT64 interval_ins_count_for_hpc_alignment; extern INT64 total_ins_count; +extern INT64 total_ins_count_for_hpc_alignment; -FILE* output_file_ppm; +ofstream output_file_ppm; BOOL lastInstBr; // was the last instruction a cond. branch instruction? ADDRINT nextAddr; // address of the instruction after the last cond.branch @@ -74,7 +76,7 @@ void init_ppm(){ /* translation of instruction address to indices */ indices_condBr_size = 1024; if( (indices_condBr = (ADDRINT*) malloc(indices_condBr_size*sizeof(ADDRINT))) == (ADDRINT*)NULL){ - fprintf(stderr,"Could not allocate memory for indices_condBr\n"); + cerr << "Could not allocate memory for indices_condBr" << endl; exit(1); } @@ -82,24 +84,24 @@ void init_ppm(){ /* global/local history */ bhr = 0; if((local_bhr = (int*) malloc (brHist_size * sizeof(int))) == (int*) NULL) { - fprintf(stderr,"Could not allocate memory\n"); + cerr << "Could not allocate memory" << endl; exit(1); } /* GAg PPM predictor */ if((GAg_pht = ((char***) malloc (NUM_HIST_LENGTHS * sizeof(char**)))) == (char***) NULL) { - fprintf(stderr,"Could not allocate memory\n"); + cerr << "Could not allocate memory" << endl; exit(1); } for(j = 0; j < NUM_HIST_LENGTHS; j++) { if((GAg_pht[j] = (char**) malloc((history_lengths[j]+1)*sizeof(char*))) == (char**) NULL) { - fprintf(stderr,"Could not allocate memory\n"); + cerr << "Could not allocate memory" << endl; exit(1); } for(i = 0; i <= history_lengths[j]; i++){ if((GAg_pht[j][i] = (char*) malloc((1 << i)*sizeof(char))) == (char*) NULL) { - fprintf(stderr,"Could not allocate memory\n"); + cerr << "Could not allocate memory" << endl; exit(1); } for(k = 0; k < (1 << i); k++) @@ -109,18 +111,18 @@ void init_ppm(){ /* PAg PPM predictor */ if((PAg_pht = ((char***) malloc (NUM_HIST_LENGTHS * sizeof(char**)))) == (char***) NULL) { - 
fprintf(stderr,"Could not allocate memory\n"); + cerr << "Could not allocate memory" << endl; exit(1); } for(j = 0; j < NUM_HIST_LENGTHS; j++) { if((PAg_pht[j] = (char**) malloc((history_lengths[j]+1)*sizeof(char*))) == (char**) NULL) { - fprintf(stderr,"Could not allocate memory\n"); + cerr << "Could not allocate memory" << endl; exit(1); } for(i = 0; i <= history_lengths[j]; i++){ if((PAg_pht[j][i] = (char*) malloc((1 << i)*sizeof(char))) == (char*) NULL) { - fprintf(stderr,"Could not allocate memory\n"); + cerr << "Could not allocate memory" << endl; exit(1); } for(k = 0; k < (1 << i); k++) @@ -130,43 +132,43 @@ void init_ppm(){ /* GAs PPM predictor */ if((GAs_touched = (char*) malloc (brHist_size * sizeof(char))) == (char*) NULL){ - fprintf(stderr,"Could not allocate memory\n"); + cerr << "Could not allocate memory" << endl; exit(1); } if((GAs_pht = (char****) malloc (brHist_size * sizeof(char***))) == (char****) NULL) { - fprintf(stderr,"Could not allocate memory\n"); + cerr << "Could not allocate memory" << endl; exit(1); } /* PAs PPM predictor */ if((PAs_touched = (char*) malloc (brHist_size * sizeof(char))) == (char*) NULL) { - fprintf(stderr,"Could not allocate memory\n"); + cerr << "Could not allocate memory" << endl; exit(1); } if((PAs_pht = (char****) malloc (brHist_size * sizeof(char***))) == (char****) NULL) { - fprintf(stderr,"Could not allocate memory\n"); + cerr << "Could not allocate memory" << endl; exit(1); } if((transition_counts = (INT64*) malloc (brHist_size * sizeof(INT64))) == (INT64*) NULL) { - fprintf(stderr,"Could not allocate memory\n"); + cerr << "Could not allocate memory" << endl; exit(1); } if((local_taken = (char*) malloc (brHist_size * sizeof(char))) == (char*) NULL) { - fprintf(stderr,"Could not allocate memory\n"); + cerr << "Could not allocate memory" << endl; exit(1); } if((local_brCounts = (INT64*) malloc (brHist_size * sizeof(INT64))) == (INT64*) NULL) { - fprintf(stderr,"Could not allocate memory\n"); + cerr << "Could not 
allocate memory" << endl; exit(1); } if((local_taken_counts = (INT64*) malloc (brHist_size * sizeof(INT64))) == (INT64*) NULL) { - fprintf(stderr,"Could not allocate memory\n"); + cerr << "Could not allocate memory" << endl; exit(1); } @@ -187,8 +189,8 @@ void init_ppm(){ } if(interval_size != -1){ - output_file_ppm = fopen("ppm_phases_int_pin.out","w"); - fclose(output_file_ppm); + output_file_ppm.open("ppm_phases_int_pin.out", ios::out|ios::trunc); + output_file_ppm.close(); } } @@ -198,7 +200,7 @@ void init_ppm(){ ADDRINT ppm_instr_intervals(){ - return (ADDRINT)(total_ins_count % interval_size == 0); + return (ADDRINT)(interval_ins_count_for_hpc_alignment == interval_size); } VOID ppm_instr_interval_output(){ @@ -207,11 +209,11 @@ VOID ppm_instr_interval_output(){ INT64 total_taken_count = 0; INT64 total_brCount = 0; - output_file_ppm = fopen("ppm_phases_int_pin.out","a"); + output_file_ppm.open("ppm_phases_int_pin.out", ios::out|ios::app); - fprintf(output_file_ppm, "%lld", (long long)interval_size); + output_file_ppm << interval_size; for(i = 0; i < NUM_HIST_LENGTHS; i++) - fprintf(output_file_ppm, " %lld %lld %lld %lld", (long long)GAg_incorrect_pred[i], (long long)PAg_incorrect_pred[i], (long long)GAs_incorrect_pred[i], (long long)PAs_incorrect_pred[i]); + output_file_ppm << " " << GAg_incorrect_pred[i] << " " << PAg_incorrect_pred[i] << " " << GAs_incorrect_pred[i] << " " << PAs_incorrect_pred[i]; for(i=0; i < brHist_size; i++){ if(local_brCounts[i] > 0){ @@ -227,9 +229,8 @@ VOID ppm_instr_interval_output(){ total_brCount += local_brCounts[i]; } } - fprintf(output_file_ppm," %lld %lld %lld\n",(long long)total_brCount,(long long)total_transition_count,(long long)total_taken_count); - - fclose(output_file_ppm); + output_file_ppm << " " << total_brCount << " " << total_transition_count << " " << total_taken_count << endl; + output_file_ppm.close(); } VOID ppm_instr_interval_reset(){ @@ -256,6 +257,7 @@ VOID ppm_instr_interval(){ ppm_instr_interval_reset(); 
interval_ins_count = 0; + interval_ins_count_for_hpc_alignment = 0; } /* double memory space for branch history size when needed */ @@ -269,66 +271,66 @@ VOID reallocate_brHist(){ brHist_size = brHist_size*2; int_ptr = (INT32*) realloc (local_bhr,brHist_size * sizeof(INT32)); - if(int_ptr == (INT32*) NULL) { - fprintf(stderr,"Could not allocate memory\n"); + /*if(int_ptr == (INT32*) NULL) { + cerr << "Could not allocate memory" << endl; exit(1); - } + }*/ local_bhr = int_ptr; char_ptr = (char*) realloc (GAs_touched, brHist_size * sizeof(char)); - if(char_ptr == (char*) NULL){ - fprintf(stderr,"Could not allocate memory\n"); + /*if(char_ptr == (char*) NULL){ + cerr << "Could not allocate memory" << endl; exit(1); - } + }*/ GAs_touched = char_ptr; char4_ptr = (char****) realloc (GAs_pht,brHist_size * sizeof(char***)); - if(char4_ptr == (char****) NULL) { - fprintf(stderr,"Could not allocate memory\n"); + /*if(char4_ptr == (char****) NULL) { + cerr << "Could not allocate memory" << endl; exit(1); - } + }*/ GAs_pht = char4_ptr; char_ptr = (char*) realloc (PAs_touched,brHist_size * sizeof(char)); - if(char_ptr == (char*) NULL) { - fprintf(stderr,"Could not allocate memory\n"); + /*if(char_ptr == (char*) NULL) { + cerr << "Could not allocate memory" << endl; exit(1); - } + }*/ PAs_touched = char_ptr; char4_ptr = (char****) realloc (PAs_pht,brHist_size * sizeof(char***)); - if(char4_ptr == (char****) NULL) { - fprintf(stderr,"Could not allocate memory\n"); + /*if(char4_ptr == (char****) NULL) { + cerr << "Could not allocate memory" << endl; exit(1); - } + }*/ PAs_pht = char4_ptr; char_ptr = (char*) realloc (local_taken,brHist_size * sizeof(char)); - if(char_ptr == (char*) NULL) { - fprintf(stderr,"Could not allocate memory\n"); + /*if(char_ptr == (char*) NULL) { + cerr << "Could not allocate memory" << endl; exit(1); - } + }*/ local_taken = char_ptr; int64_ptr = (INT64*) realloc(transition_counts, brHist_size * sizeof(INT64)); - if(int64_ptr == (INT64*)NULL) { - 
fprintf(stderr,"Could not allocate memory\n"); + /*if(int64_ptr == (INT64*)NULL) { + cerr << "Could not allocate memory" << endl; exit(1); - } + }*/ transition_counts = int64_ptr; int64_ptr = (INT64*) realloc(local_brCounts, brHist_size * sizeof(INT64)); - if(int64_ptr == (INT64*)NULL) { - fprintf(stderr,"Could not allocate memory\n"); + /*if(int64_ptr == (INT64*)NULL) { + cerr << "Could not allocate memory" << endl; exit(1); - } + }*/ local_brCounts = int64_ptr; int64_ptr = (INT64*) realloc(local_taken_counts, brHist_size * sizeof(INT64)); - if(int64_ptr == (INT64*)NULL) { - fprintf(stderr,"Could not allocate memory\n"); + /*if(int64_ptr == (INT64*)NULL) { + cerr << "Could not allocate memory" << endl; exit(1); - } + }*/ local_taken_counts = int64_ptr; } @@ -348,22 +350,25 @@ VOID condBr(UINT32 id, BOOL _t){ GAs_touched[id] = 1; - if((GAs_pht[id] = ((char***) malloc (NUM_HIST_LENGTHS * sizeof(char**)))) == (char***) NULL) { - fprintf(stderr,"Could not allocate memory)\n"); + GAs_pht[id] = ((char***) malloc (NUM_HIST_LENGTHS * sizeof(char**))); + /*if((GAs_pht[id] = ((char***) malloc (NUM_HIST_LENGTHS * sizeof(char**)))) == (char***) NULL) { + cerr << "Could not allocate memory" << endl; exit(1); - } + }*/ for(j = 0; j < NUM_HIST_LENGTHS; j++){ - if((GAs_pht[id][j] = ((char**) malloc ((history_lengths[j]+1) * sizeof(char*)))) == (char**) NULL) { - fprintf(stderr,"Could not allocate memory)\n"); + GAs_pht[id][j] = ((char**) malloc ((history_lengths[j]+1) * sizeof(char*))); + /*if((GAs_pht[id][j] = ((char**) malloc ((history_lengths[j]+1) * sizeof(char*)))) == (char**) NULL) { + cerr << "Could not allocate memory" << endl; exit(1); - } + }*/ for(i = 0; i <= (int)history_lengths[j]; i++){ - if((GAs_pht[id][j][i] = (char*) malloc((1 << i) * sizeof(char))) == (char*) NULL) { - fprintf(stderr,"Could not allocate memory\n"); + GAs_pht[id][j][i] = (char*) malloc((1 << i) * sizeof(char)); + /*if((GAs_pht[id][j][i] = (char*) malloc((1 << i) * sizeof(char))) == (char*) NULL) { +
cerr << "Could not allocate memory" << endl; exit(1); - } + }*/ for(k = 0; k < (1<= MAX_COMM_DIST){ age = MAX_COMM_DIST - 1; // trim if needed } - assert(age >= 0); + //assert(age >= 0); regAgeDistr[age]++; /* register usage */ @@ -116,7 +118,7 @@ VOID writeRegOp_reg(UINT32 regId){ num = regUseCnt[regId]; if(num >= MAX_REG_USE) // trim if needed num = MAX_REG_USE - 1; - assert(num >= 0); + //assert(num >= 0); regUseDistr[num]++; } @@ -162,13 +164,13 @@ ADDRINT reg_instr_intervals(VOID* _e) { opCounts[e->regOpCnt]++; - return (ADDRINT) (total_ins_count % interval_size == 0); + return (ADDRINT) (interval_ins_count_for_hpc_alignment == interval_size); } VOID reg_instr_interval_output(){ int i; - output_file_reg = fopen("reg_phases_int_pin.out","a"); + output_file_reg.open("reg_phases_int_pin.out", ios::out|ios::app); UINT64 totNumOps = 0; UINT64 num; @@ -178,7 +180,7 @@ VOID reg_instr_interval_output(){ for(i = 1; i < MAX_NUM_OPER; i++){ totNumOps += opCounts[i]*i; } - fprintf(output_file_reg,"%lld %lld",(long long)interval_size, (long long)totNumOps); + output_file_reg << interval_size << " " << totNumOps; /* average degree of use */ num = 0; @@ -186,12 +188,12 @@ VOID reg_instr_interval_output(){ for(i = 0; i < MAX_REG_USE; i++){ num += regUseDistr[i]; } - fprintf(output_file_reg," %lld",(long long)num); + output_file_reg << " " << num; num = 0; for(i = 0; i < MAX_REG_USE; i++){ num += i * regUseDistr[i]; } - fprintf(output_file_reg," %lld",(long long)num); + output_file_reg << " " << num; /* register dependency distributions */ num = 0; @@ -199,17 +201,17 @@ VOID reg_instr_interval_output(){ for(i = 0; i < MAX_COMM_DIST; i++){ num += regAgeDistr[i]; } - fprintf(output_file_reg," %lld",(long long)num); + output_file_reg << " " << num; num = 0; for(i = 0; i < MAX_COMM_DIST; i++){ num += regAgeDistr[i]; if( (i == 1) || (i == 2) || (i == 4) || (i == 8) || (i == 16) || (i == 32) || (i == 64)){ - fprintf(output_file_reg," %lld",(long long)num); + output_file_reg << " " 
<< num; } } - fprintf(output_file_reg,"\n"); + output_file_reg << endl; - fclose(output_file_reg); + output_file_reg.close(); } VOID reg_instr_interval_reset(){ @@ -239,6 +241,7 @@ VOID reg_instr_interval() { reg_instr_interval_output(); reg_instr_interval_reset(); interval_ins_count = 0; + interval_ins_count_for_hpc_alignment = 0; } @@ -250,14 +253,12 @@ VOID instrument_reg(INS ins, ins_buffer_entry* e){ if(!e->setRead){ - //fprintf(stderr, " NEW instruction @ 0x%x, with %d reg reads and %d reg writes\n", e->insAddr, e->regReadCnt, e->regWriteCnt); - maxNumRegsCons = INS_MaxNumRRegs(ins); // maximum number of register consumations (reads) regReadCnt = 0; for(i = 0; i < maxNumRegsCons; i++){ // finding all register operands which are read reg = INS_RegR(ins,i); - assert(((UINT32)reg) < MAX_NUM_REGS); + //assert(((UINT32)reg) < MAX_NUM_REGS); /* only consider valid general-purpose registers (any bit-width) and floating-point registers, * i.e. exlude branch, segment and pin registers, among others */ if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ @@ -266,15 +267,16 @@ VOID instrument_reg(INS ins, ins_buffer_entry* e){ } e->regReadCnt = regReadCnt; - if((e->regsRead = (REG*)malloc(regReadCnt*sizeof(REG))) == (REG*)NULL){ - fprintf(stderr,"ERROR: Could not allocate regsRead memory for ins 0x%x\n", e->insAddr); + e->regsRead = (REG*)malloc(regReadCnt*sizeof(REG)); + /*if((e->regsRead = (REG*)malloc(regReadCnt*sizeof(REG))) == (REG*)NULL){ + cerr << "ERROR: Could not allocate regsRead memory for ins 0x" << hex << (unsigned int)e->insAddr << endl; exit(1); - } + }*/ regReadCnt = 0; for(i = 0; i < maxNumRegsCons; i++){ // finding all register operands which are read reg = INS_RegR(ins,i); - assert(((UINT32)reg) < MAX_NUM_REGS); + //assert(((UINT32)reg) < MAX_NUM_REGS); /* only consider valid general-purpose registers (any bit-width) and
floating-point registers, * i.e. exlude branch, segment and pin registers, among others */ if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ @@ -291,7 +293,7 @@ VOID instrument_reg(INS ins, ins_buffer_entry* e){ for(i=0; i < maxNumRegsProd; i++){ reg = INS_RegW(ins, i); - assert(((UINT32)reg) < MAX_NUM_REGS); + //assert(((UINT32)reg) < MAX_NUM_REGS); /* only consider valid general-purpose registers (any bit-width) and floating-point registers, * i.e. exlude branch, segment and pin registers, among others */ if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ @@ -300,16 +302,17 @@ VOID instrument_reg(INS ins, ins_buffer_entry* e){ } e->regWriteCnt = regWriteCnt; - if((e->regsWritten = (REG*)malloc(regWriteCnt*sizeof(REG))) == (REG*)NULL){ - fprintf(stderr,"ERROR: Could not allocate regsRead memory for ins 0x%x\n", e->insAddr); + e->regsWritten = (REG*)malloc(regWriteCnt*sizeof(REG)); + /*if((e->regsWritten = (REG*)malloc(regWriteCnt*sizeof(REG))) == (REG*)NULL){ + cerr << "ERROR: Could not allocate regsRead memory for ins 0x" << hex << (unsigned int)e->insAddr << endl; exit(1); - } + }*/ regWriteCnt = 0; for(i=0; i < maxNumRegsProd; i++){ reg = INS_RegW(ins, i); - assert(((UINT32)reg) < MAX_NUM_REGS); + //assert(((UINT32)reg) < MAX_NUM_REGS); /* only consider valid general-purpose registers (any bit-width) and floating-point registers, * i.e. 
exlude branch, segment and pin registers, among others */ if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ @@ -328,10 +331,10 @@ VOID instrument_reg(INS ins, ins_buffer_entry* e){ if(INS_OperandIsReg(ins,i)) regOpCnt++; } - if(regOpCnt >= MAX_NUM_OPER){ - fprintf(stderr,"BOOM! -> MAX_NUM_OPER is exceeded! (%u)\n", regOpCnt); + /*if(regOpCnt >= MAX_NUM_OPER){ + cerr << "BOOM! -> MAX_NUM_OPER is exceeded! (" << regOpCnt << ")" << endl; exit(1); - } + }*/ e->regOpCnt = regOpCnt; e->setRegOpCnt = true; } @@ -350,12 +353,12 @@ VOID instrument_reg(INS ins, ins_buffer_entry* e){ VOID fini_reg(INT32 code, VOID* v){ if(interval_size == -1){ - output_file_reg = fopen("reg_full_int_pin.out","w"); - fprintf(output_file_reg,"%lld",(long long)total_ins_count); + output_file_reg.open("reg_full_int_pin.out", ios::out|ios::trunc); + output_file_reg << total_ins_count; } else{ - output_file_reg = fopen("reg_phases_int_pin.out","a"); - fprintf(output_file_reg,"%lld",(long long)interval_ins_count); + output_file_reg.open("reg_phases_int_pin.out", ios::out|ios::app); + output_file_reg << interval_ins_count; } int i; @@ -366,7 +369,7 @@ VOID fini_reg(INT32 code, VOID* v){ for(i = 1; i < MAX_NUM_OPER; i++){ totNumOps += opCounts[i]*i; } - fprintf(output_file_reg," %lld", (long long)totNumOps); + output_file_reg << " " << totNumOps; // ** average degree of use ** num = 0; @@ -374,12 +377,12 @@ VOID fini_reg(INT32 code, VOID* v){ for(i = 0; i < MAX_REG_USE; i++){ num += regUseDistr[i]; } - fprintf(output_file_reg," %lld", (long long)num); + output_file_reg << " " << num; num = 0; for(i = 0; i < MAX_REG_USE; i++){ num += i * regUseDistr[i]; } - fprintf(output_file_reg," %lld", (long long)num); + output_file_reg << " " << num; // ** register dependency distributions ** num = 0; @@ -387,15 +390,15 @@ VOID fini_reg(INT32 code, VOID* v){ for(i = 0; i < MAX_COMM_DIST; 
i++){ num += regAgeDistr[i]; } - fprintf(output_file_reg," %lld",(long long)num); + output_file_reg << " " << num; num = 0; for(i = 0; i < MAX_COMM_DIST; i++){ num += regAgeDistr[i]; if( (i == 1) || (i == 2) || (i == 4) || (i == 8) || (i == 16) || (i == 32) || (i == 64)){ - fprintf(output_file_reg," %lld",(long long)num); + output_file_reg << " " << num; } } - fprintf(output_file_reg,"\n"); - fprintf(output_file_reg, "number of instructions: %lld\n", total_ins_count); - fclose(output_file_reg); + output_file_reg << endl; + output_file_reg << "number of instructions: " << total_ins_count_for_hpc_alignment << endl; + output_file_reg.close(); } diff --git a/mica_stride.cpp b/mica_stride.cpp index 667eca3..0a42998 100644 --- a/mica_stride.cpp +++ b/mica_stride.cpp @@ -16,9 +16,11 @@ extern INT64 interval_size; extern INT64 interval_ins_count; +extern INT64 interval_ins_count_for_hpc_alignment; extern INT64 total_ins_count; +extern INT64 total_ins_count_for_hpc_alignment; -FILE* output_file_stride; +ofstream output_file_stride; UINT64 numRead, numWrite; UINT32 readIndex; @@ -53,16 +55,16 @@ void init_stride(){ /* allocate memory */ if ((instrRead = (ADDRINT*) malloc (numRead * sizeof (ADDRINT))) == (ADDRINT*) NULL) { - fprintf (stderr, "Not enough memory (in main (2))\n"); + cerr << "Not enough memory (in main (2))" << endl; exit (0); } - //fprintf(stderr,"malloc %d bytes\n",numRead*sizeof(ADDRINT)); + //cerr << "malloc " << numRead*sizeof(ADDRINT) << " bytes" << endl; if ((instrWrite = (ADDRINT*) malloc (numWrite * sizeof (ADDRINT))) == (ADDRINT*) NULL) { - fprintf (stderr, "Not enough memory (in main (3))\n"); + cerr << "Not enough memory (in main (3))" << endl; exit (0); } - //fprintf(stderr,"malloc %d bytes\n",numWrite*sizeof(ADDRINT)); + //cerr << "malloc " << numWrite*sizeof(ADDRINT) << " bytes" << endl; /* initialize */ readIndex = 1; @@ -85,7 +87,7 @@ void init_stride(){ indices_memRead_size = 1024; if( (indices_memRead = (ADDRINT*)
malloc(indices_memRead_size*sizeof(ADDRINT))) == (ADDRINT*)NULL){ - fprintf(stderr,"Could not allocate memory for indices_memRead\n"); + cerr << "Could not allocate memory for indices_memRead" << endl; exit(1); } for (i = 0; i < (int)indices_memRead_size; i++) @@ -93,16 +95,16 @@ void init_stride(){ indices_memWrite_size = 1024; if( (indices_memWrite = (ADDRINT*) malloc(indices_memWrite_size*sizeof(ADDRINT))) == (ADDRINT*)NULL){ - fprintf(stderr,"Could not allocate memory for indices_memWrite\n"); + cerr << "Could not allocate memory for indices_memWrite" << endl; exit(1); } for (i = 0; i < (int)indices_memWrite_size; i++) indices_memWrite[i] = 0; - if(interval_size != -1){ - output_file_stride = fopen("stride_phases_int_pin.out","w"); - fclose(output_file_stride); + if(interval_size != -1){ + output_file_stride.open("stride_phases_int_pin.out", ios::out|ios::trunc); + output_file_stride.close(); } } @@ -112,7 +114,7 @@ void init_stride(){ ADDRINT stride_instr_intervals(){ /* counting instructions is done in all_instr_intervals() */ - return (ADDRINT) (total_ins_count % interval_size == 0); + return (ADDRINT) (interval_ins_count_for_hpc_alignment == interval_size); } VOID stride_instr_interval_output(){ @@ -120,18 +122,18 @@ VOID stride_instr_interval_output(){ UINT64 cum; - output_file_stride = fopen("stride_phases_int_pin.out","a"); + output_file_stride.open("stride_phases_int_pin.out", ios::out|ios::app); - fprintf(output_file_stride,"%lld",(long long)numReadInstrsAnalyzed); + output_file_stride << numReadInstrsAnalyzed; /* local read distribution */ cum = 0; for(i = 0; i < MAX_DISTR; i++){ cum += localReadDistrib[i]; if( (i == 0) || (i == 8) || (i == 64) || (i == 512) || (i == 4096) || (i == 32768) || (i == 262144) ){ if(cum > 0) - fprintf(output_file_stride," %lld", (long long) cum); + output_file_stride << " " << cum; else - fprintf(output_file_stride," %d", 0); + output_file_stride << " 0"; } if(i == 262144) break; @@ -142,23 +144,23 @@ VOID 
stride_instr_interval_output(){ cum += globalReadDistrib[i]; if( (i == 0) || (i == 8) || (i == 64) || (i == 512) || (i == 4096) || (i == 32768) || (i == 262144) ){ if(cum > 0) - fprintf(output_file_stride," %lld", (long long) cum); + output_file_stride << " " << cum; else - fprintf(output_file_stride," %d", 0); + output_file_stride << " 0"; } if(i == 262144) break; } - fprintf(output_file_stride," %lld",(long long)numWriteInstrsAnalyzed); + output_file_stride << " " << numWriteInstrsAnalyzed; /* local write distribution */ cum = 0; for(i = 0; i < MAX_DISTR; i++){ cum += localWriteDistrib[i]; if( (i == 0) || (i == 8) || (i == 64) || (i == 512) || (i == 4096) || (i == 32768) || (i == 262144) ){ if(cum > 0) - fprintf(output_file_stride," %lld", (long long) cum); + output_file_stride << " " << cum; else - fprintf(output_file_stride," %d", 0); + output_file_stride << " 0"; } if(i == 262144) break; @@ -169,19 +171,19 @@ VOID stride_instr_interval_output(){ cum += globalWriteDistrib[i]; if( (i == 0) || (i == 8) || (i == 64) || (i == 512) || (i == 4096) || (i == 32768) ){ if(cum > 0) - fprintf(output_file_stride," %lld", (long long) cum); + output_file_stride << " " << cum; else - fprintf(output_file_stride," %d", 0); + output_file_stride << " 0"; } if(i == 262144){ if(cum > 0) - fprintf(output_file_stride," %lld\n", (long long) cum); + output_file_stride << " " << cum << endl; else - fprintf(output_file_stride," %d\n", 0); + output_file_stride << " 0" << endl; break; } } - fclose(output_file_stride); + output_file_stride.close(); } VOID stride_instr_interval_reset(){ @@ -196,6 +198,8 @@ VOID stride_instr_interval_reset(){ numInstrsAnalyzed = 0; numReadInstrsAnalyzed = 0; numWriteInstrsAnalyzed = 0; + interval_ins_count = 0; + interval_ins_count_for_hpc_alignment = 0; } void stride_instr_interval(){ @@ -228,10 +232,10 @@ VOID reallocate_readArray_stride(){ numRead *= 2; ptr = (ADDRINT*) realloc (instrRead, numRead * sizeof (ADDRINT)); - if (ptr == (ADDRINT*) NULL) { - 
fprintf (stderr, "Not enough memory (in reallocate_readArray_stride)\n"); + /*if (ptr == (ADDRINT*) NULL) { + cerr << "Not enough memory (in reallocate_readArray_stride)" << endl; exit (1); - } + }*/ instrRead = ptr; } @@ -253,10 +257,10 @@ VOID reallocate_writeArray_stride(){ numWrite *= 2; ptr = (ADDRINT*) realloc (instrWrite, numWrite * sizeof (ADDRINT)); - if (ptr == (ADDRINT*) NULL) { - fprintf (stderr, "Not enough memory (in reallocate_writeArray_stride)\n"); + /*if (ptr == (ADDRINT*) NULL) { + cerr << "Not enough memory (in reallocate_writeArray_stride)" << endl; exit (1); - } + }*/ instrWrite = ptr; } @@ -269,10 +273,10 @@ void register_memRead_stride(ADDRINT ins_addr){ indices_memRead_size *= 2; ptr = (ADDRINT*) realloc(indices_memRead, indices_memRead_size*sizeof(ADDRINT)); - if(ptr == (ADDRINT*)NULL){ - fprintf(stderr,"Could not allocate memory (realloc in register_readMem)!\n"); + /*if(ptr == (ADDRINT*)NULL){ + cerr << "Could not allocate memory (realloc in register_readMem)!" << endl; exit(1); - } + }*/ indices_memRead = ptr; } @@ -290,10 +294,10 @@ void register_memWrite_stride(ADDRINT ins_addr){ indices_memWrite_size *= 2; ptr = (ADDRINT*) realloc(indices_memWrite, indices_memWrite_size*sizeof(ADDRINT)); - if(ptr == (ADDRINT*)NULL){ - fprintf(stderr,"Could not allocate memory (realloc in register_writeMem)!\n"); + /*if(ptr == (ADDRINT*)NULL){ + cerr << "Could not allocate memory (realloc in register_writeMem)!" 
<< endl; exit(1); - } + }*/ indices_memWrite = ptr; } @@ -319,7 +323,7 @@ VOID readMem_stride(UINT32 index, ADDRINT effAddr, ADDRINT size){ } localReadDistrib[stride]++; - instrRead[index] = effAddr + size; + instrRead[index] = effAddr + size - 1; /* global stride */ /* avoid negative values, has to be done like this (not stride < 0 => stride = -stride (avoid problems with unsigned values)) */ @@ -332,7 +336,7 @@ VOID readMem_stride(UINT32 index, ADDRINT effAddr, ADDRINT size){ } globalReadDistrib[stride]++; - lastReadAddr = effAddr + size; + lastReadAddr = effAddr + size - 1; } VOID writeMem_stride(UINT32 index, ADDRINT effAddr, ADDRINT size){ @@ -352,7 +356,7 @@ VOID writeMem_stride(UINT32 index, ADDRINT effAddr, ADDRINT size){ } localWriteDistrib[stride]++; - instrWrite[index] = effAddr + size; + instrWrite[index] = effAddr + size - 1; /* global stride */ /* avoid negative values, has to be doen like this (not stride < 0 => stride = -stride) */ @@ -365,7 +369,7 @@ VOID writeMem_stride(UINT32 index, ADDRINT effAddr, ADDRINT size){ } globalWriteDistrib[stride]++; - lastWriteAddr = effAddr + size; + lastWriteAddr = effAddr + size - 1; } UINT32 stride_index_memRead1(ADDRINT a){ @@ -445,21 +449,21 @@ VOID fini_stride(INT32 code, VOID* v){ UINT64 cum; if(interval_size == -1){ - output_file_stride = fopen("stride_full_int_pin.out","w"); + output_file_stride.open("stride_full_int_pin.out", ios::out|ios::trunc); } else{ - output_file_stride = fopen("stride_phases_int_pin.out","a"); + output_file_stride.open("stride_phases_int_pin.out", ios::out|ios::app); } - fprintf(output_file_stride,"%lld",(long long)numReadInstrsAnalyzed); + output_file_stride << numReadInstrsAnalyzed; /* local read distribution */ cum = 0; for(i = 0; i < MAX_DISTR; i++){ cum += localReadDistrib[i]; if( (i == 0) || (i == 8) || (i == 64) || (i == 512) || (i == 4096) || (i == 32768) || (i == 262144) ){ if(cum > 0) - fprintf(output_file_stride," %lld", (long long) cum); + output_file_stride << " " << 
cum; else - fprintf(output_file_stride," %d", 0); + output_file_stride << " 0"; } if(i == 262144) break; @@ -470,23 +474,23 @@ VOID fini_stride(INT32 code, VOID* v){ cum += globalReadDistrib[i]; if( (i == 0) || (i == 8) || (i == 64) || (i == 512) || (i == 4096) || (i == 32768) || (i == 262144) ){ if(cum > 0) - fprintf(output_file_stride," %lld", (long long) cum); + output_file_stride << " " << cum; else - fprintf(output_file_stride," %d", 0); + output_file_stride << " 0"; } if(i == 262144) break; } - fprintf(output_file_stride," %lld",(long long)numWriteInstrsAnalyzed); + output_file_stride << " " << numWriteInstrsAnalyzed; /* local write distribution */ cum = 0; for(i = 0; i < MAX_DISTR; i++){ cum += localWriteDistrib[i]; if( (i == 0) || (i == 8) || (i == 64) || (i == 512) || (i == 4096) || (i == 32768) || (i == 262144) ){ if(cum > 0) - fprintf(output_file_stride," %lld", (long long) cum); + output_file_stride << " " << cum; else - fprintf(output_file_stride," %d", 0); + output_file_stride << " 0"; } if(i == 262144) break; @@ -497,18 +501,18 @@ VOID fini_stride(INT32 code, VOID* v){ cum += globalWriteDistrib[i]; if( (i == 0) || (i == 8) || (i == 64) || (i == 512) || (i == 4096) || (i == 32768) ){ if(cum > 0) - fprintf(output_file_stride," %lld", (long long) cum); + output_file_stride << " " << cum; else - fprintf(output_file_stride," %d", 0); + output_file_stride << " 0"; } if(i == 262144){ if(cum > 0) - fprintf(output_file_stride," %lld\n", (long long) cum); + output_file_stride << " " << cum << endl; else - fprintf(output_file_stride," %d\n", 0); + output_file_stride << " 0" << endl; break; } } - fprintf(output_file_stride,"number of instructions: %lld\n", total_ins_count); - fclose(output_file_stride); + output_file_stride << "number of instructions: " << total_ins_count_for_hpc_alignment << endl; + output_file_stride.close(); } diff --git a/mica_utils.cpp b/mica_utils.cpp index d88677b..a81bc93 100644 --- a/mica_utils.cpp +++ b/mica_utils.cpp @@ -36,28 +36,31 
@@ memNode* install(nlist** table, ADDRINT key){ np = table[index]; if(np == (nlist*)NULL) { - if((np = (nlist*)malloc(sizeof(nlist))) == (nlist*)NULL){ - fprintf(stderr,"Not enough memory (in install)\n"); + np = (nlist*)malloc(sizeof(nlist)); + /*if((np = (nlist*)malloc(sizeof(nlist))) == (nlist*)NULL){ + cerr << "Not enough memory (in install)" << endl; exit(1); - } + }*/ table[index] = np; } else{ while(np->next != (nlist*)NULL){ np = np->next; } - if((np->next = (nlist*)malloc(sizeof(nlist))) == (nlist*)NULL){ - fprintf(stderr,"Not enough memory (in install (2))\n"); + np->next = (nlist*)malloc(sizeof(nlist)); + /*if((np->next = (nlist*)malloc(sizeof(nlist))) == (nlist*)NULL){ + cerr << "Not enough memory (in install (2))" << endl; exit(1); - } + }*/ np = np->next; } np->next = (nlist*)NULL; np->id = key; - if((np->mem = (memNode*)malloc (sizeof(memNode))) == (memNode*)NULL){ - fprintf(stderr,"Not enough memory (in install (3))\n"); + np->mem = (memNode*)malloc (sizeof(memNode)); + /*if((np->mem = (memNode*)malloc (sizeof(memNode))) == (memNode*)NULL){ + cerr << "Not enough memory (in install (3))" << endl; exit(1); - } + }*/ for(i = 0; i < MAX_MEM_ENTRIES; i++){ (np->mem)->timeAvailable[i] = 0; }