From d6148d66806e06d8761caa8dc97413b5fed84335 Mon Sep 17 00:00:00 2001 From: kehoste Date: Mon, 2 Nov 2009 20:44:42 +0000 Subject: [PATCH] completed implementation of flexible itypes with reading instruction groups from file and keeping track of instruction categories in 'other' group git-svn-id: https://acavus.elis.ugent.be/svn/MICA/trunk@27 807cdcb1-511e-4608-a72a-c2ff732aa92f --- RELEASE_NOTES | 14 +- mica_all.cpp | 163 +++++++--------- mica_ilp.cpp | 4 +- mica_init.cpp | 51 ++--- mica_itypes.cpp | 490 ++++++++++++++++++++++++++++++++++-------------- 5 files changed, 463 insertions(+), 259 deletions(-) diff --git a/RELEASE_NOTES b/RELEASE_NOTES index 4c9ede0..9826470 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -1,3 +1,15 @@ +X Xth 2009 +------------------ + +MICA v0.3 + +- increases flexibility of itypes analysis significantly + - instruction groups used in itypes analysis can be specified by the user now, + using an itypes.spec file; specify the filename in mica.conf using an entry like: + itypes_spec_file: + - by default, the old instruction groups are used + + September 22th 2009 ------------------- @@ -8,7 +20,7 @@ recent Pin kits. This small patch release should resolve these issues. 
- bug fixes: * adjusted makefile and README according to Pin kit directory tree changes - * adjusted mica_itypes.cpp to recognize both NOP and WIDENOP categories + * adjust mica_itypes.cpp to recognize both NOP and WIDENOP categories June 13th 2008 diff --git a/mica_all.cpp b/mica_all.cpp index 29098e0..b12fc56 100644 --- a/mica_all.cpp +++ b/mica_all.cpp @@ -10,7 +10,7 @@ /* MICA includes */ #include "mica_all.h" #include "mica_ilp.h" // needed for empty_all_buffer_all -#include "mica_itypes.h" // needed for itypes_count_* , itypes_instr_interval_output and itypes_instr_interval_reset +#include "mica_itypes.h" // needed for itypes_count , itypes_instr_interval_output and itypes_instr_interval_reset #include "mica_ppm.h" // needed for instrument_ppm_cond_br, ppm_instr_interval_output and ppm_instr_interval_reset #include "mica_reg.h" // needed for reg_instr_full, reg_instr_intervals, reg_instr_interval_output and reg_instr_interval_reset #include "mica_stride.h" // needed for stride_index_mem*, readMem_stride, writeMem_stride, stride_instr_interval_output and stride_instr_interval_reset @@ -24,6 +24,15 @@ extern INT64 interval_ins_count; extern INT64 interval_size; +extern identifier** group_identifiers; +extern INT64* group_ids_cnt; +extern INT64* group_counts; +extern INT64 number_of_groups; + +extern INT64 other_ids_cnt; +extern INT64 other_ids_max_cnt; +extern identifier* other_group_identifiers; + void init_all(){ init_ilp_all(); @@ -41,7 +50,7 @@ VOID all_instr_full_count(){ if(total_ins_count % PROGRESS_THRESHOLD == 0){ FILE* f = fopen("mica_progress.txt","w"); - fprintf(f,"%lld*10^9 instructions analyzed\n", total_ins_count/PROGRESS_THRESHOLD); + fprintf(f,"%lld*10^7 instructions analyzed\n", total_ins_count/PROGRESS_THRESHOLD); fclose(f); } } @@ -52,15 +61,15 @@ VOID all_instr_intervals_count(){ if(total_ins_count % PROGRESS_THRESHOLD == 0){ FILE* f = fopen("mica_progress.txt","w"); - fprintf(f,"%lld*10^9 instructions analyzed\n", 
total_ins_count/PROGRESS_THRESHOLD); + fprintf(f,"%lld*10^7 instructions analyzed\n", total_ins_count/PROGRESS_THRESHOLD); fclose(f); } } ADDRINT all_buffer_instruction_2reads_write(void* _e, ADDRINT read1_addr, ADDRINT read2_addr, ADDRINT read_size, UINT32 stride_index_memread1, UINT32 stride_index_memread2, ADDRINT write_addr, ADDRINT write_size, UINT32 stride_index_memwrite){ - itypes_count_mem_read(); - itypes_count_mem_write(); + //itypes_count_mem_read(); + //itypes_count_mem_write(); readMem_stride(stride_index_memread1, read1_addr, read_size); readMem_stride(stride_index_memread2, read2_addr, read_size); writeMem_stride(stride_index_memwrite, write_addr, write_size); @@ -74,8 +83,8 @@ ADDRINT all_buffer_instruction_2reads_write(void* _e, ADDRINT read1_addr, ADDRIN ADDRINT all_buffer_instruction_read_write(void* _e, ADDRINT read1_addr, ADDRINT read_size, UINT32 stride_index_memread1, ADDRINT write_addr, ADDRINT write_size, UINT32 stride_index_memwrite){ - itypes_count_mem_read(); - itypes_count_mem_write(); + //itypes_count_mem_read(); + //itypes_count_mem_write(); readMem_stride(stride_index_memread1, read1_addr, read_size); writeMem_stride(stride_index_memwrite, write_addr, write_size); memOp(read1_addr, read_size); // memfootprint @@ -86,7 +95,7 @@ ADDRINT all_buffer_instruction_read_write(void* _e, ADDRINT read1_addr, ADDRINT ADDRINT all_buffer_instruction_2reads(void* _e, ADDRINT read1_addr, ADDRINT read2_addr, ADDRINT read_size, UINT32 stride_index_memread1, UINT32 stride_index_memread2){ - itypes_count_mem_read(); + //itypes_count_mem_read(); readMem_stride(stride_index_memread1, read1_addr, read_size); readMem_stride(stride_index_memread2, read2_addr, read_size); memOp(read1_addr, read_size); // memfootprint @@ -98,7 +107,7 @@ ADDRINT all_buffer_instruction_2reads(void* _e, ADDRINT read1_addr, ADDRINT read ADDRINT all_buffer_instruction_read(void* _e, ADDRINT read1_addr, ADDRINT read_size, UINT32 stride_index_memread1){ - itypes_count_mem_read(); + 
//itypes_count_mem_read(); readMem_stride(stride_index_memread1, read1_addr, read_size); memOp(read1_addr, read_size); // memfootprint memreusedist_memRead(read1_addr, read_size); // memreusedist @@ -107,7 +116,7 @@ ADDRINT all_buffer_instruction_read(void* _e, ADDRINT read1_addr, ADDRINT read_s ADDRINT all_buffer_instruction_write(void* _e, ADDRINT write_addr, ADDRINT write_size, UINT32 stride_index_memwrite){ - itypes_count_mem_write(); + //itypes_count_mem_write(); writeMem_stride(stride_index_memwrite, write_addr, write_size); memOp(write_addr, write_size); // memfootprint return ilp_buffer_instruction_write(_e, write_addr, write_size); @@ -156,7 +165,7 @@ VOID all_instr_interval(){ VOID instrument_all(INS ins, VOID* v, ins_buffer_entry* e){ - UINT32 i, maxNumRegsProd, maxNumRegsCons, regReadCnt, regWriteCnt, opCnt, regOpCnt; + UINT32 i, j, maxNumRegsProd, maxNumRegsCons, regReadCnt, regWriteCnt, opCnt, regOpCnt; REG reg; BOOL categorized = false; char cat[50]; @@ -313,104 +322,76 @@ VOID instrument_all(INS ins, VOID* v, ins_buffer_entry* e){ INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)empty_ilp_buffer_all, IARG_END); /* +++ ITYPES +++ */ - // control flow instructions - if(strcmp(cat,"COND_BR") == 0 || strcmp(cat,"UNCOND_BR") == 0 || strcmp(opcode,"LEAVE") == 0 || strcmp(opcode,"RET_NEAR") == 0 || strcmp(opcode,"CALL_NEAR") == 0){ - categorized = true; - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count_control,IARG_END); - } - else{ - // arithmetic instructions (integer) - if( strcmp(cat,"LOGICAL") == 0 || strcmp(cat,"DATAXFER") == 0 || strcmp(cat,"BINARY") == 0 || strcmp(cat,"FLAGOP") == 0 || strcmp(cat,"BITBYTE") == 0){ - if(categorized){ - fprintf(stderr, "ERROR: Already categorized! 
(cat: %s, opcode: %s)\n", cat, opcode); - exit(1); - } - categorized = true; - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count_arith,IARG_END); - } - else{ - // floating point instructions - if(strcmp(cat,"X87_ALU") == 0 || strcmp(cat,"FCMOV") == 0){ - if(categorized){ - fprintf(stderr, "ERROR: Already categorized! (cat: %s, opcode: %s)\n", cat, opcode); - exit(1); + + // go over all groups, increase group count if instruction matches that group + // group counts are increased at most once per instruction executed, + // even if the instruction matches multiple identifiers in that group + for(i=0; i < number_of_groups; i++){ + for(j=0; j < group_ids_cnt[i]; j++){ + if(group_identifiers[i][j].type == identifier_type::ID_TYPE_CATEGORY){ + if(strcmp(group_identifiers[i][j].str, cat) == 0){ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count, IARG_UINT32, i, IARG_END); + categorized = true; + break; } - categorized = true; - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count_fp,IARG_END); - } + } else{ - // pop/push instructions (stack usage) - if( (strcmp(cat,"POP") == 0) || (strcmp(cat,"PUSH") == 0)){ - if(categorized){ - fprintf(stderr, "ERROR: Already categorized! (cat: %s, opcode: %s)\n", cat, opcode); - exit(1); + if(group_identifiers[i][j].type == identifier_type::ID_TYPE_OPCODE){ + if(strcmp(group_identifiers[i][j].str, opcode) == 0){ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count, IARG_UINT32, i, IARG_END); + categorized = true; + break; } - categorized = true; - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count_stack,IARG_END); } else{ - // [!] shift instructions (bitwise) - if(strcmp(cat,"SHIFT") == 0){ - if(categorized){ - fprintf(stderr, "ERROR: Already categorized! (cat: %s, opcode: %s)\n", cat, opcode); - exit(1); - } - categorized = true; - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count_shift,IARG_END); - } - else{ - // [!] 
string instructions - if(strcmp(cat,"STRINGOP") == 0){ - if(categorized){ - fprintf(stderr, "ERROR: Already categorized! (cat: %s, opcode: %s)\n", cat, opcode); - exit(1); - } + if(group_identifiers[i][j].type == identifier_type::ID_TYPE_SPECIAL){ + if(strcmp(group_identifiers[i][j].str, "mem_read") == 0 && INS_IsMemoryRead(ins) ){ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count, IARG_UINT32, i, IARG_END); categorized = true; - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count_string,IARG_END); + break; } else{ - // [!] MMX/SSE instructions - if(strcmp(cat,"MMX") == 0 || strcmp(cat,"SSE") == 0){ - if(categorized){ - fprintf(stderr, "ERROR: Already categorized! (cat: %s, opcode: %s)\n", cat, opcode); - exit(1); - } + if(strcmp(group_identifiers[i][j].str, "mem_write") == 0 && INS_IsMemoryWrite(ins) ){ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count, IARG_UINT32, i, IARG_END); categorized = true; - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count_sse,IARG_END); - } + break; + } else{ - // other (interrupts, rotate instructions, semaphore, conditional move, system) - if(strcmp(cat,"INTERRUPT") == 0 || strcmp(cat,"ROTATE") == 0 || strcmp(cat,"SEMAPHORE") == 0 || strcmp(cat,"CMOV") == 0 || strcmp(cat,"SYSTEM") == 0 || strcmp(cat,"MISC") == 0 || strcmp(cat,"PREFETCH") == 0 ){ - if(categorized){ - fprintf(stderr, "ERROR: Already categorized! (cat: %s, opcode: %s)\n", cat, opcode); - exit(1); - } - categorized = true; - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count_other,IARG_END); - } - else{ - // [!] NOP instructions - if(strcmp(cat,"NOP") == 0){ - if(categorized){ - fprintf(stderr, "ERROR: Already categorized! (cat: %s, opcode: %s)\n", cat, opcode); - exit(1); - } - categorized = true; - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count_nop,IARG_END); - } - } } } } + else{ + fprintf(stderr, "ERROR! 
Unknown identifier type specified (%d).\n", group_identifiers[i][j].type); + } } } } } - if(!categorized){ - fprintf(stderr,"What the hell ?!? I don't know this one yet! (cat: %s, opcode: %s)\n", cat, opcode); - exit(1); - } + // count instruction that don't fit in any of the specified categories in the last group + if( !categorized ){ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count, IARG_UINT32, (unsigned int)number_of_groups, IARG_END); + + // check whether this category is already known in the 'other' group + for(i=0; i < other_ids_cnt; i++){ + if(strcmp(other_group_identifiers[i].str, cat) == 0) + break; + } + + // if a new instruction category is found, add it to the set + if(i == other_ids_cnt){ + other_group_identifiers[other_ids_cnt].type = identifier_type::ID_TYPE_CATEGORY; + other_group_identifiers[other_ids_cnt].str = (char*)malloc((strlen(cat)+1)*sizeof(char)); + strcpy(other_group_identifiers[other_ids_cnt].str, cat); + other_ids_cnt++; + } + + // prepare for (possible) next category + if(other_ids_cnt == other_ids_max_cnt){ + other_ids_max_cnt *= 2; + other_group_identifiers = (identifier*)realloc(other_group_identifiers, other_ids_max_cnt*sizeof(identifier)); + } + } /* +++ PPM *** */ if(strcmp(cat,"COND_BR") == 0){ diff --git a/mica_ilp.cpp b/mica_ilp.cpp index e71e807..3535049 100644 --- a/mica_ilp.cpp +++ b/mica_ilp.cpp @@ -17,7 +17,7 @@ const UINT32 win_sizes[ILP_WIN_SIZE_CNT] = {32, 64, 128, 256}; -extern UINT32 _win_size; +extern UINT32 _ilp_win_size; UINT32 win_size; /* buffer settings */ @@ -102,7 +102,7 @@ void init_ilp_one(){ init_ilp_common(); init_ilp_buffering(); - win_size = _win_size; + win_size = _ilp_win_size; size_pow_times = 10; if((all_times = (UINT64*)malloc((1 << size_pow_times) * sizeof(UINT64))) == (UINT64*)NULL){ diff --git a/mica_init.cpp b/mica_init.cpp index f2ab4cf..b92e642 100644 --- a/mica_init.cpp +++ b/mica_init.cpp @@ -15,37 +15,26 @@ */ void setup_mica_log(FILE* *log){ - int cnt; char name[20]; 
sprintf(name, "mica.log"); - FILE* test = fopen(name,"r"); - - cnt = 0; - while(test != (FILE*)NULL){ - sprintf(name, "mica.log.%d", ++cnt); - test = fopen(name,"r"); - } *log = fopen(name,"w"); if( *log == (FILE*)NULL ){ fprintf(stderr,"Could not create mica.log, aborting!\n"); exit(1); } - else{ - if( strcmp(name,"mica.log") != 0) - fprintf(stderr,"\n WARNING: Writing log messages to %s, because a file named \'mica.log\' already exists.\n", name); - } } /* * Read mica.conf config file for MICA. * - * analysis_type: 'all' | 'ilp' | 'ilp_one' | 'itypes' | 'ppm' | 'reg' | 'stride' | 'workingset' + * analysis_type: 'all' | 'ilp' | 'ilp_one' | 'itypes' | 'ppm' | 'reg' | 'stride' | 'workingset' | 'custom' * interval_size: 'full' | * ilp_size: + * itypes_spec_file: */ -void read_config(FILE* log, INT64* interval_size, MODE* mode, UINT32* _win_size){ +void read_config(FILE* log, INT64* interval_size, MODE* mode, UINT32* _ilp_win_size, char** _itypes_spec_file){ char* string; FILE* config_file = fopen("mica.conf","r"); @@ -66,7 +55,6 @@ void read_config(FILE* log, INT64* interval_size, MODE* mode, UINT32* _win_size) } fscanf(config_file,"analysis_type: %s\n",string); - DEBUG_MSG("Analysis type: %s\n",string); // figure out mode we are running in @@ -115,15 +103,15 @@ void read_config(FILE* log, INT64* interval_size, MODE* mode, UINT32* _win_size) LOG_MSG("Measuring MEMREUSEDIST characteristics...\n"); } else{ - if(strcmp(string,"mytype") == 0){ - *mode = MODE_MYTYPE; - LOG_MSG("Measuring MYTYPE characteristics...\n"); + if(strcmp(string,"custom") == 0){ + *mode = MODE_CUSTOM; + LOG_MSG("Measuring CUSTOM characteristics...\n"); } else{ LOG_MSG("\nERROR: Unknown set of characteristics chosen!\n"); - LOG_MSG(" Available characteristics include: 'all', 'ilp', 'ilp_one', 'itypes', 'ppm', 'reg', 'stride', 'memfootprint', 'memreusedist'\n"); + LOG_MSG(" Available characteristics include: 'all', 'ilp', 'ilp_one', 'itypes', 'ppm', 'reg', 'stride', 'memfootprint', 'memreusedist', 
'custom'\n"); ERROR("\nERROR: Unknown set of characteristics chosen!\n"); - ERROR(" Available characteristics include: 'all', 'ilp', 'ilp_one', 'itypes', 'ppm', 'reg', 'stride', 'memfootprint', 'memreusedist'\n"); + ERROR(" Available characteristics include: 'all', 'ilp', 'ilp_one', 'itypes', 'ppm', 'reg', 'stride', 'memfootprint', 'memreusedist', 'custom'\n"); } } } @@ -148,12 +136,29 @@ void read_config(FILE* log, INT64* interval_size, MODE* mode, UINT32* _win_size) LOG_MSG("Returning data for each interval of %lld instructions...\n", (INT64)*interval_size); } + // read window size for ILP_ONE if(*mode == MODE_ILP_ONE){ - fscanf(config_file,"ilp_size: %s\n", string); - *_win_size = (UINT32)atoi(string); - LOG_MSG("ILP window size: %d\n", *_win_size); + if(fscanf(config_file,"ilp_size: %s\n", string) == 1){ + *_ilp_win_size = (UINT32)atoi(string); + LOG_MSG("ILP window size: %d\n", *_ilp_win_size); + } + else{ + fprintf(stderr, "ERROR! ILP_ONE mode was specified, but no window size was found along with it!\n"); + exit(-1); + } } + // possibly read itypes specification filename + *_itypes_spec_file = NULL; + if(*mode == MODE_ITYPES || *mode == MODE_ALL){ + if(fscanf(config_file,"itypes_spec_file: %s\n", string) == 1){ + *_itypes_spec_file = (char*)malloc((strlen(string)+1)*sizeof(char)); + strcpy(*_itypes_spec_file, string); + fprintf(stdout,"ITYPES spec file: %s\n", *_itypes_spec_file); + LOG_MSG("ITYPES spec file: %s\n", *_itypes_spec_file); + } + } + DEBUG_MSG("All done reading config\n"); } diff --git a/mica_itypes.cpp b/mica_itypes.cpp index c00f9d0..c08e817 100644 --- a/mica_itypes.cpp +++ b/mica_itypes.cpp @@ -17,45 +17,40 @@ extern INT64 interval_size; extern INT64 interval_ins_count; extern INT64 total_ins_count; +extern char* _itypes_spec_file; FILE* output_file_itypes; -INT64 mem_read_cnt; -INT64 mem_write_cnt; -INT64 control_cnt; -INT64 arith_cnt; -INT64 fp_cnt; -INT64 stack_cnt; -INT64 shift_cnt; -INT64 string_cnt; -INT64 sse_cnt; -INT64 other_cnt; 
-INT64 nop_cnt; +identifier** group_identifiers; +INT64* group_ids_cnt; +INT64* group_counts; +INT64 number_of_groups; -/* counter functions */ +INT64 other_ids_cnt; +INT64 other_ids_max_cnt; +identifier* other_group_identifiers; +/* counter functions */ ADDRINT itypes_instr_intervals(){ return (ADDRINT)(total_ins_count % interval_size == 0); }; VOID itypes_instr_interval_output(){ + int i; output_file_itypes = fopen("itypes_phases_int_pin.out","a"); - fprintf(output_file_itypes, "%lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld\n", (long long)interval_size, (long long)mem_read_cnt, (long long)mem_write_cnt, (long long)control_cnt, (long long)arith_cnt, (long long)fp_cnt, (long long)stack_cnt, (long long)shift_cnt, (long long)string_cnt, (long long)sse_cnt, (long long)other_cnt, (long long)nop_cnt); + fprintf(output_file_itypes, "%lld", (long long)interval_size); + for(i=0; i < number_of_groups+1; i++){ + fprintf(output_file_itypes, " %lld", group_counts[i]); + } + fprintf(output_file_itypes, "\n"); fclose(output_file_itypes); } VOID itypes_instr_interval_reset(){ - mem_read_cnt = 0; - mem_write_cnt = 0; - control_cnt = 0; - arith_cnt = 0; - fp_cnt = 0; - stack_cnt = 0; - shift_cnt = 0; - string_cnt = 0; - sse_cnt = 0; - other_cnt = 0; - nop_cnt = 0; + int i; + for(i=0; i < number_of_groups+1; i++){ + group_counts[i] = 0; + } } VOID itypes_instr_interval(){ @@ -65,35 +60,271 @@ VOID itypes_instr_interval(){ interval_ins_count = 0; } -VOID itypes_count_mem_read() { mem_read_cnt++; } -VOID itypes_count_mem_write() { mem_write_cnt++; } -VOID itypes_count_control() { control_cnt++; } -VOID itypes_count_arith() { arith_cnt++; } -VOID itypes_count_fp() { fp_cnt++; } -VOID itypes_count_stack() { stack_cnt++; } -VOID itypes_count_shift() { shift_cnt++; } -VOID itypes_count_string() { string_cnt++; } -VOID itypes_count_sse() { sse_cnt++; } -VOID itypes_count_other() { other_cnt++; } -VOID itypes_count_nop() { nop_cnt++; } +VOID itypes_count(UINT32 gid){ + 
group_counts[gid]++; +}; + +// initialize default groups +VOID init_itypes_default_groups(){ + + number_of_groups = 11; + + group_identifiers = (identifier**)malloc((number_of_groups+1)*sizeof(identifier*)); + group_ids_cnt = (INT64*)malloc((number_of_groups+1)*sizeof(INT64)); + group_counts = (INT64*)malloc((number_of_groups+1)*sizeof(INT64)); + for(int i=0; i < number_of_groups+1; i++){ + group_counts[i] = 0; + } + + // memory reads + group_ids_cnt[0] = 1; + group_identifiers[0] = (identifier*)malloc(group_ids_cnt[0]*sizeof(identifier)); + group_identifiers[0][0].type = identifier_type::ID_TYPE_SPECIAL; + group_identifiers[0][0].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[0][0].str, "mem_read"); + + // memory writes + group_ids_cnt[1] = 1; + group_identifiers[1] = (identifier*)malloc(group_ids_cnt[1]*sizeof(identifier)); + group_identifiers[1][0].type = identifier_type::ID_TYPE_SPECIAL; + group_identifiers[1][0].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[1][0].str, "mem_write"); + + // control flow instructions + group_ids_cnt[2] = 5; + group_identifiers[2] = (identifier*)malloc(group_ids_cnt[2]*sizeof(identifier)); + group_identifiers[2][0].type = identifier_type::ID_TYPE_CATEGORY; + group_identifiers[2][0].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[2][0].str, "COND_BR"); + group_identifiers[2][1].type = identifier_type::ID_TYPE_CATEGORY; + group_identifiers[2][1].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[2][1].str, "UNCOND_BR"); + group_identifiers[2][2].type = identifier_type::ID_TYPE_OPCODE; + group_identifiers[2][2].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[2][2].str, "LEAVE"); + group_identifiers[2][3].type = identifier_type::ID_TYPE_OPCODE; + group_identifiers[2][3].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[2][3].str, "RET_NEAR"); + group_identifiers[2][4].type = identifier_type::ID_TYPE_OPCODE; + group_identifiers[2][4].str 
= (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[2][4].str, "CALL_NEAR"); + + // arithmetic instructions (integer) + group_ids_cnt[3] = 5; + group_identifiers[3] = (identifier*)malloc(group_ids_cnt[3]*sizeof(identifier)); + group_identifiers[3][0].type = identifier_type::ID_TYPE_CATEGORY; + group_identifiers[3][0].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[3][0].str, "LOGICAL"); + group_identifiers[3][1].type = identifier_type::ID_TYPE_CATEGORY; + group_identifiers[3][1].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[3][1].str, "DATAXFER"); + group_identifiers[3][2].type = identifier_type::ID_TYPE_CATEGORY; + group_identifiers[3][2].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[3][2].str, "BINARY"); + group_identifiers[3][3].type = identifier_type::ID_TYPE_CATEGORY; + group_identifiers[3][3].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[3][3].str, "FLAGOP"); + group_identifiers[3][4].type = identifier_type::ID_TYPE_CATEGORY; + group_identifiers[3][4].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[3][4].str, "BITBYTE"); + + // floating point instructions + group_ids_cnt[4] = 2; + group_identifiers[4] = (identifier*)malloc(group_ids_cnt[4]*sizeof(identifier)); + group_identifiers[4][0].type = identifier_type::ID_TYPE_CATEGORY; + group_identifiers[4][0].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[4][0].str, "X87_ALU"); + group_identifiers[4][1].type = identifier_type::ID_TYPE_CATEGORY; + group_identifiers[4][1].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[4][1].str, "FCMOV"); + + // pop/push instructions (stack usage) + group_ids_cnt[5] = 2; + group_identifiers[5] = (identifier*)malloc(group_ids_cnt[5]*sizeof(identifier)); + group_identifiers[5][0].type = identifier_type::ID_TYPE_CATEGORY; + group_identifiers[5][0].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[5][0].str, "POP"); + 
group_identifiers[5][1].type = identifier_type::ID_TYPE_CATEGORY; + group_identifiers[5][1].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[5][1].str, "PUSH"); + + // [!] shift instructions (bitwise) + group_ids_cnt[6] = 1; + group_identifiers[6] = (identifier*)malloc(group_ids_cnt[6]*sizeof(identifier)); + group_identifiers[6][0].type = identifier_type::ID_TYPE_CATEGORY; + group_identifiers[6][0].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[6][0].str, "SHIFT"); + + // [!] string instructions + group_ids_cnt[7] = 1; + group_identifiers[7] = (identifier*)malloc(group_ids_cnt[7]*sizeof(identifier)); + group_identifiers[7][0].type = identifier_type::ID_TYPE_CATEGORY; + group_identifiers[7][0].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[7][0].str, "STRINGOP"); + + // [!] MMX/SSE instructions + group_ids_cnt[8] = 2; + group_identifiers[8] = (identifier*)malloc(group_ids_cnt[8]*sizeof(identifier)); + group_identifiers[8][0].type = identifier_type::ID_TYPE_CATEGORY; + group_identifiers[8][0].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[8][0].str, "MMX"); + group_identifiers[8][1].type = identifier_type::ID_TYPE_CATEGORY; + group_identifiers[8][1].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[8][1].str, "SSE"); + + // other (interrupts, rotate instructions, semaphore, conditional move, system) + group_ids_cnt[9] = 7; + group_identifiers[9] = (identifier*)malloc(group_ids_cnt[9]*sizeof(identifier)); + group_identifiers[9][0].type = identifier_type::ID_TYPE_CATEGORY; + group_identifiers[9][0].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[9][0].str, "INTERRUPT"); + group_identifiers[9][1].type = identifier_type::ID_TYPE_CATEGORY; + group_identifiers[9][1].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[9][1].str, "ROTATE"); + group_identifiers[9][2].type = identifier_type::ID_TYPE_CATEGORY; + group_identifiers[9][2].str = 
(char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[9][2].str, "SEMAPHORE"); + group_identifiers[9][3].type = identifier_type::ID_TYPE_CATEGORY; + group_identifiers[9][3].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[9][3].str, "CMOV"); + group_identifiers[9][4].type = identifier_type::ID_TYPE_CATEGORY; + group_identifiers[9][4].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[9][4].str, "SYSTEM"); + group_identifiers[9][5].type = identifier_type::ID_TYPE_CATEGORY; + group_identifiers[9][5].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[9][5].str, "MISC"); + group_identifiers[9][6].type = identifier_type::ID_TYPE_CATEGORY; + group_identifiers[9][6].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[9][6].str, "PREFETCH"); + + // [!] NOP instructions + group_ids_cnt[10] = 2; + group_identifiers[10] = (identifier*)malloc(group_ids_cnt[10]*sizeof(identifier)); + group_identifiers[10][0].type = identifier_type::ID_TYPE_CATEGORY; + group_identifiers[10][0].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[10][0].str, "WIDENOP"); + group_identifiers[10][1].type = identifier_type::ID_TYPE_CATEGORY; + group_identifiers[10][1].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[10][1].str, "NOP"); +} /* initializing */ -void init_itypes(){ +VOID init_itypes(){ - /* initializing total instruction counts is done in mica.cpp */ - - mem_read_cnt = 0; - mem_write_cnt = 0; - control_cnt = 0; - arith_cnt = 0; - fp_cnt = 0; - stack_cnt = 0; - shift_cnt = 0; - string_cnt = 0; - sse_cnt = 0; - other_cnt = 0; - nop_cnt = 0; + int i, j; + int gid, sgid; + char type[100]; + char str[100]; + /* try and open instruction groups specification file */ + if(_itypes_spec_file != NULL){ + FILE* f = fopen(_itypes_spec_file, "r"); + if(f != NULL){ + // count number of groups + number_of_groups = 0; + while( feof(f) == 0){ + fscanf(f, "%d, %d, %[^,], %[^\n]\n", &gid, &sgid, type, str); + if(gid > 
number_of_groups) + number_of_groups++; + } + fclose(f); + number_of_groups++; + fprintf(stderr, "==> found %lld groups\n", (long long)number_of_groups); + + group_identifiers = (identifier**)malloc((number_of_groups+1)*sizeof(identifier*)); + group_ids_cnt = (INT64*)malloc((number_of_groups+1)*sizeof(INT64)); + group_counts = (INT64*)malloc((number_of_groups+1)*sizeof(INT64)); + for(i=0; i < number_of_groups+1; i++){ + group_counts[i] = 0; + } + + // count number of subgroups per group + f = fopen(_itypes_spec_file, "r"); + i=0; + while( feof(f) == 0){ + fscanf(f, "%d, %d, %[^,], %[^\n]\n", &gid, &sgid, type, str); + if(gid == i){ + group_ids_cnt[i]++; + } + else{ + group_identifiers[i] = (identifier*)malloc(group_ids_cnt[i]*sizeof(identifier)); + i++; + group_ids_cnt[i]++; + } + } + group_identifiers[i] = (identifier*)malloc(group_ids_cnt[i]*sizeof(identifier)); + fclose(f); + + // save subgroup types and identifiers + f = fopen(_itypes_spec_file, "r"); + i=0; + while( feof(f) == 0){ + fscanf(f, "%d, %d, %[^,], %[^\n]\n", &gid, &sgid, type, str); + if(strcmp(type, "CATEGORY") == 0){ + group_identifiers[gid][sgid].type = identifier_type::ID_TYPE_CATEGORY; + } + else{ + if(strcmp(type, "OPCODE") == 0){ + group_identifiers[gid][sgid].type = identifier_type::ID_TYPE_OPCODE; + } + else{ + if(strcmp(type, "SPECIAL") == 0){ + group_identifiers[gid][sgid].type = identifier_type::ID_TYPE_SPECIAL; + } + else{ + fprintf(stderr, "ERROR! 
Unknown subgroup type found (\"%s\").\n", type); + fprintf(stderr, " Known subgroup types: {CATEGORY, OPCODE, SPECIAL}.\n"); + exit(-1); + } + } + } + group_identifiers[gid][sgid].str = (char*)malloc(20*sizeof(char)); + strcpy(group_identifiers[gid][sgid].str, str); + } + fclose(f); + + // print out groups read + for(i=0; i < number_of_groups; i++){ + fprintf(stderr, " group %d (#: %lld): ", i, (long long)group_ids_cnt[i]); + for(j=0; j < group_ids_cnt[i]; j++){ + fprintf(stderr, "%s ", group_identifiers[i][j].str); + switch(group_identifiers[i][j].type){ + case identifier_type::ID_TYPE_CATEGORY: + fprintf(stderr, "[CAT]; "); + break; + case identifier_type::ID_TYPE_OPCODE: + fprintf(stderr, "[OPCODE]; "); + break; + case identifier_type::ID_TYPE_SPECIAL: + fprintf(stderr, "[SPECIAL]; "); + break; + default: + fprintf(stderr, "ERROR! Unknown subgroup type found for [%d][%d] (\"%d\").\n", i, j, group_identifiers[i][j].type); + fprintf(stderr, " Known subgroup types: {CATEGORY, OPCODE, SPECIAL}.\n"); + exit(-1); + break; + } + } + fprintf(stderr, "\n"); + } + + // allocate space for identifiers of 'other' group + other_ids_cnt = 0; + other_ids_max_cnt = 2; + other_group_identifiers = (identifier*)malloc(other_ids_max_cnt*sizeof(identifier)); + } + else{ + fprintf(stderr, "ERROR! 
Failed to open file \"%s\" containing instruction groups specification.\n", _itypes_spec_file); + exit(-1); + } + } + else{ + // if no specification file was found, just use defaults (compatible with MICA v0.23 and older) + init_itypes_default_groups(); + } + + // (initializing total instruction counts is done in mica.cpp) + if(interval_size != -1){ output_file_itypes = fopen("itypes_phases_int_pin.out","w"); fclose(output_file_itypes); @@ -103,121 +334,82 @@ void init_itypes(){ /* instrumenting (instruction level) */ VOID instrument_itypes(INS ins, VOID* v){ + int i,j; char cat[50]; char opcode[50]; strcpy(cat,CATEGORY_StringShort(INS_Category(ins)).c_str()); strcpy(opcode,INS_Mnemonic(ins).c_str()); BOOL categorized = false; - - // instructions which read from memory - if( INS_IsMemoryRead(ins) ){ - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count_mem_read,IARG_END); - } - - // instructions which write to memory - if( INS_IsMemoryWrite(ins) ){ - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count_mem_write,IARG_END); - } - - // control flow instructions - if(strcmp(cat,"COND_BR") == 0 || strcmp(cat,"UNCOND_BR") == 0 || strcmp(opcode,"LEAVE") == 0 || strcmp(opcode,"RET_NEAR") == 0 || strcmp(opcode,"CALL_NEAR") == 0){ - categorized = true; - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count_control,IARG_END); - } - else{ - // arithmetic instructions (integer) - if( strcmp(cat,"LOGICAL") == 0 || strcmp(cat,"DATAXFER") == 0 || strcmp(cat,"BINARY") == 0 || strcmp(cat,"FLAGOP") == 0 || strcmp(cat,"BITBYTE") == 0){ - if(categorized){ - fprintf(stderr, "ERROR: Already categorized! (cat: %s, opcode: %s)\n", cat, opcode); - exit(1); - } - categorized = true; - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count_arith,IARG_END); - } - else{ - // floating point instructions - if(strcmp(cat,"X87_ALU") == 0 || strcmp(cat,"FCMOV") == 0){ - if(categorized){ - fprintf(stderr, "ERROR: Already categorized! 
(cat: %s, opcode: %s)\n", cat, opcode); - exit(1); + // go over all groups, increase group count if instruction matches that group + // group counts are increased at most once per instruction executed, + // even if the instruction matches multiple identifiers in that group + for(i=0; i < number_of_groups; i++){ + for(j=0; j < group_ids_cnt[i]; j++){ + if(group_identifiers[i][j].type == identifier_type::ID_TYPE_CATEGORY){ + if(strcmp(group_identifiers[i][j].str, cat) == 0){ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count, IARG_UINT32, i, IARG_END); + categorized = true; + break; } - categorized = true; - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count_fp,IARG_END); - } + } else{ - // pop/push instructions (stack usage) - if( (strcmp(cat,"POP") == 0) || (strcmp(cat,"PUSH") == 0)){ - if(categorized){ - fprintf(stderr, "ERROR: Already categorized! (cat: %s, opcode: %s)\n", cat, opcode); - exit(1); + if(group_identifiers[i][j].type == identifier_type::ID_TYPE_OPCODE){ + if(strcmp(group_identifiers[i][j].str, opcode) == 0){ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count, IARG_UINT32, i, IARG_END); + categorized = true; + break; } - categorized = true; - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count_stack,IARG_END); } else{ - // [!] shift instructions (bitwise) - if(strcmp(cat,"SHIFT") == 0){ - if(categorized){ - fprintf(stderr, "ERROR: Already categorized! (cat: %s, opcode: %s)\n", cat, opcode); - exit(1); - } - categorized = true; - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count_shift,IARG_END); - } - else{ - // [!] string instructions - if(strcmp(cat,"STRINGOP") == 0){ - if(categorized){ - fprintf(stderr, "ERROR: Already categorized! 
(cat: %s, opcode: %s)\n", cat, opcode); - exit(1); - } + if(group_identifiers[i][j].type == identifier_type::ID_TYPE_SPECIAL){ + if(strcmp(group_identifiers[i][j].str, "mem_read") == 0 && INS_IsMemoryRead(ins) ){ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count, IARG_UINT32, i, IARG_END); categorized = true; - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count_string,IARG_END); + break; } else{ - // [!] MMX/SSE instructions - if(strcmp(cat,"MMX") == 0 || strcmp(cat,"SSE") == 0){ - if(categorized){ - fprintf(stderr, "ERROR: Already categorized! (cat: %s, opcode: %s)\n", cat, opcode); - exit(1); - } + if(strcmp(group_identifiers[i][j].str, "mem_write") == 0 && INS_IsMemoryWrite(ins) ){ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count, IARG_UINT32, i, IARG_END); categorized = true; - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count_sse,IARG_END); - } + break; + } else{ - // other (interrupts, rotate instructions, semaphore, conditional move, system) - if(strcmp(cat,"INTERRUPT") == 0 || strcmp(cat,"ROTATE") == 0 || strcmp(cat,"SEMAPHORE") == 0 || strcmp(cat,"CMOV") == 0 || strcmp(cat,"SYSTEM") == 0 || strcmp(cat,"MISC") == 0 || strcmp(cat,"PREFETCH") == 0 ){ - if(categorized){ - fprintf(stderr, "ERROR: Already categorized! (cat: %s, opcode: %s)\n", cat, opcode); - exit(1); - } - categorized = true; - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count_other,IARG_END); - } - else{ - // [!] NOP instructions - if(strcmp(cat,"WIDENOP") == 0 || strcmp(cat,"NOP") == 0){ - if(categorized){ - fprintf(stderr, "ERROR: Already categorized! (cat: %s, opcode: %s)\n", cat, opcode); - exit(1); - } - categorized = true; - INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count_nop,IARG_END); - } - } } } } + else{ + fprintf(stderr, "ERROR! Unknown identifier type specified (%d).\n", group_identifiers[i][j].type); + } } } } } - if(!categorized){ - fprintf(stderr,"What the hell ?!? I don't know this one yet! 
(cat: %s, opcode: %s) -> not NOP\n", cat, opcode); - exit(1); - } + // count instruction that don't fit in any of the specified categories in the last group + if( !categorized ){ + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count, IARG_UINT32, (unsigned int)number_of_groups, IARG_END); + + // check whether this category is already known in the 'other' group + for(i=0; i < other_ids_cnt; i++){ + if(strcmp(other_group_identifiers[i].str, cat) == 0) + break; + } + + // if a new instruction category is found, add it to the set + if(i == other_ids_cnt){ + other_group_identifiers[other_ids_cnt].type = identifier_type::ID_TYPE_CATEGORY; + other_group_identifiers[other_ids_cnt].str = (char*)malloc((strlen(cat)+1)*sizeof(char)); + strcpy(other_group_identifiers[other_ids_cnt].str, cat); + other_ids_cnt++; + } + + // prepare for (possible) next category + if(other_ids_cnt == other_ids_max_cnt){ + other_ids_max_cnt *= 2; + other_group_identifiers = (identifier*)realloc(other_group_identifiers, other_ids_max_cnt*sizeof(identifier)); + } + } /* inserting calls for counting instructions is done in mica.cpp */ if(interval_size != -1){ @@ -229,15 +421,29 @@ VOID instrument_itypes(INS ins, VOID* v){ /* finishing... 
*/
/*
 * fini_itypes: write the instruction-type counts when the analysis finishes.
 *
 * Output files:
 *   - itypes_full_int_pin.out   (interval_size == -1, truncated):
 *       "<total_ins_count> <count group 0> ... <count group number_of_groups>"
 *   - itypes_phases_int_pin.out (otherwise, appended):
 *       "<interval_ins_count> <count group 0> ... <count group number_of_groups>"
 *     Either file then gets a "number of instructions: <total_ins_count>" line.
 *   - itypes_other_group_categories.txt: one line per instruction category
 *     collected in other_group_identifiers.
 *
 * group_counts is read for indices 0..number_of_groups inclusive; the extra
 * slot at index number_of_groups is the one instrument_itypes uses for
 * instructions that match none of the specified groups.
 *
 * Parameters:
 *   code - not used here
 *   v    - not used here
 */
VOID fini_itypes(INT32 code, VOID* v){
	int i;
	const char* counts_filename;
	const char* counts_mode;
	long long reported_ins_count;

	if(interval_size == -1){
		counts_filename = "itypes_full_int_pin.out";
		counts_mode = "w";
		reported_ins_count = (long long)total_ins_count;
	}
	else{
		counts_filename = "itypes_phases_int_pin.out";
		counts_mode = "a";
		reported_ins_count = (long long)interval_ins_count;
	}

	output_file_itypes = fopen(counts_filename, counts_mode);
	if(output_file_itypes == NULL){
		// do not write through a NULL stream; report and carry on with the second file
		fprintf(stderr, "ERROR: Failed to open file \"%s\" for writing instruction type counts.\n", counts_filename);
	}
	else{
		fprintf(output_file_itypes, "%lld", reported_ins_count);
		for(i=0; i < number_of_groups+1; i++){
			// cast explicitly: INT64 is not guaranteed to be 'long long'
			fprintf(output_file_itypes, " %lld", (long long)group_counts[i]);
		}
		// terminate the counts line in both modes, so the summary line
		// below is not glued onto the last group count
		fprintf(output_file_itypes, "\n");
		fprintf(output_file_itypes,"number of instructions: %lld\n", (long long)total_ins_count);
		fclose(output_file_itypes);
	}

	// print instruction categories in 'other' group of instructions
	FILE* output_file_other_group_categories = fopen("itypes_other_group_categories.txt", "w");
	if(output_file_other_group_categories == NULL){
		fprintf(stderr, "ERROR: Failed to open file \"%s\" for writing.\n", "itypes_other_group_categories.txt");
	}
	else{
		for(i=0; i < other_ids_cnt; i++){
			fprintf(output_file_other_group_categories, "%s\n", other_group_identifiers[i].str);
		}
		// close the stream so buffered output is flushed and the handle is released
		fclose(output_file_other_group_categories);
	}
}