#!/usr/bin/python

'''
    Python file that loads the data containing time stamps of all commands
    executed during the boot of a virtual machine and analyzes them to get
    the duration of each one, so that the best candidates for optimization
    (in terms of time) can be identified.

    The score files (scores.csv.<pid>) are read from the directory named by
    the ASH_SCORES environment variable unless a directory is passed
    explicitly to loadScores() / calculateDuration().

    Record format, one record per line, fields separated by '!':
        start record:  <command>!<start time>!<pid>
        end record:    !!<pid>!<end time>
    A time of -1 is a placeholder and carries no measurement.
'''

import fnmatch
import os
import sys
from operator import itemgetter

try:
    # Not used by this module; kept importable for existing users of the
    # name, but it must not be a hard requirement of the analysis.
    import numpy as numpy
except ImportError:
    numpy = None


HEADER = "command!start!pid!end\n"

# Directory holding the score files; None when ASH_SCORES is not set.  The
# check is done on first use so that importing this module never terminates
# the interpreter.
directoryScores = os.environ.get("ASH_SCORES")


def _scoresDirectory(directory=None):
    '''
        Returns the directory to work on: the explicit argument if given,
        otherwise the one taken from ASH_SCORES. Exits with an error
        message when there is no such directory.
    '''

    if directory is None:
        directory = directoryScores
    if directory is None or not os.path.isdir(directory):
        sys.exit("Chosen directory with data from BusyBox ash does not exist.")
    return directory


def _parseRecord(line):
    '''
        Splits one score line into a tuple (kind, command, pid, time) where
        kind is "start" or "end". Returns None for the header and for any
        line that does not follow the record format.

        Start records are split from the right, so a command that itself
        contains '!' (e.g. "[ ! -f x ]") is kept intact.
    '''

    text = line.rstrip("\n")
    try:
        if text.startswith("!!"):
            pid, end = text[2:].split("!")
            return ("end", "", int(pid), int(end))
        command, start, pid = text.rsplit("!", 2)
        return ("start", command, int(pid), int(start))
    except ValueError:
        # wrong number of fields or a non-numeric pid / time stamp
        return None


def _hasPlaceholder(line):
    '''
        Returns True if any field of the line is the placeholder value -1.
    '''

    return any(field.rstrip("\n") == "-1" for field in line.split("!"))


def loadScores(directory=None):
    '''
        Method that loads all time stamps related to the specific commands,
        removes duplicates and time stamps that were used just as an
        orientation (e.g. -1).
        Result is written in a text file named "all" inside the scores
        directory. Score files are processed in sorted name order so that
        the result does not depend on the order os.listdir() returns.
    '''

    directory = _scoresDirectory(directory)
    seenInEarlierFiles = set()

    with open(os.path.join(directory, "all"), "w") as fAll:
        fAll.write(HEADER)
        names = fnmatch.filter(os.listdir(directory), 'scores.csv.*')
        for scorePid in sorted(names):
            with open(os.path.join(directory, scorePid), "r") as f:
                lines = f.readlines()

            listWritten = []
            for line in lines:
                if line == HEADER or _hasPlaceholder(line):
                    continue
                if line in seenInEarlierFiles or findByPid(line, listWritten):
                    continue
                fAll.write(line)
                listWritten.append(line)

            # lines of this file only count as "earlier" for the next files
            seenInEarlierFiles.update(listWritten)


def findByPid(line, writtenLines):
    '''
        Method that returns True if an equivalent record is already among
        writtenLines: a line with the same number of fields that is either
        identical or differs in exactly one field (e.g. the same command
        and pid recorded again with a newer time stamp).
    '''

    fields = line.split('!')
    for written in writtenLines:
        other = written.split('!')
        if len(other) != len(fields):
            continue
        differing = sum(1 for a, b in zip(fields, other) if a != b)
        if differing <= 1:
            return True
    return False


def hasSamePid(first, second):
    '''
        Method that returns True if two records (start or end, in any
        combination) carry the same pid, so that a duration can be
        calculated for that specific pid. Lines that are not valid records
        never match.
    '''

    firstRecord = _parseRecord(first)
    secondRecord = _parseRecord(second)
    if firstRecord is None or secondRecord is None:
        return False
    return firstRecord[2] == secondRecord[2]


def calculateDuration(directory=None):
    '''
        Given all scores from the file "all", this method pairs every start
        record with the first end record that follows it and has the same
        pid, and calculates the duration of each process. Durations are
        summed per command; results are sorted by average duration
        (ascending) and saved to a new file named "sortedAll".

        The "total duration" line is the span between the earliest start
        and the latest end, or 0 when there is nothing to measure.
    '''

    directory = _scoresDirectory(directory)

    records = []
    with open(os.path.join(directory, "all"), "r") as fAll:
        for line in fAll:
            record = _parseRecord(line)
            # negative time stamps are placeholders, not measurements
            if record is not None and record[3] >= 0:
                records.append(record)

    starts = [stamp for kind, _, _, stamp in records if kind == "start"]
    ends = [stamp for kind, _, _, stamp in records if kind == "end"]
    totalDuration = max(ends) - min(starts) if starts and ends else 0

    # Walk backwards remembering the nearest following end per pid; this
    # yields "first end record after the start" in a single pass.
    nextEnd = {}
    durations = []
    for kind, command, pid, stamp in reversed(records):
        if kind == "end":
            nextEnd[pid] = stamp
        elif pid in nextEnd:
            durations.append((command, nextEnd[pid] - stamp))
    durations.reverse()

    totals = {}
    usedCommands = []
    for command, duration in durations:
        if command not in totals:
            totals[command] = [0, 0]
            usedCommands.append(command)
        totals[command][0] += duration
        totals[command][1] += 1

    # integer average, identical on Python 2 and Python 3
    rows = [(command, totals[command][0], totals[command][1],
             totals[command][0] // totals[command][1])
            for command in usedCommands]
    rows.sort(key=itemgetter(3))

    with open(os.path.join(directory, "sortedAll"), "w") as fSort:
        fSort.write("command ! total ! nbOfTimes ! avgDuration\n")
        fSort.write("total duration: %d\n" % totalDuration)
        for command, total, count, average in rows:
            fSort.write("\n%s! %d ! %d ! %d " % (command, total, count, average))


if __name__ == "__main__":
    # Same sequence the companion driver script runs.
    loadScores()
    calculateDuration()
#ifdef GET_TIME_CONSUMPTION
/*
 * Recorder for the per-command timing data.
 *
 * Events are kept in a singly linked list, in the order they were stored,
 * and are dumped by write_to_csv().  'type' selects how 'item' is read:
 *   0 = start pid, 1 = end pid, 2 = start time, 3 = end time
 *       (a long int kept in the pointer value itself),
 *   4 = command (pointer to a NUL-terminated string; the string is not
 *       copied, so it must outlive the list).
 */
enum {
	EV_START_PID  = 0,
	EV_END_PID    = 1,
	EV_START_TIME = 2,
	EV_END_TIME   = 3,
	EV_COMMAND    = 4
};

struct event_list
{
	int type;
	void *item;
	struct event_list *next;
};

struct event_list *head = NULL;	/* first stored event, NULL while the list is empty */
struct event_list *curr = NULL;	/* last stored event, i.e. the append point */

/*
 * Allocate and fill one node that is not linked into the list yet.
 * Returns NULL if 'type' is not one of the five event types or if the
 * allocation fails.
 */
static struct event_list *
new_ev_node(int type, long int valInt, char *valChar)
{
	struct event_list *node;

	if (type < EV_START_PID || type > EV_COMMAND)
		return NULL;

	node = malloc(sizeof(*node));
	if (node == NULL)
		return NULL;

	node->type = type;
	/* numeric events travel inside the pointer; the casts make that explicit */
	node->item = (type == EV_COMMAND) ? (void *)valChar : (void *)valInt;
	node->next = NULL;
	return node;
}

/*
 * Start the list with a single event: 'head' and 'curr' both point at the
 * new node afterwards.  'valInt' is used for types 0-3, 'valChar' for
 * type 4; the other argument is ignored.
 * Returns the new node, or NULL (list untouched) on an unknown type or
 * allocation failure.
 */
struct event_list* create_ev_list(int type, long int valInt, char *valChar)
{
	struct event_list *node = new_ev_node(type, valInt, valChar);

	if (node == NULL)
		return NULL;

	head = curr = node;
	return node;
}

/*
 * Append one event to the list, creating the list on first use.
 * Arguments are interpreted as for create_ev_list().
 * Returns the new node, or NULL (list untouched) on an unknown type or
 * allocation failure.
 */
struct event_list* store_ev_list(int type, long int valInt, char *valChar)
{
	struct event_list *node;

	if (head == NULL)
		return create_ev_list(type, valInt, valChar);

	node = new_ev_node(type, valInt, valChar);
	if (node == NULL)
		return NULL;

	curr->next = node;
	curr = node;
	return node;
}

/*
 * Write the header line followed by every stored event to 'fp'.
 *
 * Events are emitted back to back; only a start pid (0) and an end time (3)
 * terminate a line, so the sequences 4,2,0 and 1,3 produce
 *   <command>!<start time>!<pid>
 *   !!<pid>!<end time>
 * A NULL command is written as an empty string.
 */
void write_to_csv(FILE *fp)
{
	const struct event_list *ev;

	fprintf(fp, "command!start!pid!end\n");

	/* advancing in the loop header keeps an unexpected type from stalling the walk */
	for (ev = head; ev != NULL; ev = ev->next) {
		switch (ev->type) {
		case EV_START_PID:
		case EV_END_TIME:
			fprintf(fp, "!%ld\n", (long int)ev->item);
			break;
		case EV_END_PID:
			fprintf(fp, "!!%ld", (long int)ev->item);
			break;
		case EV_START_TIME:
			fprintf(fp, "!%ld", (long int)ev->item);
			break;
		case EV_COMMAND:
			fprintf(fp, "%s", ev->item != NULL ? (const char *)ev->item : "");
			break;
		default:
			break;
		}
	}
}
#endif
- * - * Called with interrupts off. - */ -static int -waitforjob(struct job *jp) -{ - int st; - - TRACE(("waitforjob(%%%d) called\n", jobno(jp))); - - INT_OFF; - while (jp->state == JOBRUNNING) { - /* In non-interactive shells, we _can_ get - * a keyboard signal here and be EINTRed, - * but we just loop back, waiting for command to complete. - * - * man bash: - * "If bash is waiting for a command to complete and receives - * a signal for which a trap has been set, the trap - * will not be executed until the command completes." - * - * Reality is that even if trap is not set, bash - * will not act on the signal until command completes. - * Try this. sleep5intoff.c: - * #include - * #include - * int main() { - * sigset_t set; - * sigemptyset(&set); - * sigaddset(&set, SIGINT); - * sigaddset(&set, SIGQUIT); - * sigprocmask(SIG_BLOCK, &set, NULL); - * sleep(5); - * return 0; - * } - * $ bash -c './sleep5intoff; echo hi' - * ^C^C^C^C <--- pressing ^C once a second - * $ _ - * $ bash -c './sleep5intoff; echo hi' - * ^\^\^\^\hi <--- pressing ^\ (SIGQUIT) - * $ _ - */ - dowait(DOWAIT_BLOCK, jp); - } - INT_ON; - - st = getstatus(jp); -#if JOBS - if (jp->jobctl) { - xtcsetpgrp(ttyfd, rootpid); - /* - * This is truly gross. - * If we're doing job control, then we did a TIOCSPGRP which - * caused us (the shell) to no longer be in the controlling - * session -- so we wouldn't have seen any ^C/SIGINT. So, we - * intuit from the subprocess exit status whether a SIGINT - * occurred, and if so interrupt ourselves. Yuck. - mycroft - */ - if (jp->sigint) /* TODO: do the same with all signals */ - raise(SIGINT); /* ... by raise(jp->sig) instead? */ - } - if (jp->state == JOBDONE) -#endif - freejob(jp); - return st; -} /* * return 1 if there are stopped jobs, otherwise 0 @@ -7963,6 +8034,152 @@ describe_command(char *command, int describe_command_verbose) return 0; } + +/* + * Wait for job to finish. 
+ * + * Under job control we have the problem that while a child process + * is running interrupts generated by the user are sent to the child + * but not to the shell. This means that an infinite loop started by + * an interactive user may be hard to kill. With job control turned off, + * an interactive user may place an interactive program inside a loop. + * If the interactive program catches interrupts, the user doesn't want + * these interrupts to also abort the loop. The approach we take here + * is to have the shell ignore interrupt signals while waiting for a + * foreground process to terminate, and then send itself an interrupt + * signal if the child process was terminated by an interrupt signal. + * Unfortunately, some programs want to do a bit of cleanup and then + * exit on interrupt; unless these processes terminate themselves by + * sending a signal to themselves (instead of calling exit) they will + * confuse this approach. + * + * Called with interrupts off. + */ +static int +waitforjob(struct job *jp) +{ + int st; + + TRACE(("waitforjob(%%%d) called\n", jobno(jp))); + + INT_OFF; + + while (jp->state == JOBRUNNING) + { + /* In non-interactive shells, we _can_ get + * a keyboard signal here and be EINTRed, + * but we just loop back, waiting for command to complete. + * + * man bash: + * "If bash is waiting for a command to complete and receives + * a signal for which a trap has been set, the trap + * will not be executed until the command completes." + * + * Reality is that even if trap is not set, bash + * will not act on the signal until command completes. + * Try this. 
sleep5intoff.c: + * #include + * #include + * int main() { + * sigset_t set; + * sigemptyset(&set); + * sigaddset(&set, SIGINT); + * sigaddset(&set, SIGQUIT); + * sigprocmask(SIG_BLOCK, &set, NULL); + * sleep(5); + * return 0; + * } + * $ bash -c './sleep5intoff; echo hi' + * ^C^C^C^C <--- pressing ^C once a second + * $ _ + * $ bash -c './sleep5intoff; echo hi' + * ^\^\^\^\hi <--- pressing ^\ (SIGQUIT) + * $ _ + */ + +#ifdef GET_TIME_CONSUMPTION + // START // + int n = jp->nprocs; + for (int j = 0; j < n; j++) + { + struct procstat *ps = &jp->ps[j]; + /* Skip non-running and not-stopped members + * (i.e. dead members) of the job + */ + struct timeval start; + long int startTime; + if (gettimeofday(&start,NULL)){ + //Handle error + } + long int usecStart = start.tv_sec * 1000000 + start.tv_usec; + if (ps->ps_status == -1) + { + startTime = usecStart; + } + else + { + startTime = -1; + } + char *command = ckstrdup(ps->ps_cmd); + store_ev_list(4,0,command); + store_ev_list(2,startTime, ""); + store_ev_list(0,ps->ps_pid, ""); + } +#endif + + dowait(DOWAIT_BLOCK, jp); + +#ifdef GET_TIME_CONSUMPTION + // END // + int m = jp->nprocs; + for (int k = 0; k < m; k++) + { + struct procstat *ps = &jp->ps[k]; + /* Skip non-running and not-stopped members + * (i.e. dead members) of the job + */ + long int endTime; + struct timeval end; + if (gettimeofday(&end,NULL)){ + //Handle error + } + long int usecEnd = end.tv_sec * 1000000 + end.tv_usec; + if (ps->ps_status != -1) + { + endTime = usecEnd; + } + else + { + endTime = -1; + } + store_ev_list(1,ps->ps_pid, ""); + store_ev_list(3,endTime, ""); + } +#endif + } + INT_ON; + st = getstatus(jp); +#if JOBS + if (jp->jobctl) { + xtcsetpgrp(ttyfd, rootpid); + /* + * This is truly gross. + * If we're doing job control, then we did a TIOCSPGRP which + * caused us (the shell) to no longer be in the controlling + * session -- so we wouldn't have seen any ^C/SIGINT. 
So, we + * intuit from the subprocess exit status whether a SIGINT + * occurred, and if so interrupt ourselves. Yuck. - mycroft + */ + if (jp->sigint) /* TODO: do the same with all signals */ + raise(SIGINT); /* ... by raise(jp->sig) instead? */ + } + if (jp->state == JOBDONE) + +#endif + freejob(jp); + return st; +} + static int FAST_FUNC typecmd(int argc UNUSED_PARAM, char **argv) { @@ -9426,7 +9643,7 @@ evalcommand(union node *cmd, int flags) INT_OFF; jp = makejob(/*cmd,*/ 1); if (forkshell(jp, cmd, FORK_FG) != 0) { - /* parent */ + /* parent */ exitstatus = waitforjob(jp); INT_ON; TRACE(("forked child exited with %d\n", exitstatus)); @@ -9434,7 +9651,7 @@ evalcommand(union node *cmd, int flags) } /* child */ FORCE_INT_ON; - /* fall through to exec'ing external program */ + /* fall through to exec'ing external program */ } listsetvar(varlist.list, VEXPORT|VSTACK); shellexec(argv, path, cmdentry.u.index); @@ -12986,6 +13203,24 @@ exitshell(void) char *p; int status; +#ifdef GET_TIME_CONSUMPTION + //save data to .csv files if needed + char *path_to_scores = lookupvar("ASH_SCORES"); + if(path_to_scores) { + char filename[30]; + snprintf(filename, 30, "/scores.csv.%d", getpid()); + path_to_scores = concat_path_file(path_to_scores, filename); + FILE *fp = fopen(path_to_scores, "w+"); + if (fp == NULL) + { + abort(); + } + write_to_csv(fp); + fclose(fp); + free((char*)path_to_scores); + } +#endif + #if ENABLE_FEATURE_EDITING_SAVE_ON_EXIT save_history(line_input_state); #endif