Permalink
Browse files

Merge pull request #6 from kavink/master

Clarify about using actprep when drives are OP using fdisk
  • Loading branch information...
2 parents d102edd + c621928 commit 09fe53d2989032803f39b53d0f2a6f5ee84f487b @cstivers78 cstivers78 committed Jan 15, 2013
Showing with 6 additions and 370 deletions.
  1. +6 −1 README
  2. +0 −280 latency_calc/cf_latency.py
  3. +0 −85 latency_calc/cf_latency.sh
  4. +0 −4 latency_calc/get_latency
View
7 README
@@ -58,9 +58,14 @@ This package contains actprep, an executable that may be used to clean and salt
a device. actprep takes a device name as its only command-line parameter. For
a typical 240GB SSD, actprep takes a little over an hour to run.
-Example - to clean and salt device /dev/sdc:
+Example - to clean and salt device /dev/sdc (if over-provisioned using hdparm):
$ sudo ./actprep /dev/sdc
+If over-provisioned using fdisk, make sure you specify the partition and not the raw
+device; if the raw device (/dev/sdc) is used, actprep will wipe out the partition table.
+
+ $ sudo ./actprep /dev/sdc1
+
act Overview
------------
View
@@ -1,280 +0,0 @@
-#!/usr/bin/python
-
-# Get Latency of SSD from the act output
-
-import sys
-import types
-import getopt
-import re
-
# Two-digit labels of the histogram buckets looked up in the act output.
# The tuple holds 17 labels, "00" through "16".
bucketlist = ("00","01","02","03","04","05","06","07","08","09","10","11","12","13","14","15","16")
-
# Function to get the values in the bucket.
# Pass the operation (or the string in the act output)
def get_buckets(operation, line, fileid, buckets=None):
    """Parse one histogram section and return its total and bucket counts.

    operation -- label of the section; accepted for the callers' benefit
                 but not used by the parsing itself.
    line      -- the section header line, containing "(<N> total)".
    fileid    -- open file object positioned just after the header line.
    buckets   -- bucket labels to look for; defaults to the module-level
                 bucketlist.

    Lines are consumed for as long as they contain at least one
    "(<label>: <count>)" entry for a known label.

    Returns (total, value, line): the total from the header, a dict that
    maps every label to its count (0.0 when the label never appeared) and
    the first line that held no bucket entry ("" at end of file).

    Raises ValueError if the header total or a bucket count is not an
    integer.
    """
    if buckets is None:
        buckets = bucketlist

    # int() promotes to an arbitrary-precision integer when needed, so it
    # covers what long() was used for.
    total = int(line[line.find("(") + 1:line.find(" total)")])

    # Compile each label's pattern once per call instead of once per label
    # for every line read.
    patterns = [(b, re.compile(r"\(" + re.escape(b) + r": (.*?)\)"))
                for b in buckets]

    value = {}
    for b in buckets:
        value[b] = 0.0

    line = fileid.readline()
    while True:
        found = False
        for b, pattern in patterns:
            m = pattern.search(line)
            if m:
                found = True
                value[b] = int(m.group(1))
        if not found:
            # First line without any bucket entry ends the section.
            break
        line = fileid.readline()

    return total, value, line
-
# Function to subtract two buckets. Return the bucket of subtracted values
def substract_buckets(value1, value2, buckets=None):
    """Return a dict holding value1[b] - value2[b] for every bucket label.

    value1, value2 -- dicts keyed by bucket label.
    buckets        -- labels to process; defaults to the module-level
                      bucketlist.

    Raises KeyError if a label is missing from either input.
    """
    if buckets is None:
        buckets = bucketlist
    difference = {}
    for b in buckets:
        difference[b] = value1[b] - value2[b]
    return difference
-
# Function to get the percentage of operations within a bucket,
# pass the total operation and the bucket containing values.
# Returns the bucket containing percentage in each bucket
def percentage_buckets(num_operations, operation_values, buckets=None):
    """Return each bucket's share of num_operations as a percentage.

    num_operations   -- total number of operations in the interval.
    operation_values -- dict mapping bucket label to operation count.
    buckets          -- labels to process; defaults to the module-level
                        bucketlist.

    When num_operations is not positive every bucket is reported as 0.0,
    so the result always has one entry per label and can be handed to
    get_percentage() without a KeyError.
    """
    if buckets is None:
        buckets = bucketlist
    percentage_value = {}
    for b in buckets:
        if num_operations > 0:
            percentage_value[b] = (float(operation_values[b]) / num_operations) * 100
        else:
            percentage_value[b] = 0.0
    return percentage_value
-
# Function to get the percentage of operations in bucket numbers > num
def get_percentage(percentage_value, num, buckets=None):
    """Sum the percentages of all buckets positioned after the first num.

    percentage_value -- dict mapping bucket label to percentage.
    num              -- number of leading buckets (in label order) to skip.
    buckets          -- ordered labels; defaults to the module-level
                        bucketlist.

    Returns 0.0 when num is not positive, so callers always get a number
    back instead of an UnboundLocalError.
    """
    if buckets is None:
        buckets = bucketlist
    perc = 0.0
    if num > 0:
        # Positions are 1-based: position 1 is the first label.
        for position, b in enumerate(buckets, 1):
            if position > num:
                perc = perc + percentage_value[b]
    return perc
-
# Function to get the chunk of values for each interval of time.
# Skip as many time intervals given in skip parameter. Pass any line from output
def get_chunk(line, fileid, skip):
    """Advance to the skip-th next "After <N> sec:" report and parse it.

    line   -- the most recently read line of the log.
    fileid -- open file object the following lines are read from.
    skip   -- how many "After ..." headers to advance over; the last one
              reached is the report that gets parsed.

    Returns (time, raw_total, raw_value, read_total, read_value, line)
    where time is the <N> of the header, the *_total / *_value pairs come
    from get_buckets() for the "RAW READS" and "READS" sections, and line
    is the first line not consumed.  If the end of the file is reached, or
    either section is missing from the report, the first five items are 0.
    """
    for i in range(skip):
        while line and not line.startswith("After "):
            line = fileid.readline()
        if i < skip - 1:
            # Step past this header so the next pass finds the following one.
            line = fileid.readline()
    if not line:
        return 0, 0, 0, 0, 0, line

    time = int(line[line.find("After ") + 6:line.find(" sec:")])
    line = fileid.readline()

    raw_total = raw_value = None
    read_total = read_value = None
    # A report ends at the first blank line (or at end of file).
    while line and line.strip():
        if line.startswith("RAW READS "):
            # Read from the file that was passed in, not a module global.
            raw_total, raw_value, line = get_buckets("RAW READS", line, fileid)
        elif line.startswith("READS "):
            read_total, read_value, line = get_buckets("READS", line, fileid)
        else:
            line = fileid.readline()

    if raw_value is None or read_value is None:
        return 0, 0, 0, 0, 0, line
    return time, raw_total, raw_value, read_total, read_value, line
-
# Format one group of columns for print_line.
def _format_columns(values, columns, skip_buckets):
    """Return the padded "%.2f" cells for every skip_buckets-th value."""
    cells = []
    for i in range(columns):
        space = " "
        # Later columns get extra padding.
        if i > 4:
            space = space + " "
        if i > 7:
            space = space + " "
        cells.append(space + "%.2f" % (values[i * skip_buckets]))
    return cells


# Print output line.
def print_line(read, raw_read, num_buckets, skip_buckets, num):
    """Print one table row: a label, the read columns, the raw-read columns.

    read, raw_read -- sequences of percentages indexed by bucket.
    num_buckets    -- number of buckets held in read / raw_read.
    skip_buckets   -- step between the buckets that get a column.
    num            -- row label (slice number or a string such as "avg").
    """
    # Explicit floor division gives the same column count under
    # Python 2 and Python 3.
    columns = (num_buckets - 1) // skip_buckets + 1

    parts = ["%3s" % (num,), " "]
    parts.extend(_format_columns(read, columns, skip_buckets))

    # Gap between the two column groups grows with the bucket count.
    parts.append(" ")
    if num_buckets > 4:
        parts.append(" ")
    if num_buckets > 7:
        parts.append(" ")

    parts.extend(_format_columns(raw_read, columns, skip_buckets))
    print("".join(parts))
-
# Function to print usage
def usage():
    """Print the command-line help text to standard output."""
    # The -b (number of buckets) and -s (skip buckets) options are accepted
    # by the option parser but are not advertised in the help text.
    help_lines = (
        "Usage:",
        " -l act outfile (Eg. actout.txt)",
        " -t Time Interval in seconds (Eg. 3600)",
    )
    for text in help_lines:
        print(text)
-
# arg processing
try:
    opts, args = getopt.getopt(sys.argv[1:], "l:b:s:t:", ["log=","buckets=","skip_buckets=","time="])
except getopt.GetoptError as err:
    print(str(err))
    usage()
    sys.exit(-1)

# Main

# Default values for arguments
arg_log = None
arg_buckets = 7
arg_skip_buckets = 3
arg_time = 3600
for o, a in opts:
    if o in ("-l", "--log"):
        arg_log = a
    if o in ("-b", "--buckets"):
        arg_buckets = int(a)
    if o in ("-s", "--skip_buckets"):
        arg_skip_buckets = int(a)
    if o in ("-t", "--time"):
        arg_time = int(a)

if arg_log is None or -1 in (arg_buckets, arg_skip_buckets, arg_time):
    usage()
    sys.exit(-1)

if arg_buckets > 16 or arg_buckets < 1:
    print("Buckets should be in between 1 and 16. Given %s" % arg_buckets)
    sys.exit(-1)

# A step of zero would divide by zero when the columns are counted.
if arg_skip_buckets < 1:
    print("Skip buckets should be at least 1. Given %s" % arg_skip_buckets)
    sys.exit(-1)

if arg_skip_buckets > arg_buckets:
    print("Skip buckets %s should be less than buckets %s" % (arg_skip_buckets, arg_buckets))
    sys.exit(-1)


# Percentages for the operations that happened between two samples.
def _slice_percentages(old_total, old_value, new_total, new_value, num_buckets):
    """Return, per bucket index, the rounded % of operations above it."""
    num_operations = new_total - old_total
    if num_operations <= 0:
        # Nothing happened in this interval: report zeros rather than
        # looking percentages up in an empty result.
        return [0.0] * num_buckets
    delta = substract_buckets(new_value, old_value)
    percentages = percentage_buckets(num_operations, delta)
    return [round(get_percentage(percentages, i + 1), 2) for i in range(num_buckets)]


# Open the log file
try:
    fileIN = open(arg_log, "r")
except IOError:
    print("log file "+arg_log+" not found.")
    sys.exit(-1)

# The with-block closes the log on every exit path, including sys.exit().
with fileIN:
    # Get the first chunk of values
    line = fileIN.readline()
    old_time, old_raw_read, old_raw_value, old_read, old_read_value, line = get_chunk(line, fileIN, 1)
    if not old_time:
        print("get_chunk failed")
        sys.exit(-1)

    # Get the second chunk of values
    new_time, new_raw_read, new_raw_value, new_read, new_read_value, line = get_chunk(line, fileIN, 1)
    if not new_time:
        print("get_chunk failed")
        sys.exit(-1)

    # Find the time interval and skip number according to arg_time
    time_interval = new_time - old_time
    if (arg_time % time_interval) != 0:
        print("Time %s is not multiple of %s . Cannot proceed" % (arg_time, time_interval))
        sys.exit(-1)
    num_interval = arg_time // time_interval
    if num_interval < 3:
        print("Time %s should be atleast more than twice %s . Cannot proceed" % (arg_time, time_interval))
        sys.exit(-1)

    # Get the first chunk of data to process after the arg_time interval (already 2 lines read)
    new_time, new_raw_read, new_raw_value, new_read, new_read_value, line = get_chunk(line, fileIN, num_interval - 2)
    if not new_time:
        print("Not enough line to print for "+str(arg_time)+" secs")
        sys.exit()
    final_time = new_time

    num = 1
    read = _slice_percentages(old_read, old_read_value, new_read, new_read_value, arg_buckets)
    raw_read = _slice_percentages(old_raw_read, old_raw_value, new_raw_read, new_raw_value, arg_buckets)

    # Running sums (for the true mean) and maxima over all printed slices.
    slices = 1
    sum_read = list(read)
    sum_raw_read = list(raw_read)
    max_read = list(read)
    max_raw_read = list(raw_read)

    # Print the heading
    columns = (arg_buckets - 1) // arg_skip_buckets + 1
    thresholds = "".join(" %>" + str(2 ** (i * arg_skip_buckets)) + "ms" for i in range(columns))
    print("slice whole" + thresholds + " SSD-only" + thresholds)

    output_line = "-------------------" + "--------------" * columns
    print(output_line)

    # Print the first latency value
    print_line(read, raw_read, arg_buckets, arg_skip_buckets, num)

    # Process the rest of the data print latency values for each arg_time (skip num_interval)
    while line:
        num = num + 1
        # The sample that closed the previous slice is the baseline for the
        # next one; this also has to happen after the very first slice.
        old_time, old_raw_read, old_raw_value, old_read, old_read_value = new_time, new_raw_read, new_raw_value, new_read, new_read_value
        new_time, new_raw_read, new_raw_value, new_read, new_read_value, line = get_chunk(line, fileIN, num_interval)
        if not new_time:
            break
        final_time = new_time

        read = _slice_percentages(old_read, old_read_value, new_read, new_read_value, arg_buckets)
        raw_read = _slice_percentages(old_raw_read, old_raw_value, new_raw_read, new_raw_value, arg_buckets)

        slices = slices + 1
        for i in range(arg_buckets):
            sum_read[i] = sum_read[i] + read[i]
            sum_raw_read[i] = sum_raw_read[i] + raw_read[i]
            if read[i] > max_read[i]:
                max_read[i] = read[i]
            if raw_read[i] > max_raw_read[i]:
                max_raw_read[i] = raw_read[i]
        print_line(read, raw_read, arg_buckets, arg_skip_buckets, num)

        line = fileIN.readline()

# Print average and maximum
# The average is the arithmetic mean over every slice that was printed.
avg_read = [round(total / slices, 2) for total in sum_read]
avg_raw_read = [round(total / slices, 2) for total in sum_raw_read]
print(output_line)
print_line(avg_read, avg_raw_read, arg_buckets, arg_skip_buckets, "avg")
print_line(max_read, max_raw_read, arg_buckets, arg_skip_buckets, "max")

print("\nAnalyzed Test Duration: %s seconds\n" % final_time)
View
@@ -1,85 +0,0 @@
#!/bin/sh
# Summarise latency buckets and throughput from a log file.
# Usage: ./cf_latency.sh <log file name> <preferred number of buckets> <need graph>
# Work files (tmp-*.log) and, when requested, graph-*.out files are written
# to the current directory.
#set -x
if [ "$#" -ne 3 ]
then
    echo "Wrong Usage. Provide the log file name. Usage as follows:"
    echo "./cf_latency.sh <log file name> <preferred number of buckets> <need graph>"
    echo "Eg: ./cf_latency.sh cflog.log 3 0"
    exit 1
fi

if [ ! -r "$1" ]
then
    echo "File $1 is not found/readable"
    exit 1
fi

# Reject anything that is not a plain non-negative integer before the
# numeric comparison; a non-numeric value would otherwise slip through.
case "$2" in
    ''|*[!0-9]*)
        echo "Preferred number of buckets should be greater than 0"
        exit 1
        ;;
esac

if [ "$2" -le 0 ]
then
    echo "Preferred number of buckets should be greater than 0"
    exit 1
fi

graph=0
if [ "$3" -eq 1 ]
then
    graph=1
fi

logfile=$1
pref_buckets=$2

# Extract the lines which has information about read/writes and proxy and save in a temp log.
# Each awk call joins a matching header line with the four lines that follow
# it; sed then strips the "(NN:" prefixes and the remaining parentheses.
# NOTE(review): sub(//,"",$0) has an empty pattern - possibly a control
# character (such as a carriage return) was lost from the regex; confirm.
awk ' /^READS/ { {printf("%s %s",$1, $2)} ; x=1; while( x++ <= 4 ) { getline; sub(//,"",$0); printf("%s", $0) } printf ("\n") }' "$1" | sed 's/([0-9][0-9]://g ; s/)//g; s/(//g' > tmp-reads.log

awk ' /^LARGE BLOCK WRITES/ { {printf("%s %s",$1, $4)}; x=1; while( x++ <= 4 ) { getline; sub(//,"",$0); printf("%s", $0) } printf ("\n") }' "$1" | sed 's/([0-9][0-9]://g ; s/)//g; s/(//g' > tmp-writes.log

awk ' /^LARGE BLOCK READS/ { {printf("%s %s",$1, $4)}; x=1; while( x++ <= 4 ) { getline; sub(//,"",$0); printf("%s", $0) } printf ("\n") }' "$1" | sed 's/([0-9][0-9]://g ; s/)//g; s/(//g' > tmp-defrag.log

awk ' /^RAW READS/ { {printf("%s %s",$1, $3)}; x=1; while( x++ <= 4 ) { getline; sub(//,"",$0); printf("%s", $0) } printf ("\n") }' "$1" | sed 's/([0-9][0-9]://g ; s/)//g; s/(//g' > tmp-ssd-reads.log

# Create graph.out from read.log with the read percentage for each bucket. Format for the csv file is "day of month" "time" "read % for bucket1" "read % for bucket2" etc.
# NOTE(review): "date1=data2" in the awk programs below looks like a typo for
# date2; date1 is not read afterwards, so the output is unaffected.
if [ "$graph" -eq 1 ]
then
    awk '{ {if (NR == 1) { date1=$2; month1=$1; time1=$4; reads1=$5; i=0; while (i++ <= 12) { bucket1[i]=$(i+5) }} else { date1=data2; month1=month2; time1=time2; reads1=reads2; i=0; while (i++ <= 12) { bucket1[i]=bucket2[i]}}} { if (NR == 1) getline; date2=$2; month2=$1; time2=$4; reads2=$5; i=0; while (i++ <= 12) { bucket2[i]=$(i+5)}} {i=0; reads=reads2-reads1 ; printf("%s/%s-%s ",date2,month2, time2) ; if(reads<=0) reads=1 ;while (i++<=12) { printf("%.2f ", ((bucket2[i]-bucket1[i])/reads)*100) } } printf("\n")}' tmp-reads.log > "graph-${logfile}.out"
    awk '{ {if (NR == 1) { date1=$2; month1=$1; time1=$4; reads1=$5; i=0; while (i++ <= 12) { bucket1[i]=$(i+5) }} else { date1=data2; month1=month2; time1=time2; reads1=reads2; i=0; while (i++ <= 12) { bucket1[i]=bucket2[i]}}} { if (NR == 1) getline; date2=$2; month2=$1; time2=$4; reads2=$5; i=0; while (i++ <= 12) { bucket2[i]=$(i+5)}} {i=0; reads=reads2-reads1 ; printf("%s/%s-%s ",date2,month2, time2) ; if(reads<=0) reads=1 ;while (i++<=12) { printf("%.2f ", ((bucket2[i]-bucket1[i])/reads)*100) } } printf("\n")}' tmp-ssd-reads.log > "graph-ssd-${logfile}.out"

fi

# Append a four-line summary of one temp log to tmp-graph-tp.log:
# the label, the first line, the second-to-last line and the line count.
#   $1 = label, $2 = temp log (tmp-NAME.log; scratch copy goes to tmp-NAME-t.log)
append_summary() {
    echo "$1" >> tmp-graph-tp.log
    head -n 1 "$2" >> tmp-graph-tp.log
    tail -n 2 "$2" > "${2%.log}-t.log"
    head -n 1 "${2%.log}-t.log" >> tmp-graph-tp.log
    wc -l "$2" >> tmp-graph-tp.log
}

# Start from an empty summary file, then add the four sections in order.
: > tmp-graph-tp.log
append_summary "Reads " tmp-reads.log
append_summary "Writes " tmp-writes.log
append_summary "Defrag Reads " tmp-defrag.log
append_summary "SSD Reads " tmp-ssd-reads.log

echo "Latency Threshold = 2^$pref_buckets ms"

# POSIX arithmetic expansion: "((var=var+1))" is a bash extension and does
# not update the variable when /bin/sh is a strictly POSIX shell.
pref_buckets=$((pref_buckets + 1))

# Reads section (first four lines of the summary file).
head -n 4 tmp-graph-tp.log | awk -v pb="$pref_buckets" '{getline; i=1; reads1=0; in_reads1=0; while (i++ <= 13) { val=$(1+i); if(substr(val, 0, 1) == "A") { break; } if(i <= pb) { in_reads1=in_reads1+val; } in_reads1_arr[i]=val; reads1=reads1+val; } getline; i=1; reads2=0; in_reads2=0; while (i++ <= 13) { val=$(1+i); if(substr(val, 0, 1) == "A") { break; } if(i <= pb) { in_reads2=in_reads2+$(1+i); } in_reads2_arr[i]=val; reads2=reads2+val; } total_in_bucket=in_reads2-in_reads1; total_reads=reads2-reads1; total_timeout=total_reads-total_in_bucket; in_bucket = (total_in_bucket/total_reads)*100 ; in_timeout = (total_timeout / total_reads)*100; read_buckets=""; getline; time=$0-2; i=1; bvsum=0; while(i++ <= length(in_reads2_arr)) {bucket_val=sprintf("%.1f", (in_reads2_arr[i]-in_reads1_arr[i])/time); read_buckets = read_buckets " | " bucket_val; bvsum=bvsum+bucket_val;}; printf ("Percent Above Latency Threshold - %s\nRead Buckets | Total = %s%s\n", in_timeout, bvsum, read_buckets); }'

# SSD Reads section: the thirteen leading getline calls skip to line 14 of
# the summary file, the first data line of the fourth section.
head -n 16 tmp-graph-tp.log | awk -v pb="$pref_buckets" '{getline; getline; getline; getline; getline; getline; getline; getline; getline; getline; getline; getline; getline; i=1; reads1=0; in_reads1=0; while (i++ <= 13) { val=$(1+i); if(substr(val, 0, 1) == "/" || substr(val, 0, 1) == "R") { break; } if(i <= pb) { in_reads1=in_reads1+val; } in_reads1_arr[i]=val; reads1=reads1+val; } getline; i=1; reads2=0; in_reads2=0; while (i++ <= 13) { val=$(1+i); if(substr(val, 0, 1) == "/" || substr(val, 0, 1) == "R") { break; } if(i <= pb) { in_reads2=in_reads2+val; } in_reads2_arr[i]=val; reads2=reads2+val; } total_in_bucket=in_reads2-in_reads1; total_reads=reads2-reads1; total_timeout=total_reads-total_in_bucket; in_bucket = (total_in_bucket/total_reads)*100 ; in_timeout = (total_timeout / total_reads)*100; read_buckets=""; getline; time=$0-2; i=1; bvsum=0; while(i++ <= length(in_reads2_arr)) {bucket_val=sprintf("%.1f", (in_reads2_arr[i]-in_reads1_arr[i])/time); read_buckets = read_buckets " | " bucket_val; bvsum=bvsum+bucket_val;}; printf ("Percent Above Latency Threshold SSD - %s\nRead Buckets SSD | Total = %s%s\n", in_timeout, bvsum, read_buckets); }'

# One throughput line per four-line section of the summary file.
awk -F"[: ]" '{ printf("%s", $0); getline; if ($2 != "") {reads1=$2} else {reads1=$3} ; getline; if ($2 != "") {reads2=$2} else {reads2=$3}; reads=reads2-reads1; getline; time=$0-2; throughput=reads/time; printf ("Throughput = %s per second\n", throughput)}' tmp-graph-tp.log

echo "----------"

# Plot the graphs from the graph,out
#./graph.pg > graph.png
#./graph_one.pg > graph.png
-
View
@@ -1,4 +0,0 @@
#!/bin/bash
# Run cf_latency.sh on a window of a log file.
# Usage: get_latency <log file> <preferred number of buckets> <head lines> <tail lines>
# The last <tail lines> lines of the log are kept, then the first
# <head lines> of those; the result (foo.log) is analysed without graphs.
if [ "$#" -ne 4 ]
then
    echo "Usage: $0 <log file> <preferred number of buckets> <head lines> <tail lines>" >&2
    exit 1
fi

sudo tail -n "$4" "$1" > fooraw.log
sudo head -n "$3" fooraw.log > foo.log
./cf_latency.sh foo.log "$2" 0

0 comments on commit 09fe53d

Please sign in to comment.