Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 00e250a
Showing
9 changed files
with
313,241 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,230 @@ | ||
package DNSBinarySearch | ||
|
||
import ( | ||
"bufio" | ||
"fmt" | ||
"io" | ||
"os" | ||
"sort" | ||
"strings" | ||
|
||
// for string reverse function | ||
"github.com/golang/example/stringutil" | ||
) | ||
|
||
const (
	// MAXLINESIZE is the number of bytes fetched from the file for every
	// probe. Each line is expected to be shorter than this (a DNS name is at
	// most 255 bytes).
	MAXLINESIZE = 500

	// WALKBYTES is the size, in bytes, of each backwards step taken while
	// looking for the edge of the run of matching lines.
	WALKBYTES = 10000
)
|
||
// Limits bounds the runtime and the output size of a search.
type Limits struct {
	// MaxScan caps the backwards walk, counted in steps of WALKBYTES bytes.
	MaxScan int
	// MaxOutputLines caps the number of lines read while collecting output.
	MaxOutputLines int
}
|
||
var DefaultLimits = Limits{ | ||
MaxScan: 100, // 10MB | ||
MaxOutputLines: 100000, // 100,000 lines | ||
} | ||
|
||
// fetches a string buffer from a file | ||
func getStringBuffer(f *os.File, offset int) (string, error) { | ||
_, err := f.Seek(int64(offset), 0) | ||
if err != nil { | ||
return "", err | ||
} | ||
returnBuf := make([]byte, MAXLINESIZE) | ||
_, err = io.ReadAtLeast(f, returnBuf, MAXLINESIZE) | ||
if err != nil { | ||
return "", err | ||
} | ||
|
||
return string(returnBuf), nil | ||
} | ||
|
||
// getNextLine returns the first line that starts inside str, where str is a
// buffer read from an arbitrary offset and therefore usually begins mid-line.
//
// Everything up to and including the first newline is skipped; the result is
// the text that follows, up to (not including) the second newline, or up to
// the end of str if there is no second newline. An empty string is returned
// when str contains no newline at all. The file is expected to be large
// enough, and its lines short enough, that this does not happen in practice.
func getNextLine(str string) string {
	firstBreak := strings.Index(str, "\n")
	if firstBreak < 0 {
		return ""
	}

	rest := str[firstBreak+1:]
	if secondBreak := strings.Index(rest, "\n"); secondBreak >= 0 {
		return rest[:secondBreak]
	}
	return rest
}
|
||
// intermediary helper function to simplify code below | ||
// takes a file, offset and string to search for | ||
// returns a string compareLine which is fullLine truncated to len(searchStr) | ||
// if err is set, result cannot be trusted | ||
func getLineDetails(f *os.File, offset int, searchStr string) (compareLine string, err error) { | ||
// get the string buffer at this offset | ||
stringBuffer, err := getStringBuffer(f, offset) | ||
if err != nil { | ||
return "", err | ||
} | ||
|
||
// get the next line | ||
fullLine := getNextLine(stringBuffer) | ||
compareLine = fullLine | ||
if fullLine == "" { | ||
return "", fmt.Errorf("Failed to get next line from string buffer: %s\n", stringBuffer) | ||
} | ||
|
||
// filter out up to the length of the search string | ||
if len(compareLine) > len(searchStr) { | ||
compareLine = compareLine[0:len(searchStr)] | ||
} | ||
|
||
return | ||
} | ||
|
||
// pass a file path and search string to search for matches | ||
// expects the file to sorted, with domain names at the start of the file, in reverse order | ||
// example: "moc.elpmaxe.www,1.1.1.1" | ||
// returns a list of matches | ||
// example ["1.1.1.1,www.example.com"] | ||
func DNSBinarySearch(filePath string, searchStr string, limit Limits) (ret []string, err error) { | ||
|
||
// reverse the search string | ||
searchStr = stringutil.Reverse(searchStr) | ||
|
||
// open the file & get it's size | ||
f, err := os.Open(filePath) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to open file") | ||
} | ||
fi, err := f.Stat() | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to stat file") | ||
} | ||
|
||
// use sort.Search to find a line in our sorted file containing the search string | ||
// A possible enhancement here is to define our own sort.Search with an interface to | ||
// pass in the variables we need, rather than implicitly passing to the sub function here... | ||
foundByteLocation := sort.Search(int(fi.Size()), func(i int) bool { | ||
|
||
// use the intermediary function to get the line details at the offset we are currently considering | ||
searchLineCompare, err := getLineDetails(f, i, searchStr) | ||
if err != nil { | ||
// this should trigger an error in the next phase causing us to fail out quickly | ||
return false | ||
} | ||
|
||
// substring compare | ||
if strings.Compare(searchStr, searchLineCompare) > 0 { | ||
return false | ||
} else { | ||
return true | ||
} | ||
}) // end sort.Search | ||
|
||
// check if we found a match, if we did not, exit out | ||
stringBuffer, err := getStringBuffer(f, foundByteLocation) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to get matched buffer from file?") | ||
} | ||
fullLine := getNextLine(stringBuffer) | ||
if fullLine == "" || !strings.HasPrefix(fullLine, searchStr) { | ||
return nil, fmt.Errorf("failed to find exact match via binary search") | ||
} | ||
|
||
// walk back 10 kilobytes bytes at a time, searching for a line that does not contain a match | ||
minSearchLocation := foundByteLocation | ||
maxScan := limit.MaxScan | ||
for { | ||
|
||
maxScan-- | ||
if maxScan == 0 { | ||
return nil, fmt.Errorf("scan limit reached!") | ||
} | ||
|
||
// walk backwards in the file | ||
minSearchLocation = minSearchLocation - WALKBYTES | ||
if minSearchLocation < 0 { | ||
return nil, fmt.Errorf("scanned backwards too far! Reached start of file!") | ||
} | ||
|
||
// get the string buffer & next line at this offset | ||
searchLineCompare, err := getLineDetails(f, minSearchLocation, searchStr) | ||
if err != nil { | ||
return nil, fmt.Errorf("unexpected failure, failed to fetch next line while walking backwards?") | ||
} | ||
|
||
// we are looking for the first result that does not contain our substring | ||
if strings.Compare(searchStr, searchLineCompare) != 0 { | ||
break | ||
} | ||
} | ||
|
||
// seek to the minimum search location (this is likely unncessary to repeat as it was already done in getLineDetails above) | ||
_, err = f.Seek(int64(minSearchLocation), 0) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
// now that we have a min-location, use a bufio reader & ReadString('\n') to read the next line until they do not match! | ||
|
||
// call readString once to advance the pointer to the next \n | ||
reader := bufio.NewReader(f) | ||
_, err = reader.ReadString('\n') | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
firstHit := false // bool flag is set to true once we start matching, once we stop matching and this flag is true, we can exit! | ||
maxOutputLines := limit.MaxOutputLines | ||
for { | ||
|
||
maxOutputLines-- | ||
if maxOutputLines == 0 { | ||
// we likely could return what we have here already, but the result would be incomplete... | ||
return nil, fmt.Errorf("output limit reached!") | ||
} | ||
|
||
// this will read the next string up to the \n char | ||
nextLine, err := reader.ReadString('\n') | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
// remove the newline char | ||
nextLine = strings.TrimSuffix(nextLine, "\n") | ||
|
||
// filter out up to the length of the search string | ||
compareLine := nextLine | ||
if len(compareLine) > len(searchStr) { | ||
compareLine = compareLine[0:len(searchStr)] | ||
} | ||
|
||
// strings match! | ||
if strings.Compare(compareLine, searchStr) == 0 { | ||
// append the reversed line | ||
ret = append(ret, stringutil.Reverse(nextLine)) | ||
|
||
// if this is our first hit, mark it as such! | ||
if firstHit == false { | ||
firstHit = true | ||
} | ||
} else if firstHit == true { | ||
// we've had a string match before, and they no longer match! | ||
// it's time to return | ||
break | ||
} | ||
|
||
} | ||
|
||
// and finally, close the file | ||
f.Close() | ||
|
||
return | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
package DNSBinarySearch | ||
|
||
import "testing" | ||
|
||
// a quick sanity check to make sure this library works as expected | ||
func TestResult(t *testing.T) { | ||
|
||
output, err := DNSBinarySearch("test_data.txt", "amiccom.com.tw", DefaultLimits) | ||
if err != nil { | ||
t.Fatalf("unexpected error: %+v", err) | ||
} else { | ||
if len(output) != 6 { | ||
t.Fatalf("unexpected output length: %+v", output) | ||
} | ||
} | ||
} |
Oops, something went wrong.