From eae6a4fc753611aa13a2229dcee137102cb477fb Mon Sep 17 00:00:00 2001
From: ram
Date: Fri, 2 Apr 2021 14:25:23 -0400
Subject: [PATCH 1/3] #3 Satellite-v1 raw data sample analysis

---
 README.md                                   |   4 +-
 analysis/cmd/analyze-cp.go                  |  33 +-
 analysis/pkg/hquack/analysis.go             |   6 +-
 analysis/pkg/hquack/hquack.go               |   4 +-
 analysis/pkg/satellite/analysis.go          |  76 ++++
 analysis/pkg/satellite/io.go                |  44 ++
 analysis/pkg/satellite/satellite.go         | 436 ++++++++++++++++++++
 analysis/pkg/satellite/util.go              | 199 +++++++++
 analysis/pkg/tarballReader/tarballReader.go |  11 +-
 docs/satellitev1.rst                        |  45 +-
 docs/satellitev2.rst                        |  32 +-
 go.mod                                      |  12 +
 go.sum                                      |  63 +++
 13 files changed, 896 insertions(+), 69 deletions(-)
 create mode 100644 analysis/pkg/satellite/analysis.go
 create mode 100644 analysis/pkg/satellite/io.go
 create mode 100644 analysis/pkg/satellite/satellite.go
 create mode 100644 analysis/pkg/satellite/util.go
 create mode 100644 go.mod
 create mode 100644 go.sum

diff --git a/README.md b/README.md
index d3f354e..365e8d7 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@
 This respository contains documentation about the raw data from the [Censored Planet Observatory](https://censoredplanet.org/data/raw) and includes code to analyze the data and run several useful observations.
 
 ## Analysis (analyze-cp)
- Example analysis tool to parse the raw data files on the [Censored Planet Observatory Website](https://censoredplanet.org/data/raw). Currently, only analysis for `quack-v1` and `hyperquack-v1` data is supported. `satellite-v1` support is coming soon. The analysis tool converts raw data into digestible CSV files that contain aggregates of data at the country level. User is prompted to choose the type of output.
+ Example analysis tool to parse the raw data files on the [Censored Planet Observatory Website](https://censoredplanet.org/data/raw). Currently, only analysis for `quack-v1`, `hyperquack-v1`, and `satellite-v1` data is supported. `v2` support is coming soon. The analysis tool converts raw data into digestible CSV files that contain aggregates of data at the country level. The user is prompted to choose the type of output.
 
 `analyze-cp` can be compiled using the makefile in the `analysis` directory or by using `go build` in `analysis/cmd/`. `analysis-cp` needs two REQUIRED inputs (tar.gz file downloaded from [Censored Planet Observatory Website](https://censoredplanet.org/data/raw) and Maxmind GeoLite2-City.mmdb file downloaded from the [Maxmind Website](https://maxmind.com)).
@@ -15,11 +15,11 @@ This respository contains documentation about the raw data from the [Censored Pl
 --input-file, REQUIRED, "Input tar.gz file (downloaded from censoredplanet.org)"
 --output-file, Default - output.csv, "Output CSV file"
 --mmdb-file, REQUIRED, "Maxmind Geolocation MMDB file (Download from maxmind.com)"
+--satellitev1-html-file, OPTIONAL, Default - "", "JSON file that contains HTML responses for detecting blockpages from satellitev1 resolved IP addresses. The JSON file should have the following fields: 1) ip (resolved ip from satellitev1 that is marked as an anomaly), 2) query (query performed by satellitev1), 3) body (HTML body). If unspecified, the blockpage matching process will be skipped."
 --log-file, Default - '-'(STDERR), "file name for logging"
 --verbosity, Default - 3, "level of log detail (increasing from 0-5)"
 ```
-
 ## Documentation
 The documentation is available in the `docs` directory and it is hosted [here](https://censoredplanet.readthedocs.io).
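For reference, the file passed to the new `--satellitev1-html-file` flag is newline-delimited JSON, one object per line with the `ip`, `query`, and `body` fields described above. Below is a minimal reader sketch mirroring the `loadHTML` helper added later in this patch; the file path and struct names are illustrative assumptions, not part of the patch:

```go
// Minimal sketch: reading the newline-delimited JSON expected by
// --satellitev1-html-file. Field names (ip, query, body) come from the
// flag description above; the path and struct here are illustrative.
package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"log"
	"os"
)

func main() {
	f, err := os.Open("satellitev1_html.json") // hypothetical input path
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	pages := make(map[string]string)
	s := bufio.NewScanner(f)
	for s.Scan() {
		// One object per line: {"ip": "...", "query": "...", "body": "..."}
		var rec struct {
			IP    string `json:"ip"`
			Query string `json:"query"`
			Body  string `json:"body"`
		}
		if err := json.Unmarshal(s.Bytes(), &rec); err != nil {
			continue // skip malformed lines, as the patch's loadHTML does
		}
		// Keyed as resolved IP + query, matching how loadHTML indexes pages.
		pages[rec.IP+rec.Query] = rec.Body
	}
	fmt.Println("loaded", len(pages), "HTML bodies")
}
```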
diff --git a/analysis/cmd/analyze-cp.go b/analysis/cmd/analyze-cp.go
index ea0227e..530ecd8 100644
--- a/analysis/cmd/analyze-cp.go
+++ b/analysis/cmd/analyze-cp.go
@@ -10,6 +10,7 @@ import (
 	"github.com/censoredplanet/censoredplanet/analysis/pkg/geolocate"
 	"github.com/censoredplanet/censoredplanet/analysis/pkg/hquack"
+	"github.com/censoredplanet/censoredplanet/analysis/pkg/satellite"
 	log "github.com/sirupsen/logrus"
 )
 
@@ -19,6 +20,7 @@ type Flags struct {
 	outputFile string
 	blockpageFile string
 	falsePositiveFile string
+	satellitev1HtmlFile string
 	skipDatesFile string
 	mmdbFile string
 	logLevel uint
@@ -44,6 +46,7 @@ func main() {
 	flag := flag.NewFlagSet("flags", flag.ExitOnError)
 	flag.StringVar(&f.inputFile, "input-file", "", "REQUIRED - Input tar.gz file (downloaded from censoredplanet.org)")
 	flag.StringVar(&f.outputFile, "output-file", "output.csv", "Output csv file (default - output.csv)")
+	flag.StringVar(&f.satellitev1HtmlFile, "satellitev1-html-file", "", "(Optional) JSON file that contains HTML responses for detecting blockpages from satellitev1 resolved IP addresses. The JSON file should have the following fields: 1) ip (resolved ip), 2) query (query performed by satellitev1), 3) body (HTML body). If unspecified, the blockpage matching process will be skipped.")
 	flag.StringVar(&f.mmdbFile, "mmdb-file", "", "REQUIRED - Maxmind Geolocation MMDB file (Download from maxmind.com)")
 	flag.UintVar(&f.logLevel, "verbosity", 3, "level of log detail (0-5)")
 	flag.StringVar(&f.logFileFlag, "log-file", "-", "file name for logging, (- is stderr)")
@@ -92,19 +95,28 @@ func main() {
 	}
 	filename := parts[len(parts)-1]
 	//Compile regex for filename from Censored Planet website
-	r := regexp.MustCompile("CP_[a-zA-Z]+-[a-zA-Z]+-20[1-3][0-9]-[0-1][0-9]-[0-3][0-9]-[0-2][0-9]-[0-5][0-9]-[0-5][0-9].tar.gz")
+	r := regexp.MustCompile("CP_[a-zA-Z]+[-]*[a-zA-Z]*-20[1-3][0-9]-[0-1][0-9]-[0-3][0-9]-[0-2][0-9]-[0-5][0-9]-[0-5][0-9].tar.gz")
 	if !r.MatchString(filename) {
 		log.Fatal("Input file does not match expected file name pattern. Please use same file name pattern as in the censoredplanet.org website")
 	}
 	//Extract the scan technique and scan date
-	technique := strings.ToLower(strings.Split(filename, "-")[1])
-	scandate := strings.Split(filename, "-")[2] + strings.Split(filename, "-")[3] + strings.Split(filename, "-")[4]
+	technique := strings.ToLower(strings.Split(strings.Split(filename, "-")[0], "_")[1])
+	protocol := ""
+	scandate := ""
+	if technique == "quack"{
+		protocol = strings.ToLower(strings.Split(filename, "-")[1])
+		scandate = strings.Split(filename, "-")[2] + strings.Split(filename, "-")[3] + strings.Split(filename, "-")[4]
+	} else if technique == "satellite" {
+		scandate = strings.Split(filename, "-")[1] + strings.Split(filename, "-")[2] + strings.Split(filename, "-")[3]
+	} else {
+		log.Fatal("Unsupported technique for analysis")
+	}
 	//Should this scan be skipped?
 	if skip := ReadSkipScanDates("https://assets.censoredplanet.org/avoid_scandates.txt", technique, scandate); skip == true {
 		log.Fatal("This scan is in the do-not-include list.")
-	}
+	}	
 
 	//Initialize maxmind
 	log.Info("Input File okay!")
@@ -115,11 +127,12 @@ func main() {
 	log.Info("Maxmind init success")
 
 	//Start analysis
-	if technique == "echo" || technique == "discard" || technique == "http" || technique == "https" {
-		hquack.AnalyzeHquack(f.inputFile, f.outputFile, technique)
+	if technique == "quack" {
+		hquack.AnalyzeHquack(f.inputFile, f.outputFile, protocol)
 	} else if technique == "satellite" {
-		log.Fatal("Support for Satellite analysis is coming soon")
-	} else {
-		log.Fatal("Unsupported technique for analysis")
-	}
+		if scandate >= "20210301" {
+			log.Fatal("Satellitev2 support is not provided yet")
+		}
+		satellite.AnalyzeSatellite(f.inputFile, f.outputFile, f.satellitev1HtmlFile)
+	}
 }
diff --git a/analysis/pkg/hquack/analysis.go b/analysis/pkg/hquack/analysis.go
index f9b5d7a..152187a 100644
--- a/analysis/pkg/hquack/analysis.go
+++ b/analysis/pkg/hquack/analysis.go
@@ -22,7 +22,7 @@ func Analyze(data []map[string]interface{}, analysisType string) map[string]map[
 		if _, ok := dataMap[result["Country"].(string)][result["Keyword"].(string)]["Anomalies"]; !ok {
 			dataMap[result["Country"].(string)][result["Keyword"].(string)]["Anomalies"] = 0
 		}
-		if _, ok := dataMap[result["Country"].(string)][result["Keyword"].(string)]["Confirmed"]; !ok {
+		if _, ok := dataMap[result["Country"].(string)][result["Keyword"].(string)]["Confirmations"]; !ok {
 			dataMap[result["Country"].(string)][result["Keyword"].(string)]["Confirmations"] = 0
 		}
 
@@ -50,7 +50,7 @@ func Analyze(data []map[string]interface{}, analysisType string) map[string]map[
 		if _, ok := dataMap[result["Country"].(string)][result["Server"].(string)]["Anomalies"]; !ok {
 			dataMap[result["Country"].(string)][result["Server"].(string)]["Anomalies"] = 0
 		}
-		if _, ok := dataMap[result["Country"].(string)][result["Server"].(string)]["Confirmed"]; !ok {
+		if _, ok := dataMap[result["Country"].(string)][result["Server"].(string)]["Confirmations"]; !ok {
 			dataMap[result["Country"].(string)][result["Server"].(string)]["Confirmations"] = 0
 		}
 
@@ -78,7 +78,7 @@ func Analyze(data []map[string]interface{}, analysisType string) map[string]map[
 		if _, ok := dataMap[result["Country"].(string)][result["ErrorType"].(string)]["Anomalies"]; !ok {
 			dataMap[result["Country"].(string)][result["ErrorType"].(string)]["Anomalies"] = 0
 		}
-		if _, ok := dataMap[result["Country"].(string)][result["ErrorType"].(string)]["Confirmed"]; !ok {
+		if _, ok := dataMap[result["Country"].(string)][result["ErrorType"].(string)]["Confirmations"]; !ok {
 			dataMap[result["Country"].(string)][result["ErrorType"].(string)]["Confirmations"] = 0
 		}
 
diff --git a/analysis/pkg/hquack/hquack.go b/analysis/pkg/hquack/hquack.go
index 14bb9fe..a3a9dd9 100644
--- a/analysis/pkg/hquack/hquack.go
+++ b/analysis/pkg/hquack/hquack.go
@@ -261,7 +261,7 @@ func ProcessLine() *goflow.Graph {
 	return network
 }
 
-//Prompt gets the user's choice of anaylisys type
+//Prompt gets the user's choice of analysis type
 func Prompt() string {
 
 	types := []analyses{
@@ -322,7 +322,7 @@ func AnalyzeHquack(inputFile string, outputFile string, technique string) {
 
 	//Read the Tar file
 	//TODO: Read more than one tar file for files in 2020
-	fileBytes, err := tarballReader.ReadTarball(fileReader)
+	fileBytes, err := tarballReader.ReadTarball(fileReader, "results.json")
 	if err != nil {
 		log.Fatal("Could not read tarball", err.Error())
 
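The `ReadTarball` change above generalizes the helper from a hard-coded `results.json` to any member name, which the satellite code below relies on. A minimal caller sketch under the new signature; the archive path is an illustrative assumption:

```go
// Minimal caller sketch for the updated tarballReader.ReadTarball, which
// now takes the tar member to extract ("results.json" for quack archives,
// "interference.json" and friends for satellite).
package main

import (
	"fmt"
	"log"
	"os"

	"github.com/censoredplanet/censoredplanet/analysis/pkg/tarballReader"
)

func main() {
	f, err := os.Open("CP_Satellite-2021-02-01-12-00-01.tar.gz") // hypothetical path
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Extract a single member from the tar.gz stream by name.
	data, err := tarballReader.ReadTarball(f, "interference.json")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("read", len(data), "bytes of interference.json")
}
```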
diff --git a/analysis/pkg/satellite/analysis.go b/analysis/pkg/satellite/analysis.go
new file mode 100644
index 0000000..0991e69
--- /dev/null
+++ b/analysis/pkg/satellite/analysis.go
@@ -0,0 +1,76 @@
+//Copyright 2021 Censored Planet
+
+// Package satellite contains analysis scripts for satellite data
+package satellite
+
+//Analyze processes data based on the type of analysis specified
+//Input - The data ([]map[string]interface{}), the analysisType (Specified by the user prompt)
+//Returns - The stats about the data (map[string]map[string]map[string]int)
+func Analyze(data []map[string]interface{}, analysisType string) map[string]map[string]map[string]int {
+    var dataMap = map[string]map[string]map[string]int{}
+    if analysisType == "Domain" {
+        for _, result := range data {
+            if _, ok := dataMap[result["Country"].(string)]; !ok {
+                dataMap[result["Country"].(string)] = map[string]map[string]int{}
+            }
+            if _, ok := dataMap[result["Country"].(string)][result["Query"].(string)]; !ok {
+                dataMap[result["Country"].(string)][result["Query"].(string)] = map[string]int{}
+            }
+            if _, ok := dataMap[result["Country"].(string)][result["Query"].(string)]["Measurements"]; !ok {
+                dataMap[result["Country"].(string)][result["Query"].(string)]["Measurements"] = 0
+            }
+            if _, ok := dataMap[result["Country"].(string)][result["Query"].(string)]["Anomalies"]; !ok {
+                dataMap[result["Country"].(string)][result["Query"].(string)]["Anomalies"] = 0
+            }
+            if result["Fetched"] == true {
+                if _, ok := dataMap[result["Country"].(string)][result["Query"].(string)]["Confirmations"]; !ok {
+                    dataMap[result["Country"].(string)][result["Query"].(string)]["Confirmations"] = 0
+                }
+            }
+
+            dataMap[result["Country"].(string)][result["Query"].(string)]["Measurements"]++
+
+            if result["Anomaly"] == true {
+                dataMap[result["Country"].(string)][result["Query"].(string)]["Anomalies"]++
+            }
+            if result["Fetched"] == true {
+                if result["Confirmed"] == "true" {
+                    dataMap[result["Country"].(string)][result["Query"].(string)]["Confirmations"]++
+                }
+            }
+        }
+    } else if analysisType == "Vantage Point" {
+        for _, result := range data {
+            if _, ok := dataMap[result["Country"].(string)]; !ok {
+                dataMap[result["Country"].(string)] = map[string]map[string]int{}
+            }
+            if _, ok := dataMap[result["Country"].(string)][result["Resolver"].(string)]; !ok {
+                dataMap[result["Country"].(string)][result["Resolver"].(string)] = map[string]int{}
+            }
+            if _, ok := dataMap[result["Country"].(string)][result["Resolver"].(string)]["Measurements"]; !ok {
+                dataMap[result["Country"].(string)][result["Resolver"].(string)]["Measurements"] = 0
+            }
+            if _, ok := dataMap[result["Country"].(string)][result["Resolver"].(string)]["Anomalies"]; !ok {
+                dataMap[result["Country"].(string)][result["Resolver"].(string)]["Anomalies"] = 0
+            }
+            if result["Fetched"] == true {
+                if _, ok := dataMap[result["Country"].(string)][result["Resolver"].(string)]["Confirmations"]; !ok {
+                    dataMap[result["Country"].(string)][result["Resolver"].(string)]["Confirmations"] = 0
+                }
+            }
+
+            dataMap[result["Country"].(string)][result["Resolver"].(string)]["Measurements"]++
+
+            if result["Anomaly"] == true {
+                dataMap[result["Country"].(string)][result["Resolver"].(string)]["Anomalies"]++
+            }
+            if result["Fetched"] == true {
+                if result["Confirmed"] == "true" {
+                    dataMap[result["Country"].(string)][result["Resolver"].(string)]["Confirmations"]++
+                }
+            }
+        }
+    }
+    return dataMap
+
+}
diff --git a/analysis/pkg/satellite/io.go b/analysis/pkg/satellite/io.go
new file mode 100644
index 0000000..03bc480
--- /dev/null
+++ b/analysis/pkg/satellite/io.go
@@ -0,0 +1,44 @@
+//Copyright 2021 Censored Planet
+
+// Package satellite contains analysis scripts for satellite data
+package satellite
+
+import (
+    "encoding/csv"
+    "fmt"
+    "os"
+
+    log "github.com/sirupsen/logrus"
+)
+
+//WriteToCSV writes a map of analyzed data to a CSV file.
+//Input - The data to write (map[string]map[string]map[string]int), Output csv filename
+//Returns - None
+//WriteToCSV expects the data to be in a certain nested map format.
+func WriteToCSV(dataMap map[string]map[string]map[string]int, filename string) {
+    file, err := os.Create(filename)
+    if err != nil {
+        log.Fatal("Cannot create output file: ", err.Error())
+    }
+    defer file.Close()
+    w := csv.NewWriter(file)
+    defer w.Flush()
+
+    w.Write([]string{"Country", "Vantage Point/Website", "Measurements", "Anomalies", "Confirmations"})
+
+    for country, innerMap := range dataMap {
+        for targetValue, statsMap := range innerMap {
+            if _, ok := statsMap["Confirmations"]; ok {
+                err := w.Write([]string{fmt.Sprintf("%v", country), fmt.Sprintf("%v", targetValue), fmt.Sprintf("%v", statsMap["Measurements"]), fmt.Sprintf("%v", statsMap["Anomalies"]), fmt.Sprintf("%v", statsMap["Confirmations"])})
+                if err != nil {
+                    log.Warn("Could not write row due to: ", err.Error())
+                }
+            } else {
+                err := w.Write([]string{fmt.Sprintf("%v", country), fmt.Sprintf("%v", targetValue), fmt.Sprintf("%v", statsMap["Measurements"]), fmt.Sprintf("%v", statsMap["Anomalies"]), "N/A"})
+                if err != nil {
+                    log.Warn("Could not write row due to: ", err.Error())
+                }
+            }
+        }
+    }
+}
diff --git a/analysis/pkg/satellite/satellite.go b/analysis/pkg/satellite/satellite.go
new file mode 100644
index 0000000..8bf2406
--- /dev/null
+++ b/analysis/pkg/satellite/satellite.go
@@ -0,0 +1,436 @@
+//Copyright 2021 Censored Planet
+
+package satellite
+
+import (
+    "encoding/json"
+    "os"
+    "regexp"
+    "sort"
+    "strings"
+
+    //"github.com/censoredplanet/censoredplanet/analysis/pkg/geolocate"
+    "github.com/censoredplanet/censoredplanet/analysis/pkg/tarballReader"
+    "github.com/censoredplanet/censoredplanet/analysis/pkg/hquack"
+    "github.com/cheggaaa/pb/v3"
+    "github.com/manifoldco/promptui"
+    log "github.com/sirupsen/logrus"
+    "github.com/trustmaster/goflow"
+    set "github.com/deckarep/golang-set"
+)
+
+
+//Big CDNs regex
+var cdnRegex = regexp.MustCompile("AMAZON|Akamai|OPENDNS|CLOUDFLARENET|GOOGLE")
+
+//analyses stores the format for user prompt
+type analyses struct {
+    Name  string
+    Index string
+}
+
+//Parse forms the network components for parsing json information
+type Parse struct {
+    InLine      <-chan string
+    ResJSONData chan<- map[string]interface{}
+}
+
+//Process parses the json data
+func (p *Parse) Process() {
+    for line := range p.InLine {
+        var jsonData map[string]interface{}
+        if err := json.Unmarshal([]byte(line), &jsonData); err != nil {
+            log.Warn("Cannot unmarshal JSON data: ", line, ", Error: ", err.Error())
+        } else {
+            p.ResJSONData <- jsonData
+        }
+    }
+}
+
+//MetaData is the component that assigns measurement metadata to each row
+type MetaData struct {
+    InMetaData  <-chan map[string]interface{}
+    Geolocation <-chan map[string]string
+    OutMetaData chan<- map[string]interface{}
+}
+
+//Process assigns measurement metadata to each row
+func (m *MetaData) Process() {
+    geolocation := <-m.Geolocation
+    for data := range m.InMetaData {
+        vantagePoint := data["resolver"].(string)
+        if geolocation, ok := geolocation[vantagePoint]; !ok {
+            log.Warn("Did not find geolocation for vantage point: ", vantagePoint)
+        } else {
+            data["Geolocation"] = geolocation
+            m.OutMetaData <- data
+        }
+    }
+}
+
+//Filter is the component for marking untagged measurements
+type Filter struct {
+    InFilterData <-chan map[string]interface{}
+    ControlAnswers <-chan map[string]*tagsSet
+    OutFilterData chan<- map[string]interface{}
+}
+
+//Process marks untagged measurements
+func (f *Filter) Process() {
+    controlAnswers := <-f.ControlAnswers
+    for data := range f.InFilterData {
+        UntaggedAnswer := false
+        query := data["query"].(string)
+        numControlTags := 0
+        if tags, ok := controlAnswers[query]; ok {
+            numControlTags = tags.http.Cardinality() + tags.cert.Cardinality() + tags.asnum.Cardinality() + tags.asname.Cardinality()
+        }
+        if numControlTags == 0 {
+            UntaggedAnswer = true
+        }
+
+        answersMap := data["answers"].(map[string]interface{})
+        flag := true
+        for _, answers := range answersMap {
+            answerSlice := answers.([]interface{})
+            if len(answerSlice) == 0 {
+                flag = false
+            }
+            for _, answerTag := range answerSlice {
+                if answerTag != "no_tags" {
+                    flag = false
+                }
+            }
+        }
+        if flag {
+            UntaggedAnswer = true
+        }
+        data["UntaggedAnswer"] = UntaggedAnswer
+        f.OutFilterData <- data
+    }
+}
+
+//Fetch is the component for applying blockpage and unexpected responses regex matching
+type Fetch struct {
+    InFetchData <-chan map[string]interface{}
+    InBlockpageData     <-chan map[string]*regexp.Regexp
+    InFalsePositiveData <-chan map[string]*regexp.Regexp
+    HTMLPages <-chan map[string]string
+    OutFetchData chan<- map[string]interface{}
+
+}
+
+//Process applies blockpage and unexpected responses regex matching
+func (f *Fetch) Process() {
+    blockpages := <-f.InBlockpageData
+    falsePositives := <-f.InFalsePositiveData
+    html := <-f.HTMLPages
+    blockpageOrder := make([]string, len(blockpages))
+    i := 0
+    for k := range blockpages {
+        blockpageOrder[i] = k
+        i++
+    }
+
+    //Sort to map the blockpage strings in the right order
+    sort.Strings(blockpageOrder)
+
+    for data := range f.InFetchData {
+        fetched := false
+        if len(html) != 0 {
+
+            fetched = true
+            var confirmed string
+            var fingerprint string
+            if data["passed"].(bool) == false && data["UntaggedAnswer"].(bool) == false {
+                answersMap := data["answers"].(map[string]interface{})
+                for answer, _ := range answersMap {
+                    if body, ok := html[answer+data["query"].(string)]; ok {
+                        for fp, pattern := range falsePositives {
+                            if pattern.MatchString(body) {
+                                confirmed = "false"
+                                fingerprint = fp
+                                break
+                            }
+                        }
+                        for _, fp := range blockpageOrder {
+                            pattern := blockpages[fp]
+                            if pattern.MatchString(body) {
+                                confirmed = "true"
+                                fingerprint = fp
+                                break
+                            }
+                        }
+                    }
+                }
+            }
+            data["Confirmed"] = confirmed
+            data["Fingerprint"] = fingerprint
+        }
+        data["Fetched"] = fetched
+        f.OutFetchData <- data
+    }
+}
+
+//Verify is the component for applying post-processing heuristics to avoid false positives
+type Verify struct {
+    InVerifyData  <-chan map[string]interface{}
+    CDNIPs <-chan set.Set
+    OutVerifyData chan<- map[string]interface{}
+}
+
+//Process applies post-processing heuristics to avoid false positives
+func (v *Verify) Process() {
+    cdnIPs := <-v.CDNIPs
+    for data := range v.InVerifyData {
+        answersMap := data["answers"].(map[string]interface{})
+        belongsToCDN := false
+        for answer, _ := range answersMap {
+            if cdnIPs.Contains(answer) {
+                belongsToCDN = true
+            }
+        }
+        data["BelongsToCDN"] = belongsToCDN
+        v.OutVerifyData <- data
+    }
+}
+
+//Analysis is the component that stores the input and output for different types of analysis
+type Analysis struct {
+    InAnalysisData  <-chan map[string]interface{}
+    InAnalysisType  <-chan string
+    OutAnalysisData chan<- map[string]interface{}
+}
+
+//Process performs analysis on the filtered data to calculate different types of aggregates
+//TODO: Add more analysis types
+func (a *Analysis) Process() {
+    analysisType := <-a.InAnalysisType
+    for data := range a.InAnalysisData {
+        if analysisType == "Domain" {
+            a.OutAnalysisData <- map[string]interface{}{"Query": data["query"], "Anomaly": (!(data["passed"].(bool)) && !(data["BelongsToCDN"].(bool)) && !(data["UntaggedAnswer"].(bool))), "Fetched": data["Fetched"], "Confirmed": data["Confirmed"], "Country": data["Geolocation"]}
+        } else if analysisType == "Vantage Point" {
+            a.OutAnalysisData <- map[string]interface{}{"Resolver": data["resolver"], "Anomaly": (!(data["passed"].(bool)) && !(data["BelongsToCDN"].(bool)) && !(data["UntaggedAnswer"].(bool))), "Fetched": data["Fetched"], "Confirmed": data["Confirmed"], "Country": data["Geolocation"]}
+        }	
+    }
+
+}
+
+
+//ProcessLine constructs the directed cyclic graph that handles data flow between different components.
+func ProcessLine() *goflow.Graph {
+    network := goflow.NewGraph()
+
+    //Add network processes
+    network.Add("parse", new(Parse))
+    network.Add("metadata", new(MetaData))
+    network.Add("filter", new(Filter))
+    network.Add("verify", new(Verify))
+    network.Add("fetch", new(Fetch))
+    network.Add("analysis", new(Analysis))
+
+
+    // Connect them with a channel
+    network.Connect("parse", "ResJSONData", "metadata", "InMetaData")
+    network.Connect("metadata", "OutMetaData", "filter", "InFilterData")
+    network.Connect("filter", "OutFilterData", "fetch", "InFetchData")
+    network.Connect("fetch", "OutFetchData", "verify", "InVerifyData")
+    network.Connect("verify", "OutVerifyData", "analysis", "InAnalysisData")
+
+    network.MapInPort("Input", "parse", "InLine")
+    network.MapInPort("ControlAnswersInput", "filter", "ControlAnswers")
+    network.MapInPort("GeolocationInput", "metadata", "Geolocation")
+    network.MapInPort("BlockpageInput", "fetch", "InBlockpageData")
+    network.MapInPort("FalsePositiveInput", "fetch", "InFalsePositiveData")
+    network.MapInPort("HTMLInput", "fetch", "HTMLPages")
+    network.MapInPort("CDNIPInput", "verify", "CDNIPs")
+    network.MapInPort("AnalysisType","analysis","InAnalysisType")
+
+    //Map the output ports for the network
+    network.MapOutPort("ProcessingOutput", "analysis", "OutAnalysisData")
+
+    return network
+}
+
+//Prompt gets the user's choice of analysis type
+func Prompt() string {
+
+    types := []analyses{
+        {Name: "Websites marked as anomaly per country (csv)", Index: "Domain"},
+        {Name: "Anomalies per vantage point per country (csv)", Index: "Vantage Point"},
+    }
+
+    templates := &promptui.SelectTemplates{
+        Label:    "{{ . }}?",
+        Active:   "\U00002192 {{ .Name | cyan }}",
+        Inactive: "  {{ .Name | cyan }}",
+        Selected: "\U00002192 {{ .Name | red | cyan }}",
+        Details: `
+--------- Analysis Types ----------
+{{ "Name:" | faint }}	{{ .Name }}
+{{ "Index:" | faint }}	{{ .Index }}`,
+    }
+
+    prompt := promptui.Select{
+        Label:        "Select Type of Analysis",
+        Items:        types,
+        Templates:    templates,
+        CursorPos:    0,
+        HideSelected: false,
+    }
+
+    i, _, err := prompt.Run()
+
+    if err != nil {
+        log.Fatal("Prompt failed: ", err.Error())
+    }
+
+    return types[i].Index
+}
+
+//AnalyzeSatellite is the main function that handles io and set up of the network
+func AnalyzeSatellite(inputFile string, outputFile string, satellitev1HtmlFile string) {
+    analysisType := Prompt()
+    blockpages, err := hquack.ReadFingerprints("https://assets.censoredplanet.org/blockpage_signatures.json")
+    if err != nil {
+        log.Fatal("Could not read blockpage data: ", err.Error())
+    }
+    log.Info("Blockpage read successful")
+    falsePositives, err := hquack.ReadFingerprints("https://assets.censoredplanet.org/false_positive_signatures.json")
+    if err != nil {
+        log.Fatal("Could not read false positive data: ", err.Error())
+    }
+
+    //Load answer Tags
+    log.Info("Loading answer tags")
+    ansTags := make(map[string]*tags)
+    cdnIPs := set.NewSet()
+    ansTags, cdnIPs = loadAnsTags(inputFile, cdnRegex)
+
+    //Load control answers
+    log.Info("Loading control answers with tags")
+    controlAnswers := make(map[string]*tagsSet)
+    controlAnswers = loadControls(inputFile, ansTags)
+
+    log.Info("Loading geolocation info")
+    geolocation := make(map[string]string)
+    geolocation = loadGeolocation(inputFile)
+
+    htmlPages := make(map[string]string)
+    if satellitev1HtmlFile != "" {
+        log.Info("Loading Satellite-v1 HTML page")
+        htmlPages = loadHTML(satellitev1HtmlFile)
+    } else {
+        log.Warn("HTML matching will be skipped")
+    }
+
+    log.Info("Going through file: ", inputFile)
+    fileReader, err := os.Open(inputFile)
+    if err != nil {
+        log.Fatal("Could not open input file", err.Error())
+
+    }
+    defer fileReader.Close()
+
+    //Read the Tar file and get the required files
+    //TODO: Read more than one tar file for files in 2020
+    interferenceFileBytes, err := tarballReader.ReadTarball(fileReader, "interference.json")
+    if err != nil {
+        log.Fatal("Could not read tarball", err.Error())
+
+    }
+    log.Info("Interference File read: ", inputFile)
+    interferenceFileText := string(interferenceFileBytes)
+    interferenceFileTextLines := strings.Split(interferenceFileText, "\n")
+    log.Info("Number of lines in Interference file: ", len(interferenceFileTextLines))
+    processedData := make([]map[string]interface{}, 0)
+
+    network := ProcessLine()
+
+    ControlAnswersInput := make(chan map[string]*tagsSet)
+    GeolocationInput := make(chan map[string]string)
+    CDNIPInput := make(chan set.Set)
+    HTMLInput := make(chan map[string]string)
+    BlockpageInput := make(chan map[string]*regexp.Regexp)
+    FalsePositiveInput := make(chan map[string]*regexp.Regexp)
+    AnalysisTypeInput := make(chan string)
+    ProcessingOutput := make(chan map[string]interface{})
+    done := make(chan bool)
+
+    network.SetInPort("ControlAnswersInput", ControlAnswersInput)
+    network.SetInPort("GeolocationInput", GeolocationInput)
+    network.SetInPort("CDNIPInput", CDNIPInput)
+    network.SetInPort("BlockpageInput", BlockpageInput)
+    network.SetInPort("FalsePositiveInput", FalsePositiveInput)
+    network.SetInPort("HTMLInput", HTMLInput)
+    network.SetInPort("AnalysisType", AnalysisTypeInput)
+    network.SetOutPort("ProcessingOutput", ProcessingOutput)
+
+    In := make(chan string)
+    network.SetInPort("Input", In)
+
+    //Start the network
+    wait := goflow.Run(network)
+
+    log.Info("Network set up. Starting data flow.")
+
+    AnalysisTypeInput <- analysisType
+    close(AnalysisTypeInput)
+
+    ControlAnswersInput <- controlAnswers
+    close(ControlAnswersInput)
+
+    GeolocationInput <- geolocation
+    close(GeolocationInput)
+
+    BlockpageInput <- blockpages
+    close(BlockpageInput)
+
+    FalsePositiveInput <- falsePositives
+    close(FalsePositiveInput)
+
+    HTMLInput <- htmlPages
+    close(HTMLInput)
+
+    CDNIPInput <- cdnIPs
+    close(CDNIPInput)
+
+    // create and start new progress bar
+    bar := pb.StartNew(len(interferenceFileTextLines))
+
+    //Set the receiving channel
+    go func() {
+        for {
+            processingOutputData, more := <-ProcessingOutput
+            if more {
+                processedData = append(processedData, processingOutputData)
+            } else {
+                log.Info("Received all the dataflow output")
+                done <- true
+                return
+            }
+        }
+    }()
+
+    //Send the input "interference.json" lines
+    for _, line := range interferenceFileTextLines {
+        In <- line
+        bar.Increment()
+    }
+
+    close(In)
+
+    <-wait
+    bar.Finish()
+
+    <-done
+
+    //Analyze the dataflow output to create simplified CSV
+    output := Analyze(processedData, analysisType)
+    if output == nil {
+        log.Warn("Analysis output is empty")
+    }
+
+    //Write the CSV to output file
+    WriteToCSV(output, outputFile)
+}
diff --git a/analysis/pkg/satellite/util.go b/analysis/pkg/satellite/util.go
new file mode 100644
index 0000000..866917c
--- /dev/null
+++ b/analysis/pkg/satellite/util.go
@@ -0,0 +1,199 @@
+//Copyright 2021 Censored Planet
+
+// Package satellite contains analysis scripts for satellite
+package satellite
+
+import (
+    "strings"
+    "os"
+    "encoding/json"
+    "regexp"
+    "bufio"
+    "github.com/censoredplanet/censoredplanet/analysis/pkg/tarballReader"
+    set "github.com/deckarep/golang-set"
+    log "github.com/sirupsen/logrus"
+
+)
+
+type tags struct {
+    http   string
+    cert   string
+    asnum  float64
+    asname string
+}
+
+type tagsSet struct {
+    ip     set.Set
+    http   set.Set
+    cert   set.Set
+    asnum  set.Set
+    asname set.Set
+}
+
+//newTagsSet creates a new set of tags for Satellite responses
+//Returns new tag set
+func newTagsSet() *tagsSet {
+    t := new(tagsSet)
+    t.ip = set.NewSet()
+    t.http = set.NewSet()
+    t.cert = set.NewSet()
+    t.asnum = set.NewSet()
+    t.asname = set.NewSet()
+    return t
+}
+
+//loadAnsTags loads Satellite answers IPs and their tags from "tagged_answers.json"
+//Input - input tar.gz file, and cdn regex for finding CDN IPs
+//Output - Map of answers IPs and tags, and set of CDN IPs
+func loadAnsTags(inputFile string, cdnRegex *regexp.Regexp) (map[string]*tags, set.Set) {
+    fileReader, err := os.Open(inputFile)
+    if err != nil {
+        log.Fatal("Could not open input file", err.Error())
+
+    }
+    defer fileReader.Close()
+    taggedAnswersFileBytes, err := tarballReader.ReadTarball(fileReader, "tagged_answers.json")
+    if err != nil {
+        log.Fatal("Could not read tarball: ", err.Error())
+    }
+    log.Info("Tagged Answers File read")
+    taggedAnswersFileText := string(taggedAnswersFileBytes)
+    taggedAnswersTextLines := strings.Split(taggedAnswersFileText, "\n")
+    log.Info("Number of lines in Tagged Answers file: ", len(taggedAnswersTextLines))
+    ansTags := make(map[string]*tags)
+    cdnIPs := set.NewSet()
+    for _, line := range taggedAnswersTextLines {
+        var jsonData map[string]interface{}
+        if err := json.Unmarshal([]byte(line), &jsonData); err != nil {
+            log.Warn("Cannot unmarshal Tagged Answers JSON data: ", line, ", Error: ", err.Error())
+            continue
+        }
+
+        ip := jsonData["ip"].(string)
+        if ansTags[ip] == nil {
+            ansTags[ip] = new(tags)
+        }
+        if jsonData["http"] != nil {
+            ansTags[ip].http = jsonData["http"].(string)
+        }
+        if jsonData["cert"] != nil {
+            ansTags[ip].cert = jsonData["cert"].(string)
+        }
+        if jsonData["asnum"] != nil {
+            ansTags[ip].asnum = jsonData["asnum"].(float64)
+        }
+        if jsonData["asname"] != nil {
+            ansTags[ip].asname = jsonData["asname"].(string)
+            if cdnRegex.MatchString(jsonData["asname"].(string)) {
+                cdnIPs.Add(ip)
+            }
+        }
+    }
+    return ansTags, cdnIPs
+}
+
+//loadControls loads DNS resolution answers from Satellite's control resolvers
+//Input - input tar.gz file, and tagged answers
+//Output - Tagged control resolver answers
+func loadControls(inputFile string, ansTags map[string]*tags) map[string]*tagsSet {
+    fileReader, err := os.Open(inputFile)
+    if err != nil {
+        log.Fatal("Could not open input file", err.Error())
+
+    }
+    defer fileReader.Close()
+    controlAnswersFileBytes, err := tarballReader.ReadTarball(fileReader, "answers_control.json")
+    if err != nil {
+        log.Fatal("Could not read tarball: ", err.Error())
+
+    }
+    log.Info("Control Answers File read")
+    controlAnswersFileText := string(controlAnswersFileBytes)
+    controlAnswersTextLines := strings.Split(controlAnswersFileText, "\n")
+    log.Info("Number of lines in Control Answers file: ", len(controlAnswersTextLines)) 
+    controls := make(map[string]*tagsSet)
+    for _, line := range controlAnswersTextLines {
+        var jsonData map[string]interface{}
+        if err := json.Unmarshal([]byte(line), &jsonData); err != nil {
+            log.Warn("Cannot unmarshal Control Answers JSON data: ", line, ", Error: ", err.Error())
+            continue
+        }
+        query := jsonData["query"].(string)
+        answers := jsonData["answers"].([]interface{})
+        if controls[query] == nil {
+            controls[query] = newTagsSet()
+        }
+
+        for _, answer := range answers {
+            controls[query].ip.Add(answer.(string))
+            // Add the tags corresponding to this IP answer to control set
+            if t, ok := ansTags[answer.(string)]; ok {
+                if t.http != "" {
+                    controls[query].http.Add(t.http)
+                }
+                if t.cert != "" {
+                    controls[query].cert.Add(t.cert)
+                }
+                if t.asnum != 0 {
+                    controls[query].asnum.Add(t.asnum)
+                }
+                if t.asname != "" {
+                    controls[query].asname.Add(t.asname)
+                }
+            }
+        }
+    }
+    return controls
+}
+
+//loadGeolocation gets country information from the "tagged_resolvers.json" file
+//Input - tar.gz file
+//Output - Geolocation data
+func loadGeolocation(inputFile string) map[string]string {
+    fileReader, err := os.Open(inputFile)
+    if err != nil {
+        log.Fatal("Could not open input file", err.Error())
+
+    }
+    defer fileReader.Close()
+    taggedResolversFileBytes, err := tarballReader.ReadTarball(fileReader, "tagged_resolvers.json")
+    if err != nil {
+        log.Fatal("Could not read tarball: ", err.Error())
+
+    }
+    log.Info("Tagged resolvers File read")
+    taggedResolversFileText := string(taggedResolversFileBytes)
+    taggedResolversTextLines := strings.Split(taggedResolversFileText, "\n")
+    log.Info("Number of lines in Tagged resolvers file: ", len(taggedResolversTextLines)) 
+    geolocation := make(map[string]string)
+    for _, line := range taggedResolversTextLines {
+        var jsonData map[string]interface{}
+        if err := json.Unmarshal([]byte(line), &jsonData); err != nil {
+            log.Warn("Cannot unmarshal Tagged resolvers JSON data: ", line, ", Error: ", err.Error())
+            continue
+        }
+        geolocation[jsonData["resolver"].(string)] = jsonData["country"].(string)
+    }
+    return geolocation
+}
+
+//loadHTML loads HTML data from provided input file
+//Input - satellitev1HTMLFile from user input, a JSON file containing ip, query, and HTML body
+//Returns - Map of answer IP, query, and HTML body
+func loadHTML(inputFile string) map[string]string {
+    fileReader, err := os.Open(inputFile)
+    if err != nil {
+        log.Fatal("Could not open input file", err.Error())
+    }
+    defer fileReader.Close()
+    s := bufio.NewScanner(fileReader)
+    htmlData := make(map[string]string)
+    for s.Scan() {
+        data := make(map[string]interface{})
+        if err := json.Unmarshal(s.Bytes(), &data); err != nil {
+            log.Warn("Cannot unmarshal HTML JSON data: ", data, ", Error: ", err.Error())
+            continue
+        }
+        htmlData[data["ip"].(string)+data["query"].(string)] = data["body"].(string)
+    }
+    return htmlData
+}
\ No newline at end of file
diff --git a/analysis/pkg/tarballReader/tarballReader.go b/analysis/pkg/tarballReader/tarballReader.go
index 381929c..6b52152 100644
--- a/analysis/pkg/tarballReader/tarballReader.go
+++ b/analysis/pkg/tarballReader/tarballReader.go
@@ -15,9 +15,9 @@ import (
 )
 
 //ReadTarball reads the tar.gz file and sends back the file bytes
-//Input - tar.gz file reader
+//Input - tar.gz file, filename
 //Output - File byte stream, error
-func ReadTarball(reader io.Reader) ([]byte, error) {
+func ReadTarball(reader io.Reader, filename string) ([]byte, error) {
 	gzr, err := gzip.NewReader(reader)
 	if err != nil {
 		return nil, err
@@ -28,12 +28,12 @@ func ReadTarball(reader io.Reader, filename string) ([]byte, error) {
 	for {
 		hdr, err := tr.Next()
 		if err == io.EOF {
-			return nil, errors.New("Results file not found")
+			return nil, errors.New("File not found")
 		}
 		if err != nil {
 			return nil, err
 		}
-		if strings.Contains(hdr.Name, "results.json") {
+		if strings.Contains(hdr.Name, filename) {
 			bar := pb.New(int(hdr.Size))
 			bar.Start()
 			barReader := bar.NewProxyReader(tr)
@@ -45,5 +45,4 @@ func ReadTarball(reader io.Reader, filename string) ([]byte, error) {
 			return bs, nil
 		}
 	}
-
-}
+}
\ No newline at end of file
diff --git a/docs/satellitev1.rst b/docs/satellitev1.rst
index 29fe396..b885263 100644
--- a/docs/satellitev1.rst
+++ b/docs/satellitev1.rst
@@ -11,35 +11,22 @@ Satellite-v1 corresponds to measurements from 2018 to February 2021. See Satelli
 The published data has the following directory structure:
 ::
     CP_Satellite-YYYY-MM-DD-HH-MM-SS/
-    |-- log.json
-    |-- raw/
-    |   |-- answers_control.json
-    |   |-- answers_err.json
-    |   |-- answers_ip.json
-    |   |-- answers.json
-    |   |-- answers_raw.json
-    |   |-- dns.pkt
-    |   |-- interference_err.json
-    |   |-- interference.json
-    |   |-- resolvers_err.json
-    |   |-- resolvers_ip.json
-    |   |-- resolvers.json
-    |   |-- resolvers_ptr.json
-    |   |-- resolvers_raw.json
-    |   |-- tagged_answers.json
-    |   |-- tagged_resolvers.json
-    |-- stat/
-        |-- stat_answers.json
-        |-- stat_interference_agg.json
-        |-- stat_interference_count.json
-        |-- stat_interference_country_domain.json
-        |-- stat_interference_country.json
-        |-- stat_interference_country_percentage.json
-        |-- stat_interference_err.json
-        |-- stat_interference.json
-        |-- stat_resolvers_country.json
-        |-- stat_resolvers.json
-        |-- stat_tagged.json
+    |-- answers_control.json
+    |-- answers_err.json
+    |-- answers_ip.json
+    |-- answers.json
+    |-- answers_raw.json
+    |-- dns.pkt
+    |-- interference_err.json
+    |-- interference.json
+    |-- resolvers_err.json
+    |-- resolvers_ip.json
+    |-- resolvers.json
+    |-- resolvers_ptr.json
+    |-- resolvers_raw.json
+    |-- tagged_answers.json
+    |-- tagged_resolvers.json
+
 
 *******
diff --git a/docs/satellitev2.rst b/docs/satellitev2.rst
index ff2d07c..0fdfab3 100644
--- a/docs/satellitev2.rst
+++ b/docs/satellitev2.rst
@@ -9,23 +9,21 @@ Satellite is Censored Planet's tool to detect DNS interference. Refer to the fol
 The published data has the following directory structure:
 ::
     CP_Satellite-YYYY-MM-DD-HH-MM-SS/
-    |-- log.json
-    |-- raw/
-        |-- blockpages.json
-        |-- dns.pkt
-        |-- resolvers_err.json
-        |-- resolvers_ip.json
-        |-- resolvers.json
-        |-- resolvers_ptr.json
-        |-- resolvers_raw.json
-        |-- responses_control.json
-        |-- responses_ip.json
-        |-- responses.json
-        |-- responses_raw.json
-        |-- results.json
-        |-- results_verified.json
-        |-- tagged_responses.json
-        |-- tagged_resolvers.json
+    |-- blockpages.json
+    |-- dns.pkt
+    |-- resolvers_err.json
+    |-- resolvers_ip.json
+    |-- resolvers.json
+    |-- resolvers_ptr.json
+    |-- resolvers_raw.json
+    |-- responses_control.json
+    |-- responses_ip.json
+    |-- responses.json
+    |-- responses_raw.json
+    |-- results.json
+    |-- results_verified.json
+    |-- tagged_responses.json
+    |-- tagged_resolvers.json
 
 ------
 Probe
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..6203a7d
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,12 @@
+module github.com/censoredplanet/censoredplanet
+
+go 1.16
+
+require (
+	github.com/cheggaaa/pb/v3 v3.0.7
+	github.com/deckarep/golang-set v1.7.1
+	github.com/manifoldco/promptui v0.8.0
+	github.com/oschwald/geoip2-golang v1.5.0
+	github.com/sirupsen/logrus v1.8.1
+	github.com/trustmaster/goflow v0.0.0-20210124103720-b48b2ee88975
+)
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..c68411e
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,63 @@
+github.com/VividCortex/ewma v1.1.1 h1:MnEK4VOv6n0RSY4vtRe3h11qjxL3+t0B8yOL8iMXdcM=
+github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA=
+github.com/cheggaaa/pb/v3 v3.0.7 h1:58wduqwoqpsklsCZ7NZ5dsrCqPWv2t0+p85FcBds6XY=
+github.com/cheggaaa/pb/v3 v3.0.7/go.mod h1:X1L61/+36nz9bjIsrDU52qHKOQukUQe2Ge+YvGuquCw=
+github.com/chzyer/logex v1.1.10 h1:Swpa1K6QvQznwJRcfTfQJmTE72DqScAa40E+fbHEXEE=
+github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
+github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e h1:fY5BOSpyZCqRo5OhCuC+XN+r/bBCmeuuJtjz+bCNIf8=
+github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
+github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1 h1:q763qf9huN11kDQavWsoZXJNW3xEE4JJyHa5Q25/sd8=
+github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/deckarep/golang-set v1.7.1 h1:SCQV0S6gTtp6itiFrTqI+pfmJ4LN85S1YzhDf9rTHJQ=
+github.com/deckarep/golang-set v1.7.1/go.mod h1:93vsz/8Wt4joVM7c2AVqh+YRMiUSc14yDtF28KmMOgQ=
+github.com/fatih/color v1.7.0 h1:DkWD4oS2D8LGGgTQ6IvwJJXSL5Vp2ffcQg58nFV38Ys=
+github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
+github.com/juju/ansiterm v0.0.0-20180109212912-720a0952cc2a h1:FaWFmfWdAUKbSCtOU2QjDaorUexogfaMgbipgYATUMU=
+github.com/juju/ansiterm v0.0.0-20180109212912-720a0952cc2a/go.mod h1:UJSiEoRfvx3hP73CvoARgeLjaIOjybY9vj8PUPPFGeU=
+github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
+github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/lunixbochs/vtclean v0.0.0-20180621232353-2d01aacdc34a h1:weJVJJRzAJBFRlAiJQROKQs8oC9vOxvm4rZmBBk0ONw=
+github.com/lunixbochs/vtclean v0.0.0-20180621232353-2d01aacdc34a/go.mod h1:pHhQNgMf3btfWnGBVipUOjRYhoOsdGqdm/+2c2E2WMI=
+github.com/manifoldco/promptui v0.8.0 h1:R95mMF+McvXZQ7j1g8ucVZE1gLP3Sv6j9vlF9kyRqQo=
+github.com/manifoldco/promptui v0.8.0/go.mod h1:n4zTdgP0vr0S3w7/O/g98U+e0gwLScEXGwov2nIKuGQ=
+github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
+github.com/mattn/go-colorable v0.1.2 h1:/bC9yWikZXAL9uJdulbSfyVNIR3n3trXl+v8+1sx8mU=
+github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
+github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=
+github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
+github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHXY=
+github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
+github.com/mattn/go-runewidth v0.0.7 h1:Ei8KR0497xHyKJPAv59M1dkC+rOZCMBJ+t3fZ+twI54=
+github.com/mattn/go-runewidth v0.0.7/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
+github.com/oschwald/geoip2-golang v1.5.0 h1:igg2yQIrrcRccB1ytFXqBfOHCjXWIoMv85lVJ1ONZzw=
+github.com/oschwald/geoip2-golang v1.5.0/go.mod h1:xdvYt5xQzB8ORWFqPnqMwZpCpgNagttWdoZLlJQzg7s=
+github.com/oschwald/maxminddb-golang v1.8.0 h1:Uh/DSnGoxsyp/KYbY1AuP0tYEwfs0sCph9p/UMXK/Hk=
+github.com/oschwald/maxminddb-golang v1.8.0/go.mod h1:RXZtst0N6+FY/3qCNmZMBApR19cdQj43/NM9VkrNAis=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
+github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
+github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
+github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/trustmaster/goflow v0.0.0-20210124103720-b48b2ee88975 h1:bWdvjLfpqxfckMX/o6G8hza0mx+301NkUEna37uKkWI=
+github.com/trustmaster/goflow v0.0.0-20210124103720-b48b2ee88975/go.mod h1:ZwcUORj/boS7D5glapIfMUA3ZxB61hWKdb672oXyJa8=
+golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20191224085550-c709ea063b76/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200116001909-b77594299b42 h1:vEOn+mP2zCOVzKckCZy6YsCtDblrpj/w7B9nxGNELpg=
+golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
+gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

From c2fa561974b131f7c9f06eea1fad6a584a29963d Mon Sep 17 00:00:00 2001
From: ram
Date: Fri, 2 Apr 2021 14:43:02 -0400
Subject: [PATCH 2/3] #3 Changing makefile and running go fmt

---
 analysis/Makefile                           |  6 ++--
 analysis/cmd/analyze-cp.go                  | 24 ++++++++--------
 analysis/pkg/satellite/satellite.go         | 32 +++++++++------------
 analysis/pkg/satellite/util.go              | 17 ++++++-----
 analysis/pkg/tarballReader/tarballReader.go |  2 +-
 5 files changed, 38 insertions(+), 43 deletions(-)

diff --git a/analysis/Makefile b/analysis/Makefile
index 6c5152d..40ddca8 100644
--- a/analysis/Makefile
+++ b/analysis/Makefile
@@ -1,5 +1,5 @@
 all:
 	mkdir -p bin
-	(cd cmd; go get)
-	(cd cmd; go vet)
-	go build -o bin/analyze-cp cmd/analyze-cp.go
\ No newline at end of file
+	go get -d ./cmd/
+	go install cmd/analyze-cp.go
+	go build -o bin/analyze-cp cmd/analyze-cp.go
diff --git a/analysis/cmd/analyze-cp.go b/analysis/cmd/analyze-cp.go
index 530ecd8..e64d3b5 100644
--- a/analysis/cmd/analyze-cp.go
+++ b/analysis/cmd/analyze-cp.go
@@ -1,5 +1,5 @@
 //Copyright 2021 Censored Planet
-//analyze-cp provides example analysis for Censored Planet public raw data. 
+//analyze-cp provides example analysis for Censored Planet public raw data.
 package main
 
 import (
@@ -16,15 +16,15 @@ import (
 
 //Flags stores values from user-entered command line flags
 type Flags struct {
-	inputFile  string
-	outputFile string
-	blockpageFile string
-	falsePositiveFile string
+	inputFile           string
+	outputFile          string
+	blockpageFile       string
+	falsePositiveFile   string
 	satellitev1HtmlFile string
-	skipDatesFile string
-	mmdbFile string
-	logLevel uint
-	logFileFlag string
+	skipDatesFile       string
+	mmdbFile            string
+	logLevel            uint
+	logFileFlag         string
 }
 
 //ReadSkipScanDates checkes whether the current scandate of the file matches any of the scandates to skip analysis
@@ -104,7 +104,7 @@ func main() {
 	technique := strings.ToLower(strings.Split(strings.Split(filename, "-")[0], "_")[1])
 	protocol := ""
 	scandate := ""
-	if technique == "quack"{
+	if technique == "quack" {
 		protocol = strings.ToLower(strings.Split(filename, "-")[1])
 		scandate = strings.Split(filename, "-")[2] + strings.Split(filename, "-")[3] + strings.Split(filename, "-")[4]
 	} else if technique == "satellite" {
@@ -116,7 +116,7 @@ func main() {
 	//Should this scan be skipped?
 	if skip := ReadSkipScanDates("https://assets.censoredplanet.org/avoid_scandates.txt", technique, scandate); skip == true {
 		log.Fatal("This scan is in the do-not-include list.")
-	}	
+	}
 
 	//Initialize maxmind
 	log.Info("Input File okay!")
@@ -134,5 +134,5 @@ func main() {
 			log.Fatal("Satellitev2 support is not provided yet")
 		}
 		satellite.AnalyzeSatellite(f.inputFile, f.outputFile, f.satellitev1HtmlFile)
-	}
+	}
 }
diff --git a/analysis/pkg/satellite/satellite.go b/analysis/pkg/satellite/satellite.go
index 8bf2406..7c38ebb 100644
--- a/analysis/pkg/satellite/satellite.go
+++ b/analysis/pkg/satellite/satellite.go
@@ -10,18 +10,17 @@ import (
 	"strings"
 
 	//"github.com/censoredplanet/censoredplanet/analysis/pkg/geolocate"
-	"github.com/censoredplanet/censoredplanet/analysis/pkg/tarballReader"
 	"github.com/censoredplanet/censoredplanet/analysis/pkg/hquack"
+	"github.com/censoredplanet/censoredplanet/analysis/pkg/tarballReader"
 	"github.com/cheggaaa/pb/v3"
+	set "github.com/deckarep/golang-set"
 	"github.com/manifoldco/promptui"
 	log "github.com/sirupsen/logrus"
 	"github.com/trustmaster/goflow"
-	set "github.com/deckarep/golang-set"
 )
 
-
 //Big CDNs regex
-var cdnRegex = regexp.MustCompile("AMAZON|Akamai|OPENDNS|CLOUDFLARENET|GOOGLE") 
+var cdnRegex = regexp.MustCompile("AMAZON|Akamai|OPENDNS|CLOUDFLARENET|GOOGLE")
 
 //analyses stores the format for user prompt
 type analyses struct {
@@ -70,9 +69,9 @@ func (m *MetaData) Process() {
 
 //Filter is the component for marking untagged measurements
 type Filter struct {
-	InFilterData <-chan map[string]interface{}
+	InFilterData   <-chan map[string]interface{}
 	ControlAnswers <-chan map[string]*tagsSet
-	OutFilterData chan<- map[string]interface{}
+	OutFilterData  chan<- map[string]interface{}
 }
 
 //Process marks untagged measurements
@@ -88,7 +87,7 @@ func (f *Filter) Process() {
 		if numControlTags == 0 {
 			UntaggedAnswer = true
 		}
-		
+
 		answersMap := data["answers"].(map[string]interface{})
 		flag := true
 		for _, answers := range answersMap {
@@ -112,12 +111,11 @@ func (f *Filter) Process() {
 
 //Fetch is the component for applying blockpage and unexpected responses regex matching
 type Fetch struct {
-	InFetchData <-chan map[string]interface{}
+	InFetchData         <-chan map[string]interface{}
 	InBlockpageData     <-chan map[string]*regexp.Regexp
 	InFalsePositiveData <-chan map[string]*regexp.Regexp
-	HTMLPages <-chan map[string]string
-	OutFetchData chan<- map[string]interface{}
-
+	HTMLPages           <-chan map[string]string
+	OutFetchData        chan<- map[string]interface{}
 }
 
 //Process applies blockpage and unexpected responses regex matching
@@ -138,7 +136,7 @@ func (f *Fetch) Process() {
 	for data := range f.InFetchData {
 		fetched := false
 		if len(html) != 0 {
-
+
 			fetched = true
 			var confirmed string
 			var fingerprint string
@@ -175,7 +173,7 @@
 //Verify is the component for applying post-processing heuristics to avoid false positives
 type Verify struct {
 	InVerifyData  <-chan map[string]interface{}
-	CDNIPs <-chan set.Set
+	CDNIPs        <-chan set.Set
 	OutVerifyData chan<- map[string]interface{}
 }
 
@@ -211,12 +209,11 @@ func (a *Analysis) Process() {
 			a.OutAnalysisData <- map[string]interface{}{"Query": data["query"], "Anomaly": (!(data["passed"].(bool)) && !(data["BelongsToCDN"].(bool)) && !(data["UntaggedAnswer"].(bool))), "Fetched": data["Fetched"], "Confirmed": data["Confirmed"], "Country": data["Geolocation"]}
 		} else if analysisType == "Vantage Point" {
 			a.OutAnalysisData <- map[string]interface{}{"Resolver": data["resolver"], "Anomaly": (!(data["passed"].(bool)) && !(data["BelongsToCDN"].(bool)) && !(data["UntaggedAnswer"].(bool))), "Fetched": data["Fetched"], "Confirmed": data["Confirmed"], "Country": data["Geolocation"]}
-		}	
+		}
 	}
 
 }
 
-
 //ProcessLine constructs the directed cyclic graph that handles data flow between different components.
 func ProcessLine() *goflow.Graph {
 	network := goflow.NewGraph()
@@ -229,7 +226,6 @@ func ProcessLine() *goflow.Graph {
 	network.Add("fetch", new(Fetch))
 	network.Add("analysis", new(Analysis))
 
-
 	// Connect them with a channel
 	network.Connect("parse", "ResJSONData", "metadata", "InMetaData")
 	network.Connect("metadata", "OutMetaData", "filter", "InFilterData")
@@ -244,7 +240,7 @@ func ProcessLine() *goflow.Graph {
 	network.MapInPort("FalsePositiveInput", "fetch", "InFalsePositiveData")
 	network.MapInPort("HTMLInput", "fetch", "HTMLPages")
 	network.MapInPort("CDNIPInput", "verify", "CDNIPs")
-	network.MapInPort("AnalysisType","analysis","InAnalysisType")
+	network.MapInPort("AnalysisType", "analysis", "InAnalysisType")
 
 	//Map the output ports for the network
 	network.MapOutPort("ProcessingOutput", "analysis", "OutAnalysisData")
@@ -332,7 +328,7 @@ func AnalyzeSatellite(inputFile string, outputFile string, satellitev1HtmlFile s
 	}
 	defer fileReader.Close()
 
-	//Read the Tar file and get the required files 
+	//Read the Tar file and get the required files
 	//TODO: Read more than one tar file for files in 2020
 	interferenceFileBytes, err := tarballReader.ReadTarball(fileReader, "interference.json")
 	if err != nil {
diff --git a/analysis/pkg/satellite/util.go b/analysis/pkg/satellite/util.go
index 866917c..0098676 100644
--- a/analysis/pkg/satellite/util.go
+++ b/analysis/pkg/satellite/util.go
@@ -4,15 +4,14 @@
 package satellite
 
 import (
-	"strings"
-	"os"
-	"encoding/json"
-	"regexp"
 	"bufio"
+	"encoding/json"
 	"github.com/censoredplanet/censoredplanet/analysis/pkg/tarballReader"
-	set "github.com/deckarep/golang-set" 
+	set "github.com/deckarep/golang-set"
 	log "github.com/sirupsen/logrus"
-
+	"os"
+	"regexp"
+	"strings"
 )
 
 type tags struct {
@@ -109,7 +108,7 @@
 	log.Info("Control Answers File read")
 	controlAnswersFileText := string(controlAnswersFileBytes)
 	controlAnswersTextLines := strings.Split(controlAnswersFileText, "\n")
-	log.Info("Number of lines in Control Answers file: ", len(controlAnswersTextLines)) 
+	log.Info("Number of lines in Control Answers file: ", len(controlAnswersTextLines))
 	controls := make(map[string]*tagsSet)
 	for _, line := range controlAnswersTextLines {
 		var jsonData map[string]interface{}
@@ -163,7 +162,7 @@
 	log.Info("Tagged resolvers File read")
 	taggedResolversFileText := string(taggedResolversFileBytes)
 	taggedResolversTextLines := strings.Split(taggedResolversFileText, "\n")
-	log.Info("Number of lines in Tagged resolvers file: ", len(taggedResolversTextLines)) 
+	log.Info("Number of lines in Tagged resolvers file: ", len(taggedResolversTextLines))
 	geolocation := make(map[string]string)
 	for _, line := range taggedResolversTextLines {
 		var jsonData map[string]interface{}
@@ -196,4 +195,4 @@
 		htmlData[data["ip"].(string)+data["query"].(string)] = data["body"].(string)
 	}
 	return htmlData
-}
\ No newline at end of file
+}
diff --git a/analysis/pkg/tarballReader/tarballReader.go b/analysis/pkg/tarballReader/tarballReader.go
index 6b52152..3560966 100644
--- a/analysis/pkg/tarballReader/tarballReader.go
+++ b/analysis/pkg/tarballReader/tarballReader.go
@@ -45,4 +45,4 @@ func ReadTarball(reader io.Reader, filename string) ([]byte, error) {
 			return bs, nil
 		}
 	}
-}
\ No newline at end of file
+}

From 2ce49d549b7ffa2b9381c96c49920026cd14c790 Mon Sep 17 00:00:00 2001
From: ram
Date: Fri, 2 Apr 2021 14:43:51 -0400
Subject: [PATCH 3/3] #3 Changing makefile again

---
 analysis/Makefile | 1 -
 1 file changed, 1 deletion(-)

diff --git a/analysis/Makefile b/analysis/Makefile
index 40ddca8..436744c 100644
--- a/analysis/Makefile
+++ b/analysis/Makefile
@@ -1,5 +1,4 @@
 all:
 	mkdir -p bin
-	go get -d ./cmd/
 	go install cmd/analyze-cp.go
 	go build -o bin/analyze-cp cmd/analyze-cp.go
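Summarizing the dataflow in these patches: a satellite-v1 measurement is counted as an anomaly only when the control comparison did not pass and neither verification heuristic (CDN answer, untagged answer) explains the mismatch. Restated as a standalone predicate with a hypothetical helper name — a paraphrase of the expression in the Analysis component, not code from the patches:

```go
// Paraphrase of the anomaly rule applied by the Analysis component in
// satellite.go: anomalous only if the probe did not pass AND the answer
// is not explained by a CDN IP or a completely untagged answer set.
package satellite

func isAnomaly(passed, belongsToCDN, untaggedAnswer bool) bool {
	return !passed && !belongsToCDN && !untaggedAnswer
}
```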