Skip to content

Commit

Permalink
Merge pull request #11 from censoredplanet/satellitev1-analysis
Browse files Browse the repository at this point in the history
#3 Satellite-v1 raw data sample analysis
  • Loading branch information
ramakrishnansr committed Apr 5, 2021
2 parents 6f1d1a7 + 2ce49d5 commit f6362bd
Show file tree
Hide file tree
Showing 14 changed files with 899 additions and 78 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
This repository contains documentation about the raw data from the [Censored Planet Observatory](https://censoredplanet.org/data/raw) and includes code to analyze the data and run several useful observations.

## Analysis (analyze-cp)
Example analysis tool to parse the raw data files on the [Censored Planet Observatory Website](https://censoredplanet.org/data/raw). Currently, only analysis for `quack-v1` and `hyperquack-v1` data is supported. `satellite-v1` support is coming soon. The analysis tool converts raw data into digestible CSV files that contain aggregates of data at the country level. User is prompted to choose the type of output.
Example analysis tool to parse the raw data files on the [Censored Planet Observatory Website](https://censoredplanet.org/data/raw). Currently, only analysis for `quack-v1`, `hyperquack-v1`, and `satellite-v1` data is supported. `v2` support is coming soon. The analysis tool converts raw data into digestible CSV files that contain aggregates of data at the country level. User is prompted to choose the type of output.

`analyze-cp` can be compiled using the makefile in the `analysis` directory or by using `go build` in `analysis/cmd/`. `analysis-cp` needs two REQUIRED inputs (tar.gz file downloaded from [Censored Planet Observatory Website](https://censoredplanet.org/data/raw) and Maxmind GeoLite2-City.mmdb file downloaded from the [Maxmind Website](https://maxmind.com)).

Expand All @@ -15,11 +15,11 @@ This respository contains documentation about the raw data from the [Censored Pl
--input-file, REQUIRED, "Input tar.gz file (downloaded from censoredplanet.org)"
--output-file, Default - output.csv, "Output CSV file"
--mmdb-file, REQUIRED, "Maxmind Geolocation MMDB file (Download from maxmind.com)"
--satellitev1-html-file, OPTIONAL, Default - "", "JSON file that contains HTML responses for detecting blockpages from satellitev1 resolved IP addresses. The JSON file should have the following fields: 1) ip (resolved ip from satellitev1 that is marked as an anomaly), 2) query (query performed by satellitev1), 3) body (HTML body). If unspecified, the blockpage matching process will be skipped."
--log-file, Default - '-'(STDERR), "file name for logging"
--verbosity, Default - 3, "level of log detail (increasing from 0-5)"
```


## Documentation
The documentation is available in the `docs` directory and it is hosted [here](https://censoredplanet.readthedocs.io).

Expand Down
5 changes: 2 additions & 3 deletions analysis/Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
all:
mkdir -p bin
(cd cmd; go get)
(cd cmd; go vet)
go build -o bin/analyze-cp cmd/analyze-cp.go
go install cmd/analyze-cp.go
go build -o bin/analyze-cp cmd/analyze-cp.go
47 changes: 30 additions & 17 deletions analysis/cmd/analyze-cp.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//Copyright 2021 Censored Planet
//analyze-cp provides example analysis for Censored Planet public raw data.
//analyze-cp provides example analysis for Censored Planet public raw data.
package main

import (
Expand All @@ -10,19 +10,21 @@ import (

"github.com/censoredplanet/censoredplanet/analysis/pkg/geolocate"
"github.com/censoredplanet/censoredplanet/analysis/pkg/hquack"
"github.com/censoredplanet/censoredplanet/analysis/pkg/satellite"
log "github.com/sirupsen/logrus"
)

//Flags stores values from user-entered command line flags
type Flags struct {
inputFile string
outputFile string
blockpageFile string
falsePositiveFile string
skipDatesFile string
mmdbFile string
logLevel uint
logFileFlag string
inputFile string
outputFile string
blockpageFile string
falsePositiveFile string
satellitev1HtmlFile string
skipDatesFile string
mmdbFile string
logLevel uint
logFileFlag string
}

//ReadSkipScanDates checks whether the current scandate of the file matches any of the scandates to skip analysis
Expand All @@ -44,6 +46,7 @@ func main() {
flag := flag.NewFlagSet("flags", flag.ExitOnError)
flag.StringVar(&f.inputFile, "input-file", "", "REQUIRED - Input tar.gz file (downloaded from censoredplanet.org)")
flag.StringVar(&f.outputFile, "output-file", "output.csv", "Output csv file (default - output.csv)")
flag.StringVar(&f.satellitev1HtmlFile, "satellitev1-html-file", "", "(Optional) json file that contains HTML responses for detecting blockpages from satellitev1 resolved IP addresses. The JSON file should have the following fields: 1) ip (resolved ip), query (query performed by satellitev1), body (HTML body). If unspecified, the blockpage matching process will be skipped.")
flag.StringVar(&f.mmdbFile, "mmdb-file", "", "REQUIRED - Maxmind Geolocation MMDB file (Download from maxmind.com)")
flag.UintVar(&f.logLevel, "verbosity", 3, "level of log detail (0-5)")
flag.StringVar(&f.logFileFlag, "log-file", "-", "file name for logging, (- is stderr)")
Expand Down Expand Up @@ -92,14 +95,23 @@ func main() {
}
filename := parts[len(parts)-1]
//Compile regex for filename from Censored Planet website
r := regexp.MustCompile("CP_[a-zA-Z]+-[a-zA-Z]+-20[1-3][0-9]-[0-1][0-9]-[0-3][0-9]-[0-2][0-9]-[0-5][0-9]-[0-5][0-9].tar.gz")
r := regexp.MustCompile("CP_[a-zA-Z]+[-]*[a-zA-Z]*-20[1-3][0-9]-[0-1][0-9]-[0-3][0-9]-[0-2][0-9]-[0-5][0-9]-[0-5][0-9].tar.gz")
if !r.MatchString(filename) {
log.Fatal("Input file does not match expected file name pattern. Please use same file name pattern as in the censoredplanet.org website")
}

//Extract the scan technique and scan date
technique := strings.ToLower(strings.Split(filename, "-")[1])
scandate := strings.Split(filename, "-")[2] + strings.Split(filename, "-")[3] + strings.Split(filename, "-")[4]
technique := strings.ToLower(strings.Split(strings.Split(filename, "-")[0], "_")[1])
protocol := ""
scandate := ""
if technique == "quack" {
protocol = strings.ToLower(strings.Split(filename, "-")[1])
scandate = strings.Split(filename, "-")[2] + strings.Split(filename, "-")[3] + strings.Split(filename, "-")[4]
} else if technique == "satellite" {
scandate = strings.Split(filename, "-")[1] + strings.Split(filename, "-")[2] + strings.Split(filename, "-")[3]
} else {
log.Fatal("Unsupported technique for analysis")
}

//Should this scan be skipped?
if skip := ReadSkipScanDates("https://assets.censoredplanet.org/avoid_scandates.txt", technique, scandate); skip == true {
Expand All @@ -115,11 +127,12 @@ func main() {
log.Info("Maxmind init success")

//Start analysis
if technique == "echo" || technique == "discard" || technique == "http" || technique == "https" {
hquack.AnalyzeHquack(f.inputFile, f.outputFile, technique)
if technique == "quack" {
hquack.AnalyzeHquack(f.inputFile, f.outputFile, protocol)
} else if technique == "satellite" {
log.Fatal("Support for Satellite analysis is coming soon")
} else {
log.Fatal("Unsupported technique for analysis")
if scandate >= "20210301" {
log.Fatal("Satellitev2 support is not provided yet")
}
satellite.AnalyzeSatellite(f.inputFile, f.outputFile, f.satellitev1HtmlFile)
}
}
6 changes: 3 additions & 3 deletions analysis/pkg/hquack/analysis.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ func Analyze(data []map[string]interface{}, analysisType string) map[string]map[
if _, ok := dataMap[result["Country"].(string)][result["Keyword"].(string)]["Anomalies"]; !ok {
dataMap[result["Country"].(string)][result["Keyword"].(string)]["Anomalies"] = 0
}
if _, ok := dataMap[result["Country"].(string)][result["Keyword"].(string)]["Confirmed"]; !ok {
if _, ok := dataMap[result["Country"].(string)][result["Keyword"].(string)]["Confirmations"]; !ok {
dataMap[result["Country"].(string)][result["Keyword"].(string)]["Confirmations"] = 0
}

Expand Down Expand Up @@ -50,7 +50,7 @@ func Analyze(data []map[string]interface{}, analysisType string) map[string]map[
if _, ok := dataMap[result["Country"].(string)][result["Server"].(string)]["Anomalies"]; !ok {
dataMap[result["Country"].(string)][result["Server"].(string)]["Anomalies"] = 0
}
if _, ok := dataMap[result["Country"].(string)][result["Server"].(string)]["Confirmed"]; !ok {
if _, ok := dataMap[result["Country"].(string)][result["Server"].(string)]["Confirmations"]; !ok {
dataMap[result["Country"].(string)][result["Server"].(string)]["Confirmations"] = 0
}

Expand Down Expand Up @@ -78,7 +78,7 @@ func Analyze(data []map[string]interface{}, analysisType string) map[string]map[
if _, ok := dataMap[result["Country"].(string)][result["ErrorType"].(string)]["Anomalies"]; !ok {
dataMap[result["Country"].(string)][result["ErrorType"].(string)]["Anomalies"] = 0
}
if _, ok := dataMap[result["Country"].(string)][result["ErrorType"].(string)]["Confirmed"]; !ok {
if _, ok := dataMap[result["Country"].(string)][result["ErrorType"].(string)]["Confirmations"]; !ok {
dataMap[result["Country"].(string)][result["ErrorType"].(string)]["Confirmations"] = 0
}

Expand Down
4 changes: 2 additions & 2 deletions analysis/pkg/hquack/hquack.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ func ProcessLine() *goflow.Graph {
return network
}

//Prompt gets the user's choice of anaylisys type
//Prompt gets the user's choice of analysis type
func Prompt() string {

types := []analyses{
Expand Down Expand Up @@ -322,7 +322,7 @@ func AnalyzeHquack(inputFile string, outputFile string, technique string) {

//Read the Tar file
//TODO: Read more than one tar file for files in 2020
fileBytes, err := tarballReader.ReadTarball(fileReader)
fileBytes, err := tarballReader.ReadTarball(fileReader, "results.json")
if err != nil {
log.Fatal("Could not read tarball", err.Error())

Expand Down
76 changes: 76 additions & 0 deletions analysis/pkg/satellite/analysis.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
//Copyright 2021 Censored Planet

// Package satellite contains analysis scripts for satellite data
package satellite

//Analyze processes data based on the type of analysis specified
//Input - The data ([]map[string]interface{}), the analysisType (Specified by the user prompt)
//Returns - The stats about the data (map[string]map[string]map[string]int)
//Analyze processes data based on the type of analysis specified.
//Input - The data ([]map[string]interface{}), the analysisType (specified by the user prompt:
//"Domain" or "Vantage Point")
//Returns - The stats about the data, keyed country -> query/resolver -> stat name
//(map[string]map[string]map[string]int). An unrecognized analysisType yields an
//empty (non-nil) map.
func Analyze(data []map[string]interface{}, analysisType string) map[string]map[string]map[string]int {
	switch analysisType {
	case "Domain":
		//Aggregate per country and per queried domain.
		return tally(data, "Query")
	case "Vantage Point":
		//Aggregate per country and per resolver IP.
		return tally(data, "Resolver")
	default:
		return map[string]map[string]map[string]int{}
	}
}

//tally aggregates measurement counts per country and per the value of the given
//field ("Query" or "Resolver"). For each (country, field value) pair it records:
//  Measurements  - total number of results seen
//  Anomalies     - results with Anomaly == true
//  Confirmations - results with Confirmed == "true"
//The "Confirmations" key is only created when at least one result for the pair
//had Fetched == true, so downstream CSV writing can distinguish "0 confirmations"
//from "blockpage matching was skipped" (reported as N/A).
//Note: Confirmed is compared against the string "true" — presumably the raw data
//encodes it as a string — while Anomaly and Fetched are booleans; this preserves
//the original comparison semantics exactly.
func tally(data []map[string]interface{}, field string) map[string]map[string]map[string]int {
	dataMap := map[string]map[string]map[string]int{}
	for _, result := range data {
		country := result["Country"].(string)
		target := result[field].(string)
		if dataMap[country] == nil {
			dataMap[country] = map[string]map[string]int{}
		}
		stats := dataMap[country][target]
		if stats == nil {
			//Measurements and Anomalies always appear, even when zero.
			stats = map[string]int{"Measurements": 0, "Anomalies": 0}
			dataMap[country][target] = stats
		}
		fetched := result["Fetched"] == true
		if fetched {
			//Create the key so the pair reports an explicit 0 even when no
			//individual confirmation succeeds.
			if _, ok := stats["Confirmations"]; !ok {
				stats["Confirmations"] = 0
			}
		}
		stats["Measurements"]++
		if result["Anomaly"] == true {
			stats["Anomalies"]++
		}
		if fetched && result["Confirmed"] == "true" {
			stats["Confirmations"]++
		}
	}
	return dataMap
}
44 changes: 44 additions & 0 deletions analysis/pkg/satellite/io.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
//Copyright 2021 Censored Planet

// Package satellite contains analysis scripts for satellite data
package satellite

import (
"encoding/csv"
"fmt"
"os"

log "github.com/sirupsen/logrus"
)

//WriteToCSV writes a map of analyzed data to a CSV file.
//Input - The data to write (map[string]map[string]map[string]int), the analysis type ("Vantage Point", "Domain"), Output csv filename
//Returns - None
//WriteToCSV expects the data to be in a certain nested map format.
//WriteToCSV writes a map of analyzed data to a CSV file.
//Input - The data to write (map[string]map[string]map[string]int, keyed
//country -> vantage point/website -> stat name), Output csv filename
//Returns - None
//WriteToCSV expects the data to be in the nested map format produced by Analyze.
//Rows whose stats map has no "Confirmations" key (i.e. blockpage matching was
//skipped for that pair) are written with "N/A" in the Confirmations column.
func WriteToCSV(dataMap map[string]map[string]map[string]int, filename string) {
	file, err := os.Create(filename)
	if err != nil {
		log.Fatal("Cannot create output file: ", err.Error())
	}
	defer file.Close()
	w := csv.NewWriter(file)

	//csv.Writer buffers; write errors may also surface at Flush/Error below.
	if err := w.Write([]string{"Country", "Vantage Point/Website", "Measurements", "Anomalies", "Confirmations"}); err != nil {
		log.Warn("Could not write header due to: ", err.Error())
	}

	for country, innerMap := range dataMap {
		for targetValue, statsMap := range innerMap {
			//Default to "N/A" when confirmation data was never collected for this pair.
			confirmations := "N/A"
			if c, ok := statsMap["Confirmations"]; ok {
				confirmations = fmt.Sprintf("%v", c)
			}
			row := []string{
				country,
				targetValue,
				fmt.Sprintf("%v", statsMap["Measurements"]),
				fmt.Sprintf("%v", statsMap["Anomalies"]),
				confirmations,
			}
			if err := w.Write(row); err != nil {
				log.Warn("Could not write row due to: ", err.Error())
			}
		}
	}

	//Flush buffered rows and surface any deferred write error (the original
	//`defer w.Flush()` silently discarded these).
	w.Flush()
	if err := w.Error(); err != nil {
		log.Warn("Could not flush output due to: ", err.Error())
	}
}

0 comments on commit f6362bd

Please sign in to comment.