Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
base repository: cohosh/snowflake
base: master
head repository: cohosh/snowflake
compare: ticket21304
Checking mergeability… Don’t worry, you can still create the pull request.
  • 11 commits
  • 8 files changed
  • 0 comments
  • 1 contributor
Commits on Mar 13, 2019
…d off of how tor maps IP addresses to country codes, and relies on the provided ipv4 and ipv6 files.
Commits on Mar 15, 2019
Paths to geoip files can now be passed in as arguments.
These values will default to /usr/share/tor/geoip(6), which is where the
Debian tor-geoip package installs database files
The geoip lookup function now returns the empty string for IP addresses
that are not present in the table. New tests for invalid IP address
lookups were added and a bug fix for checking an invalid returned index
Added a test to make sure that things don't crash if we're unable to
load the provided (or default) geoip file
Now when the geoip databases are updated, the broker operator can send a
SIGHUP signal to the broker process to reload the databases without
having to restart it.
Every hour, the broker will write per-country usage metrics to a log
Commits on Mar 20, 2019
Added a scrubber that takes all logging output to the standard logger
and passes through a series of regular expressions to replace IP
addresses with safe strings (e.g., X.X.X.X:443).

Ensure server logs to stdout are also scrubbed
Commits on Mar 21, 2019
IPv6 regexes didn't need to be that precise, added more tests for
edge-cases.
Added the log scrubber to the rest of the snowflake pieces that are
written in go.
Showing with 502 additions and 5 deletions.
  1. +51 −1 broker/broker.go
  2. +216 −0 broker/geoip.go
  3. +91 −0 broker/metrics.go
  4. +27 −0 broker/snowflake-broker_test.go
  5. +21 −1 client/snowflake.go
  6. +21 −1 proxy-go/snowflake.go
  7. +27 −2 server/server.go
  8. +48 −0 server/server_test.go
@@ -10,11 +10,16 @@ import (
"crypto/tls"
"flag"
"fmt"
"io"
"io/ioutil"
"log"
"net/http"
"regexp"
"strings"
"time"
"os"
"os/signal"
"syscall"

"golang.org/x/crypto/acme/autocert"
)
@@ -24,6 +29,23 @@ const (
ProxyTimeout = 10
)

// logScrubber is an io.Writer meant to be installed as a logger's output. It
// replaces IP addresses in everything written to it with placeholder text and
// forwards the sanitized bytes to output.
type logScrubber struct {
	output io.Writer
}

// Patterns used by logScrubber.Write. They are compiled once at package
// initialization instead of on every log write.
var (
	// scrubIPv4Pattern matches dotted-quad IPv4 addresses.
	scrubIPv4Pattern = regexp.MustCompile(`\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b`)
	// scrubIPv6Pattern matches the opening bracket and address portion of a
	// bracketed IPv6 address. An embedded IPv4 tail has already been replaced
	// by the IPv4 pass, which runs first.
	scrubIPv6Pattern = regexp.MustCompile(`\[([0-9a-fA-F]{0,4}:){2,7}([0-9a-fA-F]{0,4})?`)
)

// Write scrubs IP addresses out of b and writes the result to the underlying
// writer.
//
// On success it returns len(b): io.Writer requires n to count bytes consumed
// from the caller's slice, and the scrubbed text usually has a different
// length than the input. On failure it returns 0 and the error, because a
// partial count of scrubbed bytes cannot be mapped back onto b.
func (ls *logScrubber) Write(b []byte) (n int, err error) {
	scrubbed := scrubIPv4Pattern.ReplaceAll(b, []byte("X.X.X.X"))
	scrubbed = scrubIPv6Pattern.ReplaceAll(scrubbed, []byte("[X:X:X:X:X:X:X:X"))

	written, err := ls.output.Write(scrubbed)
	if err != nil {
		return 0, err
	}
	if written < len(scrubbed) {
		// Guard against an underlying writer that drops bytes silently.
		return 0, io.ErrShortWrite
	}
	return len(b), nil
}

type BrokerContext struct {
snowflakes *SnowflakeHeap
// Map keeping track of snowflakeIDs required to match SDP answers from
@@ -40,7 +62,7 @@ func NewBrokerContext() *BrokerContext {
snowflakes: snowflakes,
idToSnowflake: make(map[string]*Snowflake),
proxyPolls: make(chan *ProxyPoll),
metrics: new(Metrics),
metrics: NewMetrics(),
}
}

@@ -156,6 +178,10 @@ func clientOffers(ctx *BrokerContext, w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusBadRequest)
return
}

// Get client country stats
ctx.metrics.UpdateCountryStats(r.RemoteAddr)

// Immediately fail if there are no snowflakes available.
if ctx.snowflakes.Len() <= 0 {
log.Println("Client: No snowflake proxies available.")
@@ -213,6 +239,7 @@ func debugHandler(ctx *BrokerContext, w http.ResponseWriter, r *http.Request) {
s += fmt.Sprintf("\nsnowflake %d: %s", snowflake.index, snowflake.id)
}
s += fmt.Sprintf("\n\nroundtrip avg: %d", ctx.metrics.clientRoundtripEstimate)
s += fmt.Sprintf("\n\nclient country stats: %s", ctx.metrics.countryStats.Display())
w.Write([]byte(s))
}

@@ -225,18 +252,27 @@ func main() {
var acmeEmail string
var acmeHostnamesCommas string
var addr string
var geoipDatabase string
var geoip6Database string
var disableTLS bool

flag.StringVar(&acmeEmail, "acme-email", "", "optional contact email for Let's Encrypt notifications")
flag.StringVar(&acmeHostnamesCommas, "acme-hostnames", "", "comma-separated hostnames for TLS certificate")
flag.StringVar(&addr, "addr", ":443", "address to listen on")
flag.StringVar(&geoipDatabase, "geoipdb", "/usr/share/tor/geoip", "path to correctly formatted geoip database mapping IPv4 address ranges to country codes")
flag.StringVar(&geoip6Database, "geoip6db", "/usr/share/tor/geoip6", "path to correctly formatted geoip database mapping IPv6 address ranges to country codes")
flag.BoolVar(&disableTLS, "disable-tls", false, "don't use HTTPS")
flag.Parse()

log.SetFlags(log.LstdFlags | log.LUTC)
//We want to send the log output through our scrubber first
scrubber := &logScrubber{os.Stdout}
log.SetOutput(scrubber)

ctx := NewBrokerContext()

ctx.metrics.LoadGeoipDatabases(geoipDatabase, geoip6Database)

go ctx.Broker()

http.HandleFunc("/robots.txt", robotsTxtHandler)
@@ -251,6 +287,20 @@ func main() {
Addr: addr,
}

sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGHUP)

// go routine to handle a SIGHUP signal to allow the broker operator to send
// a SIGHUP signal when the geoip database files are updated, without requiring
// a restart of the broker
go func () {
for {
signal := <-sigChan
log.Println("Received signal:", signal, ". Reloading geoip databases.")
ctx.metrics.LoadGeoipDatabases(geoipDatabase, geoip6Database)
}
}()

if acmeHostnamesCommas != "" {
acmeHostnames := strings.Split(acmeHostnamesCommas, ",")
log.Printf("ACME hostnames: %q", acmeHostnames)
@@ -0,0 +1,216 @@
/*
This code is for loading database data that maps ip addresses to countries
for collecting and presenting statistics on snowflake use that might alert us
to censorship events.
The functions here are heavily based off of how tor maintains and searches their
geoip database
The tables used for geoip data must be structured as follows:
Recognized line formats for IPv4 are:
INTIPLOW,INTIPHIGH,CC
and
"INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME"
where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned
integers, and CC is a country code.
Recognized line format for IPv6 is:
IPV6LOW,IPV6HIGH,CC
where IPV6LOW and IPV6HIGH are IPv6 addresses and CC is a country code.
It also recognizes, and skips over, blank lines and lines that start
with '#' (comments).
*/
package main

import(
"net"
"sort"
"os"
"bufio"
"strings"
"log"
"strconv"
)

// GeoIPTable is the set of operations the loading and lookup code in this
// file needs from a geoip table. GeoIPLoadFile feeds it lines through
// parseEntry, and GetCountryByAddr searches it through Len and ElementAt.
type GeoIPTable interface {
// parseEntry handles one line of a geoip file and returns an error if the
// line cannot be accepted.
parseEntry(string) error
// Len returns the number of entries currently in the table.
Len() int
// ElementAt returns the entry stored at the given index.
ElementAt(int) GeoIPEntry
}

// GeoIPEntry is one row of a geoip table: an address range and the country
// code assigned to it.
type GeoIPEntry struct {
// ipLow is the lower bound of the range; GeoIPCheckRange treats it as
// inclusive.
ipLow net.IP
// ipHigh is the upper bound of the range; it is also treated as inclusive
// and is the value the lookup search compares against.
ipHigh net.IP
// country is the third comma-separated field of the database line, stored
// as read.
country string
}

// GeoIPv4Table holds entries parsed from IPv4 database lines, whose bounds
// are written as unsigned decimal integers.
// NOTE(review): lookups use sort.Search, so entries are presumably expected
// to appear in ascending address order in the file — confirm against the
// database format.
type GeoIPv4Table []GeoIPEntry
// GeoIPv6Table holds entries parsed from IPv6 database lines, whose bounds
// are written as textual IPv6 addresses.
type GeoIPv6Table []GeoIPEntry

// GeoipError describes a problem found while processing a geoip database
// file. The parseEntry methods return it for lines they cannot accept.
type GeoipError struct {
	problem string
}

// Error returns the stored problem description, which makes *GeoipError
// satisfy the built-in error interface.
func (ge *GeoipError) Error() string {
	description := ge.problem
	return description
}

// Len reports how many entries the IPv4 table holds.
func (t GeoIPv4Table) Len() int {
	return len(t)
}

// Len reports how many entries the IPv6 table holds.
func (t GeoIPv6Table) Len() int {
	return len(t)
}

// ElementAt returns the IPv4 entry at position i. Like any slice index it
// panics when i is out of range.
func (t GeoIPv4Table) ElementAt(i int) GeoIPEntry {
	entry := t[i]
	return entry
}

// ElementAt returns the IPv6 entry at position i. Like any slice index it
// panics when i is out of range.
func (t GeoIPv6Table) ElementAt(i int) GeoIPEntry {
	entry := t[i]
	return entry
}

// geoipStringToIP converts an IPv4 address written as an unsigned decimal
// integer (the encoding used by IPv4 geoip files) into a net.IP.
//
// The most significant byte of the integer becomes the first octet. If ipStr
// is not a base-10 number that fits in 32 bits, the failure is logged and
// 0.0.0.0 is returned.
func geoipStringToIP(ipStr string) net.IP {
	value, parseErr := strconv.ParseUint(ipStr, 10, 32)
	if parseErr != nil {
		log.Println("error parsing IP ", ipStr)
		return net.IPv4(0, 0, 0, 0)
	}

	// Converting to byte keeps only the low eight bits, so each shift
	// selects exactly one octet.
	return net.IPv4(
		byte(value>>24),
		byte(value>>16),
		byte(value>>8),
		byte(value),
	)
}

// parseEntry parses one line of an IPv4 geoip file and appends the resulting
// entry to the table.
//
// A data line has the form INTIPLOW,INTIPHIGH,CC, where the bounds are IPv4
// addresses written as unsigned decimal integers and CC is a country code.
// Blank lines and lines starting with '#' are skipped and return nil.
// Surrounding whitespace (including a trailing carriage return) is ignored.
// Any other line that does not split into exactly three comma-separated
// fields yields a *GeoipError.
func (table *GeoIPv4Table) parseEntry(candidate string) error {
	candidate = strings.TrimSpace(candidate)

	// The emptiness check must come first: indexing an empty string panics.
	if candidate == "" || candidate[0] == '#' {
		return nil
	}

	parsedCandidate := strings.Split(candidate, ",")
	if len(parsedCandidate) != 3 {
		log.Println("Received strings", parsedCandidate)
		return &GeoipError{
			problem: "Provided geoip file is incorrectly formatted",
		}
	}

	*table = append(*table, GeoIPEntry{
		ipLow:   geoipStringToIP(parsedCandidate[0]),
		ipHigh:  geoipStringToIP(parsedCandidate[1]),
		country: parsedCandidate[2],
	})
	return nil
}

// parseEntry parses one line of an IPv6 geoip file and appends the resulting
// entry to the table.
//
// A data line has the form IPV6LOW,IPV6HIGH,CC, where the bounds are textual
// IPv6 addresses and CC is a country code. Blank lines and lines starting
// with '#' are skipped and return nil. Surrounding whitespace (including a
// trailing carriage return) is ignored. A line that does not split into
// exactly three comma-separated fields, or whose bounds are not valid IP
// addresses, yields a *GeoipError and leaves the table unchanged.
func (table *GeoIPv6Table) parseEntry(candidate string) error {
	candidate = strings.TrimSpace(candidate)

	// The emptiness check must come first: indexing an empty string panics.
	if candidate == "" || candidate[0] == '#' {
		return nil
	}

	parsedCandidate := strings.Split(candidate, ",")
	if len(parsedCandidate) != 3 {
		return &GeoipError{
			problem: "Provided geoip file is incorrectly formatted",
		}
	}

	ipLow := net.ParseIP(parsedCandidate[0])
	ipHigh := net.ParseIP(parsedCandidate[1])
	// net.ParseIP signals failure with nil. Storing a nil bound would make
	// the byte-wise comparisons performed during lookup index out of range.
	if ipLow == nil || ipHigh == nil {
		return &GeoipError{
			problem: "Provided geoip file is incorrectly formatted",
		}
	}

	*table = append(*table, GeoIPEntry{
		ipLow:   ipLow,
		ipHigh:  ipHigh,
		country: parsedCandidate[2],
	})
	return nil
}
// GeoIPLoadFile reads the geoip database at pathname line by line and hands
// each line to table.parseEntry.
//
// It returns the first error encountered: failure to open the file, a line
// rejected by parseEntry, or a read failure reported by the scanner (an I/O
// error or a line longer than the scanner's buffer). Each error is also
// logged. Entries appended before a failure remain in the table.
func GeoIPLoadFile(table GeoIPTable, pathname string) error {
	geoipFile, err := os.Open(pathname)
	if err != nil {
		log.Println("Error: " + err.Error())
		return err
	}
	defer geoipFile.Close()

	scanner := bufio.NewScanner(geoipFile)
	for scanner.Scan() {
		if err := table.parseEntry(scanner.Text()); err != nil {
			log.Println("Error: " + err.Error())
			return err
		}
	}
	// Scan returns false both at end of file and on failure; only Err tells
	// the two apart. Without this check a partially read file would be
	// reported as a successful load.
	if err := scanner.Err(); err != nil {
		log.Println("Error: " + err.Error())
		return err
	}

	log.Println("Loaded ", table.Len(), " entries into table")

	return nil
}

// GeoIPRangeClosure reports whether key is less than or equal to the upper
// bound (ipHigh) of entry, i.e. whether key lies inside or below the entry's
// range. It returns false only when key is greater than ipHigh.
//
// Both addresses are compared in their 16-byte form, most significant byte
// first. GetCountryByAddr uses this as the predicate for sort.Search.
func GeoIPRangeClosure(key net.IP, entry GeoIPEntry) bool {
	keyBytes := key.To16()
	upper := entry.ipHigh.To16()

	for i := 0; i < len(keyBytes); i++ {
		switch {
		case keyBytes[i] < upper[i]:
			// First differing byte is smaller: key is below the bound.
			return true
		case keyBytes[i] > upper[i]:
			// First differing byte is larger: key is above the bound.
			return false
		}
	}

	// Every byte matched, so key equals the upper bound.
	return true
}

// GeoIPCheckRange reports whether key lies inside the inclusive range
// [entry.ipLow, entry.ipHigh].
//
// The addresses are compared as whole values in their 16-byte form, most
// significant byte first. Comparing each byte against both bounds on its own
// is not equivalent: 1.1.0.5 lies inside 1.0.255.0-1.1.0.255 even though its
// third byte (0) is smaller than the third byte of the lower bound (255).
//
// A key or bound that is not a valid IP address (nil) is never in range.
func GeoIPCheckRange(key net.IP, entry GeoIPEntry) bool {
	addr := key.To16()
	low := entry.ipLow.To16()
	high := entry.ipHigh.To16()
	if addr == nil || low == nil || high == nil {
		return false
	}

	return compareIP16(low, addr) <= 0 && compareIP16(addr, high) <= 0
}

// compareIP16 lexicographically compares two addresses of equal length and
// returns -1, 0 or 1 when a is less than, equal to, or greater than b.
func compareIP16(a, b net.IP) int {
	for i := range a {
		switch {
		case a[i] < b[i]:
			return -1
		case a[i] > b[i]:
			return 1
		}
	}
	return 0
}

// GetCountryByAddr returns the country code that table assigns to the IPv4
// or IPv6 address addr, or the empty string when no entry covers it.
//
// addr must be a bare IP address in textual form. Anything net.ParseIP
// rejects (including a host:port pair) yields the empty string rather than
// being matched against the table.
func GetCountryByAddr(table GeoIPTable, addr string) string {
	ip := net.ParseIP(addr)
	if ip == nil {
		// A nil IP has no bytes to compare, so the range helpers would
		// treat it as matching the first entry of the table.
		return ""
	}

	// Find the first entry whose upper bound is not below ip.
	index := sort.Search(table.Len(), func(i int) bool {
		return GeoIPRangeClosure(ip, table.ElementAt(i))
	})
	if index == table.Len() {
		return ""
	}

	// The search only guarantees ip <= ipHigh. An address in a gap between
	// ranges (e.g. 127.0.0.0/8) lands on the next higher range, so the
	// lower bound has to be confirmed as well.
	entry := table.ElementAt(index)
	if !GeoIPCheckRange(ip, entry) {
		return ""
	}

	return entry.country
}

No commit comments for this range