Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial implementation of go-translate #1

Merged
merged 14 commits into from Mar 22, 2019

Add translate endpoint for translate requests

  • Loading branch information
yrliou committed Mar 18, 2019
commit 58049ebf5362324aad8b119102da7d8b67e2b7dc
@@ -0,0 +1,87 @@
package controller

import (
"fmt"
"io"
"io/ioutil"
"net/http"
"time"

"github.com/brave/go-translate/translate"
"github.com/go-chi/chi"
log "github.com/sirupsen/logrus"
)

// MSTranslateServer is the base URL of the remote Microsoft translate server
// that translate requests are forwarded to. It is a variable rather than a
// constant so that it can be set to another host during testing.
var MSTranslateServer = "https://api.cognitive.microsofttranslator.com"

// TranslateRouter returns a chi router for translate requests. It routes
// POST /translate to the Translate handler.
func TranslateRouter() chi.Router {
	router := chi.NewRouter()
	router.Post("/translate", Translate)
	return router
}

// getHTTPClient returns a new HTTP client whose requests time out after ten
// seconds. Every call yields a distinct client value.
func getHTTPClient() *http.Client {
	client := new(http.Client)
	client.Timeout = 10 * time.Second
	return client
}

// Translate converts a Google format translate request into a Microsoft format
// one, which is sent to the Microsoft server, and writes a Google format
// response back to the client.
func Translate(w http.ResponseWriter, r *http.Request) {
// Convert google format request to MS format
// isAuto is kept so the MS response can later be converted into the matching
// Google response shape.
req, isAuto, err := translate.ToMicrosoftRequest(r, MSTranslateServer)
This conversation was marked as resolved by yrliou

This comment has been minimized.

This comment has been minimized.

Copy link
@yrliou

yrliou Mar 21, 2019

Author Member

Same as above, we didn't copy the request header from google request here.

This comment has been minimized.

Copy link
@jumde

jumde Mar 21, 2019

Collaborator

Resolved: ToMicrosoftRequest does not add the X-Forwarded-* headers to the request.

if err != nil {
	http.Error(w, fmt.Sprintf("Error converting to MS request: %v", err), http.StatusBadRequest)
	return
}

// Send translate request to MS server
client := getHTTPClient()
msResp, err := client.Do(req)
if err != nil {
	http.Error(w, fmt.Sprintf("Error sending request to MS server: %v", err), http.StatusInternalServerError)
	return
}
defer func() {
	err := msResp.Body.Close()
	if err != nil {
		log.Errorf("Error closing response body stream: %v", err)
	}
}()

// Set Header
// Mirror the upstream Content-Type only when the MS response carries one.
// Indexing the raw header slice would panic on a response without it.
if contentType := msResp.Header.Get("Content-Type"); contentType != "" {
	w.Header().Set("Content-Type", contentType)
}
w.Header().Set("Access-Control-Allow-Origin", "*") // same as Google response
This conversation was marked as resolved by yrliou

This comment has been minimized.

Copy link
@jumde

jumde Mar 20, 2019

Collaborator

https://translate.googleapis.com instead of *?

This comment has been minimized.

Copy link
@yrliou

yrliou Mar 20, 2019

Author Member

This is mimicking google response and they use *, so I just follow them to avoid any surprises.


// Copy response body if status is not OK
if msResp.StatusCode != http.StatusOK {
	w.WriteHeader(msResp.StatusCode)
	// The status line is already committed at this point, so a failed copy
	// can only be logged. Calling http.Error here would attempt a second
	// WriteHeader and append error text to a partially written body.
	if _, err = io.Copy(w, msResp.Body); err != nil {
		log.Errorf("Error copying MS response body: %v", err)
	}
	return
}

// Set google format response body
msBody, err := ioutil.ReadAll(msResp.Body)
if err != nil {
	http.Error(w, fmt.Sprintf("Error reading MS response body: %v", err), http.StatusInternalServerError)
	// Stop here: without the full MS body there is nothing to convert, and
	// continuing would write a second response on top of the error.
	return
}
body, err := translate.ToGoogleResponseBody(msBody, isAuto)
if err != nil {
	http.Error(w, fmt.Sprintf("Error converting to google response body: %v", err), http.StatusInternalServerError)
	return
}
w.WriteHeader(msResp.StatusCode)
_, err = w.Write(body)
if err != nil {
	// The response has already been started, so the failure is only logged.
	log.Errorf("Error writing response body for translate requests: %v", err)
}
}
@@ -1,4 +1,9 @@
package main

import (
"github.com/brave/go-translate/server"
)

// main is the program entry point; it hands control to server.StartServer.
func main() {
server.StartServer()
}
@@ -0,0 +1,61 @@
package server

import (
"context"
"fmt"
"log"
"net/http"
"time"

"github.com/brave-intl/bat-go/middleware"
"github.com/brave/go-translate/controller"
"github.com/getsentry/raven-go"
"github.com/go-chi/chi"
chiware "github.com/go-chi/chi/middleware"
"github.com/pressly/lg"
"github.com/sirupsen/logrus"
)

// setupLogger creates a new logrus logger, installs it as the lg default
// logger, and redirects the standard "log" package output to it. It returns
// the context produced by attaching the logger to ctx, together with the
// logger itself.
func setupLogger(ctx context.Context) (context.Context, *logrus.Logger) {
	stdLogger := logrus.New()

	// Route anything written through the standard "log" package to logrus.
	lg.RedirectStdlogOutput(stdLogger)
	lg.DefaultLogger = stdLogger

	return lg.WithLoggerContext(ctx, stdLogger), stdLogger
}

// setupRouter builds the chi router for the server: it installs the shared
// middleware, mounts the translate routes at "/" and registers "/metrics".
// logger may be nil, in which case the request-logging middleware is not
// installed. ctx is returned unchanged alongside the router.
func setupRouter(ctx context.Context, logger *logrus.Logger) (context.Context, *chi.Mux) {
r := chi.NewRouter()

r.Use(chiware.RequestID)
r.Use(chiware.RealIP)

This comment has been minimized.

Copy link
@jumde

jumde Mar 21, 2019

Collaborator

@yrliou - Why do we need RealIP, looks like it sets the X-Forwarded-* headers? https://godoc.org/github.com/go-chi/chi/middleware#RealIP

This comment has been minimized.

Copy link
@yrliou

yrliou Mar 21, 2019

Author Member

I think this is for requests coming in from brave-core, but not the requests we're sending out to Microsoft AFAIK.
And based on the doc, it's setting the remote address in the requests instead of setting the header.
It's a shared pattern from bat-go and go-update btw.

This comment has been minimized.

Copy link
@jumde

jumde Mar 22, 2019

Collaborator

My concern was not about the headers sent to Microsoft, but about adding more avenues for logging user IPs on the go-translate server. For now, having a log-retention policy on the infrastructure side sounds like a good idea.

// Heartbeat middleware is registered for the "/" path.
r.Use(chiware.Heartbeat("/"))
// Timeout middleware is configured with a 60 second limit.
r.Use(chiware.Timeout(60 * time.Second))
r.Use(middleware.BearerToken)

// Request logging is optional: it is only installed when a logger was given.
if logger != nil {
// Also handles panic recovery
r.Use(middleware.RequestLogger(logger))
}

// Translate routes are mounted at the root; /metrics is registered next to them.
r.Mount("/", controller.TranslateRouter())
r.Get("/metrics", middleware.Metrics())

return ctx, r
}

// StartServer starts the translate proxy server on port 8195. It sets up the
// logger and router, then serves until ListenAndServe returns an error, which
// is reported and turned into a panic.
func StartServer() {
serverCtx, logger := setupLogger(context.Background())
logger.WithFields(logrus.Fields{"prefix": "main"}).Info("Starting server")

This comment has been minimized.

Copy link
@jumde

jumde Mar 20, 2019

Collaborator

We need to make sure that we are not logging any user identifiable data here.

This comment has been minimized.

Copy link
@yrliou

yrliou Mar 20, 2019

Author Member
INFO[4174] request complete                              http_method=POST http_proto=HTTP/1.1 http_scheme=http remote_addr="[::1]:57415" req_id=jocelyn-imacpro.local/aOPC3es7qv-000019 resp_bytes_length=821 resp_elapsed_ms=758.939688 resp_status=200 uri="http://localhost:8195/translate?anno=3&client=te_lib&format=html&v=1.0&sp=smrd&key=...&logld=vTE_20181015_01&sl=fr&tl=en&sp=nmt&tc=4&sr=1&tk=558905.922117&mode=1" user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36"

This shares the same pattern as go-update so brave/go-update#22 applies here too.
As @jumde suggested, we should have a log-retention policy enforced on the infra side when we deploy the server.

cc @tomlowenthal

serverCtx, r := setupRouter(serverCtx, logger)
// No host is given in the address, only the port.
port := ":8195"
// NOTE(review): the message has no trailing newline, so whatever is printed
// next to stdout continues on the same line; confirm whether that is intended.
fmt.Printf("Starting server: http://localhost%s", port)
// NOTE(review): no ReadTimeout/WriteTimeout is set on the server here; confirm
// whether the Timeout middleware alone is considered sufficient.
srv := http.Server{Addr: port, Handler: chi.ServerBaseContext(serverCtx, r)}
This conversation was marked as resolved by yrliou

This comment has been minimized.

Copy link
@diracdeltas

diracdeltas Mar 20, 2019

Member

does Addr: ":8195" resolve to localhost:8195 or 0.0.0.0:8195? We don't want to listen on 0.0.0.0 for security reasons

This comment has been minimized.

Copy link
@yrliou

yrliou Mar 20, 2019

Author Member

Resolved per slack discussion, listens on 0.0.0.0 but should be fine for servers serving public content. Same pattern is used in bat-go and go-update.

// ListenAndServe blocks while serving; any error it returns is treated as fatal.
err := srv.ListenAndServe()
if err != nil {
// Report the error through raven before panicking.
raven.CaptureErrorAndWait(err, nil)
log.Panic(err)
}
}
@@ -0,0 +1,144 @@
package translate

import (
"bytes"
"encoding/json"
"errors"
"net/http"
"net/url"
"os"
"strconv"
)

// RequestBody represents one element of the JSON array sent as the body of a
// Microsoft translate request. Text holds a single string to translate and is
// serialized under the "Text" key.
type RequestBody struct {
Text string `json:"Text"`
}

// MicrosoftResponseBody represents the JSON format of Microsoft response
// bodies. The size of Translations is limited to 1 because multiple
// translations per input are not compatible with Google.
// Format with auto-detect source language:
// [
// {
// "detectedLanguage": {"language": "de", "score": 1.0},
// "translations": [{"text": "Hallo", "to": "en"}]
// },
// {
// "detectedLanguage": {"language": "de", "score": 1.0},
// "translations": [{"text": "Welt", "to": "en"}]
// }
// ]
// Format without auto-detect source language:
// [
// {
// "translations": [{"text": "Hallo", "to": "en"}]
// },
// {
// "translations": [{"text": "Welt", "to": "en"}]
// }
// ]
//
// score and to are not saved in this struct because we don't need them to
// convert to a Google format response.
type MicrosoftResponseBody []struct {
// DetectedLang is only present in responses to auto-detect requests; it is
// left at its zero value otherwise.
DetectedLang struct {
Language string `json:"language"`
} `json:"detectedLanguage,omitempty"`
// Translations is a fixed-size array, so index 0 always exists. When
// decoding, encoding/json discards any additional JSON array elements.
Translations [1]struct {
Text string `json:"text"`
} `json:"translations"`
}

const (
// translateEndpoint is the path and API-version query string appended to
// the server URL to form the Microsoft translate request URL.
translateEndpoint = "/translate?api-version=3.0"
)

// ToMicrosoftRequest builds the Microsoft format translate request that
// corresponds to the Google format request r.
//
// The source ("sl") and target ("tl") languages are read from r's query
// string and must each appear exactly once. The texts to translate are the
// "q" values of r's POST form. serverURL is the base URL of the Microsoft
// server; the translate endpoint is appended to it.
//
// It returns the new POST request, whether the source language was "auto"
// (in which case no "from" parameter is sent), and any error hit while
// parsing the input or building the request. The subscription key header is
// filled from the MS_TRANSLATE_API_KEY environment variable.
func ToMicrosoftRequest(r *http.Request, serverURL string) (*http.Request, bool, error) {
	// Read the Google format language parameters.
	googleQuery := r.URL.Query()
	sources, targets := googleQuery["sl"], googleQuery["tl"]
	if len(sources) != 1 {
		return nil, false, errors.New("invalid query parameter format: There should be one sl parameter")
	}
	if len(targets) != 1 {
		return nil, false, errors.New("invalid query parameter format: There should be one tl parameter")
	}
	source, target := sources[0], targets[0]
	autoDetect := source == "auto"

	// Build the Microsoft URL and its query parameters.
	endpoint, err := url.Parse(serverURL + translateEndpoint)
	if err != nil {
		return nil, false, err
	}
	params := endpoint.Query()
	if !autoDetect {
		params.Add("from", source)
	}
	params.Add("to", target)
	params.Add("textType", "html")
	endpoint.RawQuery = params.Encode()

	// Turn every "q" form value into one element of the MS JSON body.
	if err := r.ParseForm(); err != nil {
		return nil, false, err
	}
	texts := r.PostForm["q"]
	items := make([]RequestBody, 0, len(texts))
	for _, text := range texts {
		items = append(items, RequestBody{Text: text})
	}
	payload, err := json.Marshal(items)
	if err != nil {
		return nil, false, err
	}

	// Assemble the outgoing request and its headers.
	msReq, err := http.NewRequest("POST", endpoint.String(), bytes.NewBuffer(payload))
	if err != nil {
		return nil, false, err
	}
	msReq.Header.Add("Content-Type", "application/json")
	msReq.Header.Add("Content-Length", strconv.FormatInt(msReq.ContentLength, 10))
	msReq.Header.Add("Ocp-Apim-Subscription-Key", os.Getenv("MS_TRANSLATE_API_KEY"))
	return msReq, autoDetect, nil
}

// ToGoogleResponseBody converts a Microsoft translate response body into the
// JSON response body Google would have returned.
//
// When isAuto is false the result is a flat array of translated strings,
// ["aa", "bb", ...]. When isAuto is true each entry is a pair of the
// translated text and the detected source language,
// [["aa", "lang_a"], ["bb", "lang_b"], ...]. An error is returned if body is
// not valid Microsoft response JSON or the result cannot be encoded.
func ToGoogleResponseBody(body []byte, isAuto bool) ([]byte, error) {
	var parsed MicrosoftResponseBody
	if err := json.Unmarshal(body, &parsed); err != nil {
		return nil, err
	}

	if isAuto {
		// Auto-detected source: pair every translation with its language.
		pairs := make([][2]string, 0, len(parsed))
		for i := range parsed {
			pairs = append(pairs, [2]string{
				parsed[i].Translations[0].Text,
				parsed[i].DetectedLang.Language,
			})
		}
		return json.Marshal(pairs)
	}

	// Explicit source language: only the translated texts are returned.
	texts := make([]string, 0, len(parsed))
	for i := range parsed {
		texts = append(texts, parsed[i].Translations[0].Text)
	}
	return json.Marshal(texts)
}
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.