Skip to content

Commit

Permalink
feat: enhanced string interning and added concurrent processing
Browse files Browse the repository at this point in the history
  • Loading branch information
wyattjoh committed Mar 30, 2022
1 parent 358babb commit f35c443
Show file tree
Hide file tree
Showing 18 changed files with 244 additions and 106 deletions.
10 changes: 10 additions & 0 deletions README.md
Expand Up @@ -221,3 +221,13 @@ go build -o <location>
`--disableMonotonicCursorTimes` flag. This means that every timestamp emitted
that shares the same second time will automatically have its ms time
incremented to prevent collisions.

<!-- STARTED 6.3GB RAM -->

```bash
cat __sensitive__/logs.json | jq 'select(.msg == "memory stats" and .run == 1648582446).alloc' | sort -n | tail -n1
cat __sensitive__/logs.json | jq 'select(.msg == "memory stats" and .run == 1648584673).alloc' | sort -n | tail -n1

cat __sensitive__/logs.json | jq 'select(.msg == "memory stats" and .run == 1648582446).sys' | sort -n | tail -n1
cat __sensitive__/logs.json | jq 'select(.msg == "memory stats" and .run == 1648584673).sys' | sort -n | tail -n1
```
8 changes: 4 additions & 4 deletions common/coral/commentActions.go
Expand Up @@ -6,13 +6,13 @@ import "time"
// CommentAction is the base Coral Comment Action that represents an action
// against a Comment.
type CommentAction struct {
TenantID string `json:"tenantID" validate:"required"`
TenantID string `json:"tenantID,intern" validate:"required"`
ID string `json:"id" conform:"trim" validate:"required"`
SiteID string `json:"siteID" validate:"required"`
ActionType string `json:"actionType" validate:"oneof=REACTION DONT_AGREE FLAG,required"`
SiteID string `json:"siteID,intern" validate:"required"`
ActionType string `json:"actionType,intern" validate:"oneof=REACTION DONT_AGREE FLAG,required"`
CommentID string `json:"commentID" validate:"required"`
CommentRevisionID string `json:"commentRevisionID" validate:"required"`
Reason string `json:"reason,omitempty" validate:"omitempty,oneof= COMMENT_DETECTED_BANNED_WORD COMMENT_DETECTED_LINKS COMMENT_DETECTED_PREMOD_USER COMMENT_DETECTED_RECENT_HISTORY COMMENT_DETECTED_REPEAT_POST COMMENT_DETECTED_SPAM COMMENT_DETECTED_SUSPECT_WORD COMMENT_DETECTED_TOXIC COMMENT_REPORTED_OFFENSIVE COMMENT_REPORTED_OTHER COMMENT_REPORTED_SPAM"`
Reason string `json:"reason,omitempty,intern" validate:"omitempty,oneof= COMMENT_DETECTED_BANNED_WORD COMMENT_DETECTED_LINKS COMMENT_DETECTED_PREMOD_USER COMMENT_DETECTED_RECENT_HISTORY COMMENT_DETECTED_REPEAT_POST COMMENT_DETECTED_SPAM COMMENT_DETECTED_SUSPECT_WORD COMMENT_DETECTED_TOXIC COMMENT_REPORTED_OFFENSIVE COMMENT_REPORTED_OTHER COMMENT_REPORTED_SPAM"`
AdditionalDetails string `json:"additionalDetails,omitempty"`
StoryID string `json:"storyID" validate:"required"`
UserID *string `json:"userID" validate:"required"`
Expand Down
8 changes: 4 additions & 4 deletions common/coral/commentActions_easyjson.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions common/coral/comments.go
Expand Up @@ -5,7 +5,7 @@ import "time"

type RevisionPerspective struct {
Score float64 `json:"score"`
Model string `json:"model"`
Model string `json:"model,intern"`
}

// RevisionMetadata is the metadata associated with a given Revision for a
Expand All @@ -26,23 +26,23 @@ type Revision struct {

// CommentTag is a Tag associated with a Comment in Coral.
type CommentTag struct {
Type string `json:"type" conform:"trim" validate:"oneof=STAFF FEATURED REVIEW QUESTION,required"`
Type string `json:"type,intern" conform:"trim" validate:"oneof=STAFF FEATURED REVIEW QUESTION,required"`
CreatedBy string `json:"createdBy,omitempty"`
CreatedAt Time `json:"createdAt" validate:"required"`
}

// Comment is the base Coral Comment that is used in Coral.
type Comment struct {
TenantID string `json:"tenantID" validate:"required"`
TenantID string `json:"tenantID,intern" validate:"required"`
ID string `json:"id" conform:"trim" validate:"required"`
SiteID string `json:"siteID" validate:"required"`
SiteID string `json:"siteID,intern" validate:"required"`
AncestorIDs []string `json:"ancestorIDs" validate:"required"`
ParentID string `json:"parentID,omitempty" conform:"trim"`
ParentRevisionID string `json:"parentRevisionID,omitempty" conform:"trim"`
AuthorID string `json:"authorID" conform:"trim" validate:"required"`
StoryID string `json:"storyID" conform:"trim" validate:"required"`
Revisions []Revision `json:"revisions" validate:"required"`
Status string `json:"status" conform:"trim" validate:"oneof=NONE APPROVED REJECTED PREMOD SYSTEM_WITHHELD,required"`
Status string `json:"status,intern" conform:"trim" validate:"oneof=NONE APPROVED REJECTED PREMOD SYSTEM_WITHHELD,required"`
ActionCounts map[string]int `json:"actionCounts" validate:"required"`
ChildIDs []string `json:"childIDs" validate:"required"`
Tags []CommentTag `json:"tags" validate:"required"`
Expand Down
10 changes: 5 additions & 5 deletions common/coral/comments_easyjson.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions common/coral/stories.go
Expand Up @@ -81,9 +81,9 @@ type StoryMetadata struct {

// Story is the base Coral Story that is used in Coral.
type Story struct {
TenantID string `json:"tenantID" validate:"required"`
TenantID string `json:"tenantID,intern" validate:"required"`
ID string `json:"id" conform:"trim" validate:"required"`
SiteID string `json:"siteID" validate:"required"`
SiteID string `json:"siteID,intern" validate:"required"`
URL string `json:"url" validate:"required,url"`
CommentCounts StoryCommentCounts `json:"commentCounts"`
Settings StorySettings `json:"settings"`
Expand Down
4 changes: 2 additions & 2 deletions common/coral/stories_easyjson.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions common/coral/users.go
Expand Up @@ -16,7 +16,7 @@ type UserNotifications struct {
OnFeatured bool `json:"onFeatured"`
OnStaffReplies bool `json:"onStaffReplies"`
OnModeration bool `json:"onModeration"`
DigestFrequency string `json:"digestFrequency" validate:"oneof=NONE DAILY HOURLY"`
DigestFrequency string `json:"digestFrequency,intern" validate:"oneof=NONE DAILY HOURLY"`
}

func NewUserNotifications() UserNotifications {
Expand Down Expand Up @@ -123,12 +123,12 @@ type UserCommentCounts struct {
}

type User struct {
TenantID string `json:"tenantID" validate:"required"`
TenantID string `json:"tenantID,intern" validate:"required"`
ID string `json:"id" conform:"trim" validate:"required"`
Username string `json:"username" validate:"required"`
Email string `json:"email,omitempty" conform:"email,lower" validate:"email"`
Profiles []UserProfile `json:"profiles,omitempty"`
Role string `json:"role" validate:"required,oneof=COMMENTER STAFF MODERATOR ADMIN"`
Role string `json:"role,intern" validate:"required,oneof=COMMENTER STAFF MODERATOR ADMIN"`
Notifications UserNotifications `json:"notifications"`
ModeratorNotes []string `json:"moderatorNotes"`
Status UserStatus `json:"status"`
Expand Down
6 changes: 3 additions & 3 deletions common/coral/users_easyjson.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions go.mod
Expand Up @@ -10,6 +10,7 @@ require (
github.com/go-playground/locales v0.12.1 // indirect
github.com/go-playground/universal-translator v0.16.0 // indirect
github.com/icrowley/fake v0.0.0-20180203215853-4178557ae428 // indirect
github.com/josharian/intern v1.0.0
github.com/kr/pretty v0.1.0 // indirect
github.com/leebenson/conform v0.0.0-20190822094432-4c55492f71d7
github.com/leodido/go-urn v1.1.0 // indirect
Expand Down
79 changes: 61 additions & 18 deletions internal/utility/json.go
Expand Up @@ -4,51 +4,53 @@ import (
"bufio"
"io"
"os"
"runtime"
"sync"

"github.com/mailru/easyjson"
"github.com/pkg/errors"
)

type nopCloser struct {
io.Writer
type Writer interface {
Write(doc easyjson.Marshaler) error
Close() error
}

func (nopCloser) Close() error { return nil }
// nopJSONWriter is a Writer whose Write and Close do nothing and always
// return nil. NewJSONWriter returns one when dryRun is true.
type nopJSONWriter struct{}

func NewJSONWriter(dryRun bool, fileName string) (*JSONWriter, error) {
var dest io.WriteCloser
// Write ignores the supplied document and always reports success.
func (*nopJSONWriter) Write(easyjson.Marshaler) error {
	return nil
}

// Close has nothing to release and always reports success.
func (*nopJSONWriter) Close() error {
	return nil
}

func NewJSONWriter(dryRun bool, fileName string) (Writer, error) {
if dryRun {
dest = nopCloser{io.Discard}
} else {
var err error
dest, err = os.Create(fileName)
if err != nil {
return nil, errors.Wrap(err, "could not create file for writing")
}
return &nopJSONWriter{}, nil
}

dest, err := os.Create(fileName)
if err != nil {
return nil, errors.Wrap(err, "could not create file for writing")
}

w := bufio.NewWriter(dest)

return &JSONWriter{
f: dest,
w: w,
filename: fileName,
f: dest,
w: w,
}, nil
}

type JSONWriter struct {
f io.WriteCloser
w *bufio.Writer

filename string
}

func (c *JSONWriter) Write(doc easyjson.Marshaler) error {
if _, err := easyjson.MarshalToWriter(doc, c.w); err != nil {
return errors.Wrap(err, "could not marshal output")
}

if _, err := c.w.WriteString("\n"); err != nil {
if _, err := c.w.WriteRune('\n'); err != nil {
return errors.Wrap(err, "could not write newline")
}

Expand All @@ -69,6 +71,47 @@ func (c *JSONWriter) Close() error {

// JSONReaderFn is the per-line callback accepted by ReadJSON and
// ReadJSONConcurrently. It receives a line number and that line's raw bytes,
// and returns a non-nil error to report a failure.
type JSONReaderFn func(line int, data []byte) error

// Line is the unit of work ReadJSONConcurrently sends to its worker
// goroutines: one line's raw bytes together with its line number.
type Line struct {
	// LineNumber is the line value passed to the reader callback.
	LineNumber int
	// Data is the raw byte content passed to the reader callback.
	Data []byte
}

// ReadJSONConcurrently reads fileName via ReadJSON and distributes every line
// across runtime.NumCPU() worker goroutines, each of which calls fn. Lines are
// processed in parallel, so fn must be safe for concurrent use and must not
// depend on being invoked in line order.
//
// It returns the first error reported by fn, or otherwise whatever ReadJSON
// returned. After fn has failed no further lines are queued, although lines
// that were already queued may still be handed to the remaining workers. All
// workers have exited by the time this function returns.
func ReadJSONConcurrently(fileName string, fn JSONReaderFn) error {
	count := runtime.NumCPU()
	ch := make(chan Line, count)

	// done is closed exactly once, by the first worker whose fn call fails.
	done := make(chan struct{})

	var (
		wg    sync.WaitGroup
		once  sync.Once
		fnErr error // written once before close(done); read only after <-done or wg.Wait
	)

	wg.Add(count)
	for i := 0; i < count; i++ {
		go func() {
			defer wg.Done()

			for line := range ch {
				if err := fn(line.LineNumber, line.Data); err != nil {
					// Report the failure to the caller instead of panicking and
					// taking the whole process down from a worker goroutine.
					once.Do(func() {
						fnErr = err
						close(done)
					})

					return
				}
			}
		}()
	}

	// NOTE(review): data is used by a worker after this callback has returned.
	// That assumes ReadJSON passes a fresh slice for every line rather than a
	// reused read buffer — confirm against ReadJSON's implementation.
	readErr := ReadJSON(fileName, func(line int, data []byte) error {
		// Prefer stopping over queueing more work once a worker has failed.
		select {
		case <-done:
			return fnErr
		default:
		}

		// Also watch done while sending: if every worker has exited on error,
		// nobody drains ch and a bare send would block forever.
		select {
		case ch <- Line{LineNumber: line, Data: data}:
			return nil
		case <-done:
			return fnErr
		}
	})

	// Always release the workers, including when ReadJSON failed; returning
	// early without closing ch would leak every worker goroutine.
	close(ch)
	wg.Wait()

	if fnErr != nil {
		return fnErr
	}

	return readErr
}

func ReadJSON(fileName string, fn JSONReaderFn) error {
f, err := os.Open(fileName)
if err != nil {
Expand Down

0 comments on commit f35c443

Please sign in to comment.