Skip to content
This repository was archived by the owner on Jul 16, 2024. It is now read-only.

Commit b77b366

Browse files
authored
Modify query and mutation to upsert (#21)
1 parent 024f970 commit b77b366

File tree

1 file changed

+46
-138
lines changed

1 file changed

+46
-138
lines changed

main.go

Lines changed: 46 additions & 138 deletions
Original file line numberDiff line numberDiff line change
@@ -44,30 +44,6 @@ import (
4444
const (
4545
// cTimeFormat is a Go time layout (weekday, month, day, clock, numeric zone, year).
// Presumably it matches the timestamp format of incoming tweets — confirm against the caller.
cTimeFormat = "Mon Jan 02 15:04:05 -0700 2006"
4646
// cDgraphTimeFormat is the layout intended for timestamps sent to Dgraph.
// NOTE(review): "+10:00" is not a zone placeholder in Go's time layouts (those are
// forms such as "-07:00" or "Z07:00"); the "1" here is read as the numeric-month
// placeholder and the rest as literal text, so this does not yield a fixed +10:00
// offset. Confirm the intent and consider time.RFC3339Nano.
cDgraphTimeFormat = "2006-01-02T15:04:05.999999999+10:00"
47-
48-
cDgraphTweetQuery = `
49-
query all($tweetID: string) {
50-
all(func: eq(id_str, $tweetID)) {
51-
uid
52-
}
53-
}
54-
`
55-
56-
cDgraphUserQuery = `
57-
query all($userID: string) {
58-
all(func: eq(user_id, $userID)) {
59-
uid
60-
user_id
61-
user_name
62-
screen_name
63-
description
64-
friends_count
65-
verified
66-
profile_banner_url
67-
profile_image_url
68-
}
69-
}
70-
`
7147
)
7248

7349
var (
@@ -128,6 +104,41 @@ type twitterTweet struct {
128104
Retweet bool `json:"retweet"`
129105
}
130106

107+
func buildQuery(tweet *twitterTweet) string {
108+
tweetQuery := `t as var(func: eq(id_str, "%s"))`
109+
userQuery := `%s as var(func: eq(user_id, "%s"))`
110+
111+
query := make([]string, len(tweet.Mention)+2)
112+
113+
query[0] = fmt.Sprintf(tweetQuery, tweet.IDStr)
114+
tweet.UID = "uid(t)"
115+
116+
query[1] = fmt.Sprintf(userQuery, "u", tweet.Author.UserID)
117+
tweet.Author.UID = "uid(u)"
118+
119+
usersMap := make(map[string]string)
120+
usersMap[tweet.Author.UserID] = "u"
121+
122+
// We will query only once for every user. We are storing all the users in the map who
123+
// we have already queried. If a user_id is repeated, we will just use uid that we got
124+
// in the previous query.
125+
for i, user := range tweet.Mention {
126+
var varName string
127+
if name, ok := usersMap[user.UserID]; ok {
128+
varName = name
129+
} else {
130+
varName = fmt.Sprintf("m%d", i+1)
131+
query[i+2] = fmt.Sprintf("%s as var(func: eq(user_id, %s))", varName, user.UserID)
132+
usersMap[user.UserID] = varName
133+
}
134+
135+
tweet.Mention[i].UID = fmt.Sprintf("uid(%s)", varName)
136+
}
137+
138+
finalQuery := fmt.Sprintf("query {%s}", strings.Join(query, "\n"))
139+
return finalQuery
140+
}
141+
131142
func runInserter(alphas []api.DgraphClient, c *y.Closer, tweets <-chan interface{}) {
132143
defer c.Done()
133144

@@ -159,10 +170,7 @@ func runInserter(alphas []api.DgraphClient, c *y.Closer, tweets <-chan interface
159170
// txn is not being discarded deliberately
160171
// defer txn.Discard()
161172

162-
if errTweet := updateFilteredTweet(ft, txn); errTweet != nil {
163-
atomic.AddUint32(&stats.ErrorsDgraph, 1)
164-
continue
165-
}
173+
queryStr := buildQuery(ft)
166174

167175
tweet, err := json.Marshal(ft)
168176
if err != nil {
@@ -178,8 +186,16 @@ func runInserter(alphas []api.DgraphClient, c *y.Closer, tweets <-chan interface
178186
// only ONE retry attempt is made
179187
retry := true
180188
RETRY:
181-
apiMutation := &api.Mutation{SetJson: tweet, CommitNow: commitNow}
182-
_, err = txn.Mutate(context.Background(), apiMutation)
189+
apiUpsert := &api.Request{
190+
Mutations: []*api.Mutation{
191+
&api.Mutation{
192+
SetJson: tweet,
193+
},
194+
},
195+
CommitNow: commitNow,
196+
Query: queryStr,
197+
}
198+
_, err = txn.Do(context.Background(), apiUpsert)
183199
switch {
184200
case err == nil:
185201
if commitNow {
@@ -262,114 +278,6 @@ func filterTweet(jsn interface{}) (*twitterTweet, error) {
262278
}, nil
263279
}
264280

265-
func updateFilteredTweet(ft *twitterTweet, txn *dgo.Txn) error {
266-
// first ensure that tweet doesn't exists
267-
resp, err := txn.QueryWithVars(context.Background(), cDgraphTweetQuery,
268-
map[string]string{"$tweetID": ft.IDStr})
269-
if err != nil {
270-
return err
271-
}
272-
var r struct {
273-
All []struct {
274-
UID string `json:"uid"`
275-
} `json:"all"`
276-
}
277-
if err := json.Unmarshal(resp.Json, &r); err != nil {
278-
return err
279-
}
280-
281-
// possible duplicate, shouldn't happen
282-
if len(r.All) > 0 {
283-
log.Println("found duplicate tweet with id:", ft.IDStr)
284-
return errShouldNotReach
285-
}
286-
287-
// map to check for duplicates
288-
users := make(map[string]string)
289-
290-
userID := ft.Author.UserID
291-
if u, err := queryUser(txn, &ft.Author); err != nil {
292-
return err
293-
} else if u != nil {
294-
ft.Author = *u
295-
}
296-
users[userID] = ft.Author.UID
297-
298-
userMentions := make([]twitterUser, 0)
299-
for i, m := range ft.Mention {
300-
if dup, ok := users[m.UserID]; ok && dup != "" {
301-
userMentions = append(userMentions, twitterUser{UID: dup})
302-
continue
303-
} else if ok && dup == "" {
304-
// TODO: find a way to not ignore this mention
305-
continue
306-
}
307-
308-
userID := m.UserID
309-
if u, err := queryUser(txn, &m); err != nil {
310-
return err
311-
} else if u != nil {
312-
ft.Mention[i] = *u
313-
}
314-
userMentions = append(userMentions, ft.Mention[i])
315-
users[userID] = m.UID
316-
}
317-
ft.Mention = userMentions
318-
319-
return nil
320-
}
321-
322-
func equalsUser(src, dst *twitterUser) bool {
323-
return src.UserID == dst.UserID &&
324-
src.UserName == dst.UserName &&
325-
src.ScreenName == dst.ScreenName &&
326-
src.Description == dst.Description &&
327-
src.FriendsCount == dst.FriendsCount &&
328-
src.Verified == dst.Verified &&
329-
src.ProfileBannerURL == dst.ProfileBannerURL &&
330-
src.ProfileImageURL == dst.ProfileImageURL
331-
}
332-
333-
func queryUser(txn *dgo.Txn, src *twitterUser) (*twitterUser, error) {
334-
resp, err := txn.QueryWithVars(context.Background(), cDgraphUserQuery,
335-
map[string]string{"$userID": src.UserID})
336-
if err != nil {
337-
return nil, err
338-
}
339-
340-
var r struct {
341-
All []twitterUser `json:"all"`
342-
}
343-
if err := json.Unmarshal(resp.Json, &r); err != nil {
344-
return nil, err
345-
}
346-
347-
if len(r.All) > 1 {
348-
log.Println("found duplicate users in Dgraph with id:", r.All[0].UserID)
349-
return nil, errShouldNotReach
350-
} else if len(r.All) == 0 {
351-
return nil, nil
352-
} else if len(r.All) == 1 && !equalsUser(src, &r.All[0]) {
353-
return &r.All[0], nil
354-
} else {
355-
return &twitterUser{UID: r.All[0].UID}, nil
356-
}
357-
}
358-
359-
func getTrends(id int64, api *anaconda.TwitterApi) ([]string, error) {
360-
resp, err := api.GetTrendsByPlace(id, nil)
361-
if err != nil {
362-
return nil, err
363-
}
364-
365-
trends := make([]string, len(resp.Trends))
366-
for i, t := range resp.Trends {
367-
trends[i] = t.Name
368-
}
369-
370-
return trends, nil
371-
}
372-
373281
func readCredentials(path string) twitterCreds {
374282
jsn, err := ioutil.ReadFile(path)
375283
checkFatal(err, "Unable to open twitter credentials file '%s'", path)

0 commit comments

Comments
 (0)