@@ -44,30 +44,6 @@ import (
const (
	// cTimeFormat is a Go reference-time layout: weekday, month name, day,
	// clock time, numeric zone offset, year.
	// NOTE(review): presumably matches Twitter's created_at field — confirm.
	cTimeFormat = "Mon Jan 02 15:04:05 -0700 2006"

	// cDgraphTimeFormat is an RFC 3339 style layout with optional fractional
	// seconds and a numeric zone offset.
	//
	// The zone must be spelled with the reference element "-07:00". The former
	// "+10:00" suffix is not a zone element: Go parses its "1" as the month
	// placeholder and treats the rest as literal text, so the formatted
	// "offset" was the month number followed by "0:00".
	cDgraphTimeFormat = "2006-01-02T15:04:05.999999999-07:00"

	// cDgraphTweetQuery returns the uid of every node whose id_str equals
	// $tweetID.
	cDgraphTweetQuery = `
	query all($tweetID: string) {
		all(func: eq(id_str, $tweetID)) {
			uid
		}
	}
	`

	// cDgraphUserQuery returns the uid and profile fields of every node whose
	// user_id equals $userID.
	cDgraphUserQuery = `
	query all($userID: string) {
		all(func: eq(user_id, $userID)) {
			uid
			user_id
			user_name
			screen_name
			description
			friends_count
			verified
			profile_banner_url
			profile_image_url
		}
	}
	`
)
7248
7349var (
@@ -128,6 +104,41 @@ type twitterTweet struct {
128104 Retweet bool `json:"retweet"`
129105}
130106
107+ func buildQuery (tweet * twitterTweet ) string {
108+ tweetQuery := `t as var(func: eq(id_str, "%s"))`
109+ userQuery := `%s as var(func: eq(user_id, "%s"))`
110+
111+ query := make ([]string , len (tweet .Mention )+ 2 )
112+
113+ query [0 ] = fmt .Sprintf (tweetQuery , tweet .IDStr )
114+ tweet .UID = "uid(t)"
115+
116+ query [1 ] = fmt .Sprintf (userQuery , "u" , tweet .Author .UserID )
117+ tweet .Author .UID = "uid(u)"
118+
119+ usersMap := make (map [string ]string )
120+ usersMap [tweet .Author .UserID ] = "u"
121+
122+ // We will query only once for every user. We are storing all the users in the map who
123+ // we have already queried. If a user_id is repeated, we will just use uid that we got
124+ // in the previous query.
125+ for i , user := range tweet .Mention {
126+ var varName string
127+ if name , ok := usersMap [user .UserID ]; ok {
128+ varName = name
129+ } else {
130+ varName = fmt .Sprintf ("m%d" , i + 1 )
131+ query [i + 2 ] = fmt .Sprintf ("%s as var(func: eq(user_id, %s))" , varName , user .UserID )
132+ usersMap [user .UserID ] = varName
133+ }
134+
135+ tweet .Mention [i ].UID = fmt .Sprintf ("uid(%s)" , varName )
136+ }
137+
138+ finalQuery := fmt .Sprintf ("query {%s}" , strings .Join (query , "\n " ))
139+ return finalQuery
140+ }
141+
131142func runInserter (alphas []api.DgraphClient , c * y.Closer , tweets <- chan interface {}) {
132143 defer c .Done ()
133144
@@ -159,10 +170,7 @@ func runInserter(alphas []api.DgraphClient, c *y.Closer, tweets <-chan interface
159170 // txn is not being discarded deliberately
160171 // defer txn.Discard()
161172
162- if errTweet := updateFilteredTweet (ft , txn ); errTweet != nil {
163- atomic .AddUint32 (& stats .ErrorsDgraph , 1 )
164- continue
165- }
173+ queryStr := buildQuery (ft )
166174
167175 tweet , err := json .Marshal (ft )
168176 if err != nil {
@@ -178,8 +186,16 @@ func runInserter(alphas []api.DgraphClient, c *y.Closer, tweets <-chan interface
178186 // only ONE retry attempt is made
179187 retry := true
180188 RETRY:
181- apiMutation := & api.Mutation {SetJson : tweet , CommitNow : commitNow }
182- _ , err = txn .Mutate (context .Background (), apiMutation )
189+ apiUpsert := & api.Request {
190+ Mutations : []* api.Mutation {
191+ & api.Mutation {
192+ SetJson : tweet ,
193+ },
194+ },
195+ CommitNow : commitNow ,
196+ Query : queryStr ,
197+ }
198+ _ , err = txn .Do (context .Background (), apiUpsert )
183199 switch {
184200 case err == nil :
185201 if commitNow {
@@ -262,114 +278,6 @@ func filterTweet(jsn interface{}) (*twitterTweet, error) {
262278 }, nil
263279}
264280
265- func updateFilteredTweet (ft * twitterTweet , txn * dgo.Txn ) error {
266- // first ensure that tweet doesn't exists
267- resp , err := txn .QueryWithVars (context .Background (), cDgraphTweetQuery ,
268- map [string ]string {"$tweetID" : ft .IDStr })
269- if err != nil {
270- return err
271- }
272- var r struct {
273- All []struct {
274- UID string `json:"uid"`
275- } `json:"all"`
276- }
277- if err := json .Unmarshal (resp .Json , & r ); err != nil {
278- return err
279- }
280-
281- // possible duplicate, shouldn't happen
282- if len (r .All ) > 0 {
283- log .Println ("found duplicate tweet with id:" , ft .IDStr )
284- return errShouldNotReach
285- }
286-
287- // map to check for duplicates
288- users := make (map [string ]string )
289-
290- userID := ft .Author .UserID
291- if u , err := queryUser (txn , & ft .Author ); err != nil {
292- return err
293- } else if u != nil {
294- ft .Author = * u
295- }
296- users [userID ] = ft .Author .UID
297-
298- userMentions := make ([]twitterUser , 0 )
299- for i , m := range ft .Mention {
300- if dup , ok := users [m .UserID ]; ok && dup != "" {
301- userMentions = append (userMentions , twitterUser {UID : dup })
302- continue
303- } else if ok && dup == "" {
304- // TODO: find a way to not ignore this mention
305- continue
306- }
307-
308- userID := m .UserID
309- if u , err := queryUser (txn , & m ); err != nil {
310- return err
311- } else if u != nil {
312- ft .Mention [i ] = * u
313- }
314- userMentions = append (userMentions , ft .Mention [i ])
315- users [userID ] = m .UID
316- }
317- ft .Mention = userMentions
318-
319- return nil
320- }
321-
322- func equalsUser (src , dst * twitterUser ) bool {
323- return src .UserID == dst .UserID &&
324- src .UserName == dst .UserName &&
325- src .ScreenName == dst .ScreenName &&
326- src .Description == dst .Description &&
327- src .FriendsCount == dst .FriendsCount &&
328- src .Verified == dst .Verified &&
329- src .ProfileBannerURL == dst .ProfileBannerURL &&
330- src .ProfileImageURL == dst .ProfileImageURL
331- }
332-
333- func queryUser (txn * dgo.Txn , src * twitterUser ) (* twitterUser , error ) {
334- resp , err := txn .QueryWithVars (context .Background (), cDgraphUserQuery ,
335- map [string ]string {"$userID" : src .UserID })
336- if err != nil {
337- return nil , err
338- }
339-
340- var r struct {
341- All []twitterUser `json:"all"`
342- }
343- if err := json .Unmarshal (resp .Json , & r ); err != nil {
344- return nil , err
345- }
346-
347- if len (r .All ) > 1 {
348- log .Println ("found duplicate users in Dgraph with id:" , r .All [0 ].UserID )
349- return nil , errShouldNotReach
350- } else if len (r .All ) == 0 {
351- return nil , nil
352- } else if len (r .All ) == 1 && ! equalsUser (src , & r .All [0 ]) {
353- return & r .All [0 ], nil
354- } else {
355- return & twitterUser {UID : r .All [0 ].UID }, nil
356- }
357- }
358-
359- func getTrends (id int64 , api * anaconda.TwitterApi ) ([]string , error ) {
360- resp , err := api .GetTrendsByPlace (id , nil )
361- if err != nil {
362- return nil , err
363- }
364-
365- trends := make ([]string , len (resp .Trends ))
366- for i , t := range resp .Trends {
367- trends [i ] = t .Name
368- }
369-
370- return trends , nil
371- }
372-
373281func readCredentials (path string ) twitterCreds {
374282 jsn , err := ioutil .ReadFile (path )
375283 checkFatal (err , "Unable to open twitter credentials file '%s'" , path )
0 commit comments