From a7508936478342f8d1cb16596b12c1bc3ea21b60 Mon Sep 17 00:00:00 2001 From: Parsha Pourkhomami Date: Mon, 17 Aug 2015 14:45:55 -0700 Subject: [PATCH 1/4] Twitter: Preserve commit message whitespace When tweeting commit messages, preserve the original commit message whitespace and tweet it as-is. Previously, String#split(" ") was used to process words, which splits on *any* whitespace and with runs of contiguous whitespace characters ignored [1]. When the split array was turned back into a string with Array#join(" "), original whitespace characters were lost and replaced by a single space character. Now, String#gsub and a regular expression are used to process words, which will preserve original whitespace. Fixes #959. [1] http://ruby-doc.org/core-2.2.0/String.html#method-i-split --- lib/services/twitter.rb | 6 +++--- test/twitter_test.rb | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/lib/services/twitter.rb b/lib/services/twitter.rb index 42cfe54f6..accbe06e9 100644 --- a/lib/services/twitter.rb +++ b/lib/services/twitter.rb @@ -42,9 +42,9 @@ def receive_push url = commit['url'] # Strip out leading @s so that github @ mentions don't become twitter @ mentions # since there's zero reason to believe IDs on one side match IDs on the other - message = commit['message'].split(' ').map do |word| - (word.length > 1 && word[0] == '@') ? "@\u200b#{word[1..word.length]}" : word - end.join(' ') + message = commit['message'].gsub(/\B@[^ ]+/) do |word| + "@\u200b#{word[1..word.length]}" + end status = if short_format? 
"#{url} #{message}" else diff --git a/test/twitter_test.rb b/test/twitter_test.rb index 001ff6af3..b4f521c98 100644 --- a/test/twitter_test.rb +++ b/test/twitter_test.rb @@ -52,6 +52,24 @@ def svc.post(status) end end + # Make sure that whitespace in the original commit message is preserved + def test_whitespace + p = payload + p['commits'][0]['message']="message \nwith\n\n weird whitespace " + svc = service({'token' => 't', 'secret' => 's'}, p) + + def svc.statuses + @statuses ||= [] + end + + def svc.post(status) + statuses << status + end + + svc.receive_push + assert svc.statuses[0].match(p['commits'][0]['message']) + end + # Make sure that GitHub @mentions are injected with a zero-width space # so that they don't turn into (potentially unmatching) twitter @mentionds def test_mentions From a1498ec29871ece4dace73fcb002ff106727a536 Mon Sep 17 00:00:00 2001 From: Parsha Pourkhomami Date: Wed, 19 Aug 2015 12:40:44 -0700 Subject: [PATCH 2/4] Twitter: Better match @ mentions Change regex to be more specific and look for the "@" symbol followed by a word character instead of any non-space character. Add tests for this behavior. See https://github.com/github/github-services/pull/1075/files#r37456732 --- lib/services/twitter.rb | 2 +- test/twitter_test.rb | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/lib/services/twitter.rb b/lib/services/twitter.rb index accbe06e9..715d92327 100644 --- a/lib/services/twitter.rb +++ b/lib/services/twitter.rb @@ -42,7 +42,7 @@ def receive_push url = commit['url'] # Strip out leading @s so that github @ mentions don't become twitter @ mentions # since there's zero reason to believe IDs on one side match IDs on the other - message = commit['message'].gsub(/\B@[^ ]+/) do |word| + message = commit['message'].gsub(/\B@[[:word:]]+/) do |word| "@\u200b#{word[1..word.length]}" end status = if short_format? 
diff --git a/test/twitter_test.rb b/test/twitter_test.rb index b4f521c98..8b23f90d7 100644 --- a/test/twitter_test.rb +++ b/test/twitter_test.rb @@ -76,7 +76,7 @@ def test_mentions p = payload p['commits'][0]['message']="This commit was done by @sgolemon" p['commits'][1]['message']="@sgolemon committed this" - p['commits'][2]['message']="@sgolemon made a test for @kdaigle" + p['commits'][2]['message']="@sgolemon made a @ @\ttest for @kdaigle" svc = service({'token' => 't', 'secret' => 's'}, p) def svc.statuses @@ -90,9 +90,11 @@ def svc.post(status) svc.receive_push assert_equal 3, svc.statuses.size svc.statuses.each do |st| - # Any @ which is not followed by U+200B ZERO WIDTH SPACE - # is an error - assert !st.match('@(?!\u200b)') + # Any @ which is followed by a word character is an error + assert !st.match('@(?=[[:word:]])') + # Any @ which is followed by a U+200b ZERO WIDTH SPACE but not a word + # character is an error + assert !st.match('@(?=\u200b[^[:word:]])') end end From 3a47b1c251c93a99404dd393140124f309ca05fb Mon Sep 17 00:00:00 2001 From: Parsha Pourkhomami Date: Wed, 19 Aug 2015 13:04:29 -0700 Subject: [PATCH 3/4] =?UTF-8?q?Twitter:=20Match=20=EF=BC=A0=20U+FF20=20FUL?= =?UTF-8?q?LWIDTH=20COMMERCIAL=20AT?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Twitter matches the U+FF20 character (＠) in addition to the normal ASCII @ character. 
See https://github.com/twitter/twitter-text/blob/141759ce97926772d02c257dead8d27379d16dbb/rb/lib/twitter-text/regex.rb#L118 --- lib/services/twitter.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/services/twitter.rb b/lib/services/twitter.rb index 715d92327..44fc239d8 100644 --- a/lib/services/twitter.rb +++ b/lib/services/twitter.rb @@ -42,7 +42,7 @@ def receive_push url = commit['url'] # Strip out leading @s so that github @ mentions don't become twitter @ mentions # since there's zero reason to believe IDs on one side match IDs on the other - message = commit['message'].gsub(/\B@[[:word:]]+/) do |word| + message = commit['message'].gsub(/\B[@@][[:word:]]+/) do |word| "@\u200b#{word[1..word.length]}" end status = if short_format? From 02e86b87473423eee683387cb5ae546bd198926b Mon Sep 17 00:00:00 2001 From: Parsha Pourkhomami Date: Wed, 19 Aug 2015 13:09:53 -0700 Subject: [PATCH 4/4] Twitter: Match only a single character Don't need to match the entire word, just the first word character after the `@`. --- lib/services/twitter.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/services/twitter.rb b/lib/services/twitter.rb index 44fc239d8..c009ae479 100644 --- a/lib/services/twitter.rb +++ b/lib/services/twitter.rb @@ -42,7 +42,7 @@ def receive_push url = commit['url'] # Strip out leading @s so that github @ mentions don't become twitter @ mentions # since there's zero reason to believe IDs on one side match IDs on the other - message = commit['message'].gsub(/\B[@@][[:word:]]+/) do |word| + message = commit['message'].gsub(/\B[@@][[:word:]]/) do |word| "@\u200b#{word[1..word.length]}" end status = if short_format?