-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
b9e8e6b
commit 09ee14d
Showing
2 changed files
with
117 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
package igslim | ||
|
||
import ( | ||
"regexp" | ||
"strings" | ||
) | ||
|
||
const (
	// reUserNameStr matches a user name of 2 to 30 characters made of ASCII
	// letters, digits, underscores and periods that neither starts nor ends
	// with a period.
	reUserNameStr = `[A-Za-z0-9_][A-Za-z0-9_.]{0,28}[A-Za-z0-9_]`

	// minBareUserNameLen is the shortest candidate accepted as a user name
	// when it is not prefixed with "@". Shorter bare words ("me", "us",
	// "face") are very likely plain English words rather than user names.
	minBareUserNameLen = 8
)

var (
	// reIg finds a case-insensitive, stand-alone mention of Instagram
	// ("ig", "instagram", "insta" or "ins"). Group 1 is the mention itself,
	// without the surrounding non-word characters.
	reIg = regexp.MustCompile(`(?i)(?:^|[^\w])(ig|instagram|insta|ins)(?:[^\w]|$)`)

	// reUserNameOn matches "<name> on " at the very end of its input.
	// Group 1 is the character preceding the candidate (empty at the start
	// of the input), group 2 is the optional "@" and group 3 is the name.
	reUserNameOn = regexp.MustCompile(`(?i)(^|[^\w])(@?)(` + reUserNameStr + `)(\s+on\s+)$`)

	// reUserNameAt matches an "@name" mention that is not glued to a
	// preceding word character. Group 1 is the name without the "@".
	reUserNameAt = regexp.MustCompile(`(?:^|[^\w])@(` + reUserNameStr + `)`)

	// reUserName matches a bare user name anywhere in its input.
	reUserName = regexp.MustCompile(reUserNameStr)
)

// GetUserNameFromText extracts an Instagram user name from text (usually the
// biography of a social media account) using regular expressions. Because
// there are too many edge cases, the accuracy of the extracted user name is
// not guaranteed.
//
// It returns the empty string when text does not mention Instagram at all,
// and the username argument when Instagram is mentioned but no user name can
// be extracted from text.
func GetUserNameFromText(text, username string) string {
	loc := reIg.FindStringSubmatchIndex(text)
	if len(loc) < 4 {
		// Instagram is not mentioned.
		return ""
	}
	igStart, igEnd := loc[2], loc[3]

	// "follow <name> on Instagram": look at what directly precedes the
	// mention.
	if m := reUserNameOn.FindStringSubmatch(text[:igStart]); len(m) > 0 {
		before, at, name := m[1], m[2], m[3]
		// A candidate directly preceded by "@" is the domain part of an
		// e-mail address ("foo@example.com on IG"), not a user name.
		// CAN'T DECIDE: a bare candidate shorter than minBareUserNameLen is
		// very likely an English word instead of a user name.
		if before != "@" && (at == "@" || len(name) >= minBareUserNameLen) {
			return name
		}
	}

	// Everything after the mention.
	rest := text[igEnd:]

	// Skip ahead to the first separator, if any.
	// NOTE(review): ":" appears twice in this set; the second one may have
	// been a different colon-like character originally — confirm.
	hasSeparator := false
	if i := strings.IndexAny(rest, ":-=:>👉"); i > -1 {
		rest = rest[i:]
		hasSeparator = true
	}

	// An explicit "@name" mention wins over any bare candidate.
	if m := reUserNameAt.FindStringSubmatch(rest); len(m) > 0 {
		return m[1]
	}

	// Without a separator a bare word is too ambiguous to be trusted.
	if !hasSeparator {
		return username
	}

	span := reUserName.FindStringIndex(rest)
	if span == nil {
		return username
	}
	if span[1] < len(rest) && rest[span[1]] == '@' {
		// The candidate is followed by "@": more like an e-mail address.
		return username
	}
	// CAN'T DECIDE: a bare candidate shorter than minBareUserNameLen is very
	// likely an English word instead of a user name.
	if name := rest[span[0]:span[1]]; len(name) >= minBareUserNameLen {
		return name
	}

	return username
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
package igslim | ||
|
||
import "testing" | ||
|
||
// originalUserName is the fallback user name that the test passes to
// GetUserNameFromText as its "username" argument.
const originalUserName = "original" | ||
|
||
// testcases maps an input text to the value GetUserNameFromText is expected
// to return for it when called with originalUserName as the fallback. An
// expected value of originalUserName means the fallback itself is expected;
// an expected value of "" means the empty string is expected.
var testcases = map[string]string{ | ||
"Follow Me On IG - @example": "example", | ||
"Follow the IG: EXAMPLE_01": "EXAMPLE_01", | ||
"follow my insta >username_ business": "username_", | ||
"INSTA=example__official example@example.com": "example__official", | ||
"IG: @foo_bar foo@bar.com": "foo_bar", | ||
"Instagram: loremipsum YouTube:LoremIpsum": "loremipsum", | ||
"Insta:Some_name": "Some_name", | ||
"IG: hello_world yes": "hello_world", | ||
"Follow our Twitter @ourtwitter": "", | ||
"Follow us on Instagram and YouTube": originalUserName, | ||
"Insta/YouTube: James002": "James002", | ||
"Insta / YouTube: foobar123": "foobar123", | ||
"Instagram:@example email: example@gmail.com": "example", | ||
"Get my IG to 100K @foobar!": "foobar", | ||
"Insta • Youtube • Blog": originalUserName, | ||
"alright": "", | ||
"BIG": "", | ||
"IG": originalUserName, | ||
"Insta ↓ Thank you": originalUserName, | ||
"Follow me on Instagram: example@gmail.com": originalUserName, | ||
"Follow me on Instagram: example@": originalUserName, | ||
"Follow me on Instagram: example003": "example003", | ||
"Why not follow helloworld on Instagram?": "helloworld", | ||
"Why not follow @example on IG?": "example", | ||
"leaked face on IG": originalUserName, | ||
"face reveal on instagram": originalUserName, | ||
"Why not follow foo@example.com on IG?": originalUserName, | ||
"helloworld ON IG": "helloworld", | ||
"insta👉hello_world": "hello_world", | ||
"insta -15%": originalUserName, | ||
} | ||
|
||
func TestGetUserNameFromText(t *testing.T) { | ||
for text, expected := range testcases { | ||
actual := GetUserNameFromText(text, originalUserName) | ||
if actual != expected { | ||
t.Errorf("GetUserNameFromText(%s) should return %s instead of %s", text, expected, actual) | ||
} | ||
} | ||
} |