Skip to content

Commit

Permalink
add GetUserNameFromText
Browse files Browse the repository at this point in the history
  • Loading branch information
caiguanhao committed Sep 22, 2021
1 parent b9e8e6b commit 09ee14d
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 0 deletions.
70 changes: 70 additions & 0 deletions name.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package igslim

import (
"regexp"
"strings"
)

// reUserNameStr is the regular-expression fragment describing a candidate
// user name: 2 to 30 characters taken from ASCII letters, digits, '_' and
// '.', where neither the first nor the last character may be '.'.
const reUserNameStr = `[A-Za-z0-9_][A-Za-z0-9_.]{0,28}[A-Za-z0-9_]`

// Patterns used by the user name extraction, compiled once when the package
// is initialised.
var (
	// reIg finds one of the keywords "ig", "instagram", "insta" or "ins"
	// (any letter case) that is delimited on both sides by a non-word
	// character or by the start/end of the input. Submatch 1 is the keyword.
	reIg = regexp.MustCompile(`(?i)(?:^|[^\w])(ig|instagram|insta|ins)(?:[^\w]|$)`)

	// reUserNameOn matches "<name> on " anchored at the very end of the
	// input ("on" in any letter case, surrounded by whitespace).
	// Submatches: 1 = the non-word character in front of the name (empty
	// when the name starts the input), 2 = "@" or empty, 3 = the name,
	// 4 = "on" together with its surrounding whitespace.
	reUserNameOn = regexp.MustCompile(`(?i)(^|[^\w])(@?)(` + reUserNameStr + `)(\s+on\s+)$`)

	// reUserNameAt matches "@<name>" where the '@' is at the start of the
	// input or follows a non-word character, so the '@' inside something
	// like "foo@bar.com" does not qualify. Submatch 1 is the name without
	// the '@'.
	reUserNameAt = regexp.MustCompile(`(?:^|[^\w])@(` + reUserNameStr + `)`)

	// reUserName matches a bare candidate user name anywhere in the input.
	reUserName = regexp.MustCompile(reUserNameStr)
)

// GetUserNameFromText makes a best-effort guess at the Instagram user name
// mentioned in text (usually the biography of an account on some other social
// network). It relies purely on regular expressions and simple heuristics, so
// the result is not guaranteed to be a real or correct user name.
//
// The result is one of:
//   - "" when reIg finds no Instagram keyword in text;
//   - the extracted user name when one is recognised;
//   - username, the caller-supplied fallback, when Instagram is mentioned but
//     no user name can be extracted.
func GetUserNameFromText(text, username string) string {
	igLoc := reIg.FindStringSubmatchIndex(text)
	if len(igLoc) < 4 {
		return ""
	}

	// Split around the keyword (submatch 1 of reIg).
	before, after := text[:igLoc[2]], text[igLoc[3]:]

	// Form 1: the name precedes the keyword, e.g. "follow * on Instagram".
	if on := reUserNameOn.FindStringSubmatch(before); on != nil {
		lead, at, name := on[1], on[2], on[3]
		// CAN'T DECIDE: without a leading '@', anything shorter than 8
		// characters is far more likely an English word than a user name.
		plausible := at == "@" || len(name) >= 8
		// A '@' directly in front of the match means the name is the tail
		// of something like "foo@example.com", so it is rejected.
		if plausible && lead != "@" {
			return name
		}
	}

	// Form 2: the name follows the keyword. When a separator is present,
	// only the text from the first separator onwards is searched.
	// NOTE(review): ':' is listed twice in the separator set; the repeat is a
	// no-op for IndexAny. Confirm whether another separator was intended.
	rest := after
	sep := strings.IndexAny(rest, ":-=:>👉")
	hasSeparator := sep >= 0
	if hasSeparator {
		rest = rest[sep:]
	}

	// An explicit "@name" wins, with or without a separator.
	if at := reUserNameAt.FindStringSubmatch(rest); at != nil {
		return at[1]
	}

	// A bare word is only considered when a separator introduced it.
	if !hasSeparator {
		return username
	}
	span := reUserName.FindStringIndex(rest)
	if span == nil {
		return username
	}

	candidate, tail := rest[span[0]:span[1]], rest[span[1]:]
	// A candidate immediately followed by '@' looks like the local part of
	// an email address. As above, a candidate shorter than 8 characters is
	// treated as an ordinary word (CAN'T DECIDE).
	if strings.HasPrefix(tail, "@") || len(candidate) < 8 {
		return username
	}
	return candidate
}
47 changes: 47 additions & 0 deletions name_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package igslim

import "testing"

// originalUserName is the fallback user name used by the tests; an expected
// value equal to it means "Instagram is mentioned, but no name was extracted".
const originalUserName = "original"

// testcases maps an input text to the value GetUserNameFromText is expected
// to return for it when originalUserName is passed as the fallback.
var testcases = map[string]string{
	// An explicit "@name" follows the keyword.
	"Follow Me On IG - @example": "example",
	"IG: @foo_bar foo@bar.com": "foo_bar",
	"Instagram:@example email: example@gmail.com": "example",
	"Get my IG to 100K @foobar!": "foobar",

	// A bare name follows a separator after the keyword.
	"Follow the IG: EXAMPLE_01": "EXAMPLE_01",
	"follow my insta >username_ business": "username_",
	"INSTA=example__official example@example.com": "example__official",
	"Instagram: loremipsum YouTube:LoremIpsum": "loremipsum",
	"Insta:Some_name": "Some_name",
	"IG: hello_world yes": "hello_world",
	"Insta/YouTube: James002": "James002",
	"Insta / YouTube: foobar123": "foobar123",
	"Follow me on Instagram: example003": "example003",
	"insta👉hello_world": "hello_world",

	// The name precedes the keyword: "<name> on <keyword>".
	"Why not follow helloworld on Instagram?": "helloworld",
	"Why not follow @example on IG?": "example",
	"helloworld ON IG": "helloworld",

	// Instagram is mentioned, but the fallback is expected.
	"Follow us on Instagram and YouTube": originalUserName,
	"Insta • Youtube • Blog": originalUserName,
	"IG": originalUserName,
	"Insta ↓ Thank you": originalUserName,
	"Follow me on Instagram: example@gmail.com": originalUserName,
	"Follow me on Instagram: example@": originalUserName,
	"leaked face on IG": originalUserName,
	"face reveal on instagram": originalUserName,
	"Why not follow foo@example.com on IG?": originalUserName,
	"insta -15%": originalUserName,

	// The empty string is expected.
	"Follow our Twitter @ourtwitter": "",
	"alright": "",
	"BIG": "",
}

// TestGetUserNameFromText feeds every key of testcases to
// GetUserNameFromText, with originalUserName as the fallback, and compares
// the result with the expected value stored in the table.
func TestGetUserNameFromText(t *testing.T) {
	for text, expected := range testcases {
		// One subtest per input: failures are reported under the input's
		// name and a single case can be selected with -run. The closure runs
		// synchronously inside t.Run, so using the loop variables directly is
		// safe.
		t.Run(text, func(t *testing.T) {
			actual := GetUserNameFromText(text, originalUserName)
			if actual != expected {
				// %q keeps empty strings and unusual characters visible in
				// the failure message.
				t.Errorf("GetUserNameFromText(%q) should return %q instead of %q", text, expected, actual)
			}
		})
	}
}

0 comments on commit 09ee14d

Please sign in to comment.