In [1]:
import requests
BASE_URL = 'http://localhost:5000'

def report(msg):
    """Report ``msg`` to the spam service as a spam message.

    POSTs ``{'message': msg}`` as a JSON body to ``BASE_URL + '/spam/report'``.

    Args:
        msg: the message text to report.

    Returns:
        The ``ok`` attribute of the HTTP response.
    """
    response = requests.post(BASE_URL + '/spam/report', json={'message': msg})
    # Previously ``.ok`` was evaluated and discarded, so the function always
    # returned None and callers could not tell whether the report succeeded.
    return response.ok

def is_spam(vector):
    """Ask the spam service to classify a message vector.

    The components of ``vector`` are stringified and comma-joined into the
    ``vector`` query parameter of a GET to ``BASE_URL + '/spam/detect'``.

    Returns:
        tuple: ``(spam, confidence)`` read from the JSON response.
    """
    encoded = ','.join(map(str, vector))
    response = requests.get(BASE_URL + '/spam/detect', params={'vector': encoded})
    payload = response.json()
    return payload['spam'], payload['confidence']

def get_word_vectors(indexes):
    """Fetch the vectors of the given word ids from the service.

    The ids are comma-joined into the ``ids`` query parameter of a GET to
    ``BASE_URL + '/words/vector'``.

    Returns:
        dict: integer word id -> the ``'vector'`` entry of that word, built
        from the ``'words'`` mapping of the JSON response.
    """
    id_list = ','.join(map(str, indexes))
    response = requests.get(BASE_URL + '/words/vector', params={'ids': id_list})
    words = response.json()['words']

    vectors_by_id = {}
    for key, entry in words.items():
        # The response keys are converted with int() so callers can look
        # vectors up by integer id.
        vectors_by_id[int(key)] = entry['vector']
    return vectors_by_id

def get_word_indexes():
    """Return a dict mapping each word to its index in the service's word list.

    The list is fetched from ``BASE_URL + '/words/list'`` and split on
    newlines; a word's index is its zero-based line number. If a word occurs
    on several lines, the last occurrence wins.
    """
    # Use the decoded body: ``.content`` is bytes on Python 3, where
    # ``content.split('\n')`` raises TypeError.
    listing = requests.get(BASE_URL + '/words/list').text
    # Enumerate before filtering so every word keeps its line number as its
    # index. Blank lines (e.g. the one after a trailing newline) are skipped
    # so the empty string is never registered as a word.
    return {word: i for i, word in enumerate(listing.split('\n')) if word}

In [128]:
# Word -> index lookup, fetched from the service when this cell runs;
# calc_message reads it to translate message words into word ids.
WORD_INDEXES = get_word_indexes()


def calc_message(message, n_fakes=20):
    """Return the element-wise sum of the word vectors of ``message``.

    The message is lower-cased and split on whitespace; words that are not in
    ``WORD_INDEXES`` are ignored. The ids of the known words are shuffled
    together with ``n_fakes`` random vocabulary ids before being passed to
    ``get_word_vectors``; only the vectors of the real words are summed.

    Args:
        message: text to vectorise.
        n_fakes: number of random decoy ids mixed into the vector request.

    Returns:
        list: the summed vector. A word that occurs several times contributes
        once per occurrence. When no word of the message is known, a zero
        vector with the length of a fetched vector is returned (an empty list
        if nothing was fetched).
    """
    import random
    import numpy as np

    # split() without an argument splits on any whitespace (including the
    # newlines found in real messages) and never yields empty tokens.
    indexes = [WORD_INDEXES[word]
               for word in message.lower().split()
               if word in WORD_INDEXES]

    vocab_size = len(WORD_INDEXES)
    # randrange excludes its upper bound. randint(0, vocab_size) is inclusive
    # and could return vocab_size, one past the last id of the vocabulary.
    fakes = ([random.randrange(vocab_size) for _ in range(n_fakes)]
             if vocab_size else [])

    shuffled = indexes + fakes
    random.shuffle(shuffled)
    vectors = get_word_vectors(shuffled)

    if not indexes:
        # The builtin sum() of nothing is the int 0, which has no .tolist();
        # return a zero vector sized like the vectors that were fetched.
        dim = len(next(iter(vectors.values()), []))
        return [0.0] * dim

    return sum(np.array(vectors[i]) for i in indexes).tolist()

In [38]:
def is_spam_message(msg):
    """Classify raw text: vectorise it with calc_message, then query is_spam.

    Returns whatever ``is_spam`` returns for the message's vector.
    """
    vector = calc_message(msg)
    return is_spam(vector)

In [136]:
# Sample spam messages, reported to the service and used for spot checks.
# The second sample contains literal backslashes before the slashes of its
# URL. They are written as '\\/' because '\/' is not a valid Python escape
# sequence; the runtime value of the string is unchanged.
spams = [
    'Hi are you looking for a sexy cool woman to spend time ,with well I,m lookin for a gentleman who still khowns how to treat a lady, I am here to u.http . G.lovendate.pw code: 605',
    'I offer you an exchange, you free register on my site http:\\/\\/rachelmel.pro\nConfirmation email and show me the screenshot. \nAfter that I\'ll send you my nude pics',
    '''Hi, do I know u? you just showed up in my kik hmm.. my friends warned me that there are many fake accounts and bots here, no offense, are u a real person? If you are a real person, you won't have any trouble liking my pic, will you;)? the one where I'm wearing a white swimming suit. This way I'll be convinced that you are real'''
]

# Sample benign messages, used to spot-check that ordinary text is not
# classified as spam.
good = [
    'Hi I am your mother and I love you',
    'Did you watch the movie last night?',
    'Whats up with all the weird clouds?',
]

In [114]:
# Register spam messages: every sample in `spams` is reported to the service
# 4 times in a row.
# NOTE(review): presumably several reports are needed before the service
# treats a message as spam - confirm against the service.
for msg in spams:
    for i in range(4):
        report(msg)

In [115]:
# Spot check: classify the third benign sample; the result is the
# (spam, confidence) pair produced by is_spam.
is_spam_message(good[2])

(False, 0.8641516840020381)

In [117]:
# Spot check: classify the second spam sample, one of the messages reported
# to the service by the registration loop.
is_spam_message(spams[1])

(True, 1.0)

In [129]:
# Spot check: classify a message that is in neither sample list.
is_spam_message('Hey Doody What is up?')

(False, 0.6815927618764492)

In [138]:
import re

# Tokenizer experiment: split the third spam sample on commas, spaces,
# periods and question marks (together with any surrounding whitespace) and
# drop the empty pieces. The doctest-style '>>>' prompts were removed so the
# cell is plain Python.
string = spams[2]
# Raw string: '\s', '\.' and '\?' are regex escapes, not valid Python string
# escapes, so a non-raw literal triggers an invalid-escape warning.
pattern = re.compile(r"^\s+|\s*[, \.\?]\s*|\s+$")
print([x for x in pattern.split(string) if x])

['Hi', 'do', 'I', 'know', 'u', 'you', 'just', 'showed', 'up', 'in', 'my', 'kik', 'hmm', 'my', 'friends', 'warned', 'me', 'that', 'there', 'are', 'many', 'fake', 'accounts', 'and', 'bots', 'here', 'no', 'offense', 'are', 'u', 'a', 'real', 'person', 'If', 'you', 'are', 'a', 'real', 'person', 'you', "won't", 'have', 'any', 'trouble', 'liking', 'my', 'pic', 'will', 'you;)', 'the', 'one', 'where', "I'm", 'wearing', 'a', 'white', 'swimming', 'suit', 'This', 'way', "I'll", 'be', 'convinced', 'that', 'you', 'are', 'real']
