# 072520 Cumber tests

In [85]:
import cv2
import numpy as np
import requests

from io import BytesIO
from PIL import Image
import matplotlib.pyplot as plt

In [148]:
def _cumberify(f):
    """Black out part of the green/yellow region of an image and stamp a label on it.

    Args:
        f: Binary file-like object whose ``read()`` returns the encoded image bytes.

    Returns:
        The decoded image as a BGR ``numpy`` array (OpenCV channel order) with a
        filled black rectangle and the ``censored.png`` overlay drawn on it.
        When no pixel falls inside the HSV range, the decoded image is returned
        unmodified; when the rectangle is too small to hold the overlay, only
        the black rectangle is drawn.
    """
    img = cv2.imdecode(np.frombuffer(f.read(), np.uint8), 1)
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)  # thresholding on hue is easier in HSV

    # hand-tuned HSV bounds covering greens and yellows
    lower_green = np.array([25, 50, 50])
    upper_green = np.array([80, 255, 255])
    mask = cv2.inRange(hsv, lower_green, upper_green)

    # (row, col) indices of matching pixels, in row-major order; any non-zero
    # mask entry counts as a match, so no bool cast (and no removed `np.bool`) is needed
    cumbered = np.argwhere(mask)
    if len(cumbered) == 0:
        return img  # nothing green/yellow to cover

    first = cumbered[0]
    middle = cumbered[round(len(cumbered) * 0.5)]

    # Rectangle spans from the first matching pixel to the median matching pixel.
    # The median pixel may lie to the left of the first one, so normalise the
    # corners; plain ints keep both OpenCV and PIL happy with the coordinates.
    x0, x1 = sorted((int(first[1]), int(middle[1])))
    y0, y1 = sorted((int(first[0]), int(middle[0])))
    width = x1 - x0
    length = y1 - y0

    # draw a filled black rectangle over that part of the cucumber (modifies img in place)
    cv2.rectangle(img, (x0, y0), (x1, y1), (0, 0, 0), -1)

    # the overlay covers C_OFF of the rectangle and is centred inside it
    C_OFF = 0.8
    overlay_size = (int(width * C_OFF), int(length * C_OFF))
    if overlay_size[0] < 1 or overlay_size[1] < 1:
        return img  # rectangle too small to resize the overlay into
    censored_start = (x0 + int((width - width * C_OFF) / 2),
                      y0 + int((length - length * C_OFF) / 2))

    # PIL works in RGB while OpenCV decoded to BGR: convert, paste, convert back
    img_PIL = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    with Image.open('../images/censored.png') as censored:
        img_PIL.paste(censored.resize(overlay_size), censored_start)

    return cv2.cvtColor(np.array(img_PIL), cv2.COLOR_RGB2BGR)

In [149]:
# the context manager closes the file handle once the bytes have been read
with open('test2.jpeg', 'rb') as f:
    modified_cumber = _cumberify(f)

# _cumberify returns OpenCV's BGR channel order, whereas PIL expects RGB
img = Image.fromarray(cv2.cvtColor(modified_cumber, cv2.COLOR_BGR2RGB))
# img.show()

In [135]:
# sequence repetition: multiplying a tuple by n concatenates n copies of it
(1, 2) * 2

(1, 2, 1, 2)

# 072620 Verbosify Tests

In [384]:
import random
import re

from nltk import pos_tag
from nltk.corpus import wordnet

In [385]:
input_sentence = 'you think we will achieve this win, brothers?'
new_sentence = ''

# go through every word / punctuation token in the sentence
for word in re.findall(r"\w+|[^\w\s]", input_sentence):
    # punctuation is glued onto the previous token without a space
    if re.match(r"[^\w\s]", word):
        new_sentence += word
        continue

    # look the word up once; the result does not change between retries
    synsets = wordnet.synsets(word)

    # Keep the original word when there are no synsets, or when every lemma is
    # contained in the word itself; retrying in that case would never terminate.
    has_alternative = any(
        lemma.name().lower() not in word.lower()
        for synset in synsets
        for lemma in synset.lemmas()
    )
    if not has_alternative:
        new_sentence += ' ' + word
        continue

    # choose a random lemma of a random synset until it differs from the word
    while True:
        synonym = random.choice(random.choice(synsets).lemmas()).name()
        if synonym.lower() not in word.lower():
            new_sentence += ' ' + synonym
            break


print('original:', input_sentence)
# drop only the leading separator space, never a leading punctuation token
print('verbosified:', new_sentence.lstrip(' '))

original: you think we will achieve this win, brothers?
verbosified: you conceive we volition accomplish this winnings, buddy?


## Parts of Speech exploration

In [102]:
# Penn Treebank part-of-speech tags, as listed at
# https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
# or run: `nltk.help.upenn_tagset()`
all_tags = (
    'CC CD DT EX FW IN JJ JJR JJS LS MD NN NNS NNP NNPS '
    'PDT POS PRP PRP$ RB RBR RBS RP SYM TO UH '
    'VB VBD VBG VBN VBP VBZ WDT WP WP$ WRB'
).split()

# https://stackoverflow.com/questions/15586721/wordnet-lemmatization-and-pos-tagging-in-python
# https://linguistics.stackexchange.com/questions/6508/which-part-of-speech-are-s-and-r-in-wordnet
def get_wordnet_pos(treebank_tag):
    """Translate a Penn Treebank tag into a string of WordNet POS letters.

    Only the first character of the tag is examined:
    'J' -> 'as', 'V' -> 'v', 'N' -> 'n', 'R' -> 'r'.
    Any other tag (including the empty string) yields ''.
    """
    letters_by_prefix = {'J': 'as', 'V': 'v', 'N': 'n', 'R': 'r'}
    return letters_by_prefix.get(treebank_tag[:1], '')

In [4]:
def print_syns(word):
    """Print each WordNet synset of `word`, followed by that synset's lemmas.

    Prints 'no synonyms found' and returns when WordNet has no synset for the word.
    """
    found = wordnet.synsets(word)
    if not found:
        print('no synonyms found')
        return

    for entry in found:
        # header line: the synset name followed by a colon
        print(f'{entry.name()}:')
        for member in entry.lemmas():
            print(member)

        # blank line between synsets
        print()

In [5]:
# `pos_tag` is imported directly (`from nltk import pos_tag`); the bare `nltk`
# module name is never imported, so call the function by its imported name.
tokenized_sentence = re.findall(r"\w+|[^\w\s]", 'hop in the voice channel and find out for yourself')
print('parts of speech:', pos_tag(tokenized_sentence))

print()

# same sentence, but with the phrasal verb pre-joined as a single 'find_out' token
tokenized_sentence = ['hop', 'in', 'the', 'voice', 'channel', 'and', 'find_out', 'for', 'yourself', '!']
print('parts of speech:', pos_tag(tokenized_sentence))

parts of speech: [('hop', 'NN'), ('in', 'IN'), ('the', 'DT'), ('voice', 'NN'), ('channel', 'NN'), ('and', 'CC'), ('find', 'VB'), ('out', 'RP'), ('for', 'IN'), ('yourself', 'PRP')]

parts of speech: [('hop', 'NN'), ('in', 'IN'), ('the', 'DT'), ('voice', 'NN'), ('channel', 'NN'), ('and', 'CC'), ('find_out', 'NN'), ('for', 'IN'), ('yourself', 'NN'), ('!', '.')]


In [86]:
# antonyms are looked up on a lemma: take the first lemma of the synset and ask it
(
    wordnet.synset('beautiful.a.01')
    .lemmas()[0]
    .antonyms()
)

[Lemma('ugly.a.01.ugly')]

## Test parts of speech

In [386]:
# Hand-written replacements keyed by 'word/TAG'.
# A list value holds the alternatives; a string value is the key of another
# entry whose alternatives should be used instead.
whitelist = {
    # articles
    'a/DT': ['an', 'the'],
    'an/DT': ['a', 'the'],
    'the/DT': ['a', 'an'],
    # pronouns
    'I/PRP': ['ur boy', 'me, myself and I', 'yours truly'],
    'me/PRP': 'I/PRP',
    'you/PRP': ['thou', 'thoust'],
    # modals
    'will/MD': ['shall', 'shalt'],
}


def verbosify2(input_sentence, num_tries = 30):
    """Rewrite a sentence by swapping each word for a replacement.

    The sentence is split into word and punctuation tokens and POS-tagged.
    Punctuation is kept as-is, 'word/TAG' pairs present in `whitelist` use
    `get_whitelist_synonym`, and every other word goes through `get_synonym`
    with the tag mapped by `get_wordnet_pos`. The tokens are then re-assembled
    with `join_sentence`.

    `num_tries` is accepted but not used by this function.
    """
    tokens = re.findall(r"\w+|[^\w\s]", input_sentence)

    def replacement_for(word, pos):
        # punctuation passes through untouched
        if re.match(r"[^\w\s]", word):
            return word
        # hand-written replacements take priority over WordNet
        if f'{word}/{pos}' in whitelist:
            return get_whitelist_synonym(word, pos)
        return get_synonym(word, get_wordnet_pos(pos))

    return join_sentence([replacement_for(word, pos) for word, pos in pos_tag(tokens)])


def get_synonym(word, pos):
    """Return a random WordNet synonym of `word` restricted to the given POS letters.

    Args:
        word: The word to replace.
        pos: String of acceptable WordNet POS letters (e.g. 'as', 'v', 'n', 'r').
            An empty string matches no synset, so `word` is returned unchanged.

    Returns:
        A lemma name drawn uniformly from the distinct lemma names (other than
        `word` itself) of the matching synsets, or `word` when there is none.
    """
    synonyms = []   # candidates in first-seen order
    seen = set()    # constant-time duplicate check

    for synset in wordnet.synsets(word):
        # Ask the synset for its POS rather than splitting its name on '.':
        # the name's own word part may contain dots, which shifts the fields.
        if synset.pos() not in pos:
            continue

        for lemma in synset.lemmas():
            name = lemma.name()
            if name != word and name not in seen:
                seen.add(name)
                synonyms.append(name)

    # no unique synonyms (this also covers a word with no synsets at all)
    if not synonyms:
        return word
    return random.choice(synonyms)

def get_whitelist_synonym(word, pos):
    """Pick a random replacement for a whitelisted 'word/TAG' pair.

    The original word is always included among the choices. A string entry in
    `whitelist` is treated as the key of another entry whose list is used.
    """
    entry = whitelist[word+'/'+pos]
    # follow the reference when the entry points at another key
    options = entry if isinstance(entry, list) else whitelist[entry]
    return random.choice(options + [word])
    

def join_sentence(word_list):
    """Join word and punctuation tokens back into a sentence.

    Words are separated by single spaces and have underscores replaced by
    spaces; punctuation tokens are attached directly to the preceding text.

    Args:
        word_list: Iterable of string tokens.

    Returns:
        The joined sentence without a leading separator space. A leading
        punctuation token is preserved, and an empty input gives ''.
    """
    pieces = []

    for word in word_list:
        if re.match(r"[^\w\s]", word):
            pieces.append(word)
        else:
            pieces.append(' ' + word.replace('_', ' '))

    sentence = ''.join(pieces)
    # strip the separator only when the sentence actually starts with one;
    # slicing unconditionally would eat a leading punctuation token
    return sentence[1:] if sentence.startswith(' ') else sentence

In [390]:
# Print several independent rewrites of one sentence to eyeball the variety.
input_sentence = 'I would like a hamburger cooked just for me'
sim = 20  # how many rewrites to print

for i in range(sim):
    print(verbosify2(input_sentence))

me, myself and I would care the ground beef misrepresent precisely for yours truly
I would care a burger wangle but for me
ur boy would wish an beefburger ready precisely for me, myself and I
I would care the ground beef falsify but for ur boy
me, myself and I would care a burger fix barely for me, myself and I
me, myself and I would care the ground beef cook hardly for ur boy
yours truly would care a ground beef make scarcely for ur boy
yours truly would care the ground beef fudge exactly for me
yours truly would care an burger misrepresent simply for ur boy
ur boy would wish an ground beef fudge precisely for yours truly
yours truly would care an beefburger falsify simply for me
yours truly would care a beefburger manipulate just now for ur boy
me, myself and I would care a ground beef wangle just now for me
ur boy would care the beefburger fix merely for me
I would wish an beefburger ready hardly for ur boy
I would care an beefburger cook merely for me, myself and I
ur boy would car

In [392]:
# only `pos_tag` and `wordnet` are imported from nltk elsewhere in this notebook,
# so bring the package itself into scope before calling its downloader
import nltk

nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/william/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [380]:
# whitespace-split the sentence into a list of words
args = str.split('I would like a hamburger cooked just for me')

In [383]:
# glue the words back together with single spaces
str.join(' ', args)

'I would like a hamburger cooked just for me'