In [27]:
from google.cloud import texttospeech

# Instantiates a client
client = texttospeech.TextToSpeechClient()

# Build the voice request, select the language code ("en-US") and the ssml
# voice gender ("neutral")
voice = texttospeech.types.VoiceSelectionParams(
    language_code='en-US', 
    ssml_gender=texttospeech.enums.SsmlVoiceGender.NEUTRAL, name='en-US-Wavenet-D')

# Select the type of audio file you want returned
audio_config = texttospeech.types.AudioConfig(
    audio_encoding=texttospeech.enums.AudioEncoding.MP3)

def get_pronounciation(word, word_id):
    # Set the text input to be synthesized
    synthesis_input = texttospeech.types.SynthesisInput(text=word)
    # Perform the text-to-speech request on the text input with the selected
    # voice parameters and audio file type
    response = client.synthesize_speech(synthesis_input, voice, audio_config)
    outfile = os.path.join(outdir, word_id + '.mp3')
    # The response's audio_content is binary.
    with open(outfile, 'wb') as out:
        # Write the response to the output file.
        out.write(response.audio_content)
    return os.path.basename(outfile)

In [61]:
import os
deck = 'idioms'
indir  = 'idioms2'
outdir = 'media_dir'
textdir = 'text_dir'
os.makedirs(outdir, exist_ok=True)
os.makedirs(textdir, exist_ok=True)
separator = '\n\n################################\n\n'
print('media data in: ', outdir, 'text data in: ', textdir)
print('')

media data in:  media_dir text data in:  text_dir



In [62]:
import requests
import json
import glob
import cv2
import shutil
import os

base_url = 'https://od-api.oxforddictionaries.com/api/v1'
app_id = '88b408cd'
app_key = '8c3ae542c22acf7770e5f7cf67b84bf9'
language = 'en'
region='us'
trouble = []

def get_headword(word):
    url = 'https://od-api.oxforddictionaries.com:443/api/v1/search/' + language + '?q=' + word.lower() + '&prefix=false' + '&regions=' + region
    r = requests.get(url, headers = {'app_id': app_id, 'app_key': app_key})
    if r.status_code != 200:
        print('[Get headword] Invalid status code: ', word, r.status_code)
        return None
    for entry in r.json()['results']:
        if entry['region'] == 'us':
            return entry
    return None

def get_word_info(word_id):
    url = 'https://od-api.oxforddictionaries.com:443/api/v1/entries/' 
    url += language + '/' + word_id.lower() + '/regions=' + region 
    r = requests.get(url, headers = {'app_id': app_id, 'app_key': app_key})
    if r.status_code != 200:
        print('[Get word info], Invalid status code: ', word_id, r.status_code)
        return None
    return r.json()
    
def get_card_meaning(word, card_id):
    res = {'card_id' : card_id, 'match_word' : word, 'word' : word, 'senses' : [], 'examples' : [],
           'pronounciation' : None, 'image' : None, 'headword' : None, 'ipa' : ''}
    
    entry = get_headword(word)
    if entry is None:
        return res
    
    headword = entry['id']
    res['headword'] = headword
    res['word'] = entry['word']
    info = get_word_info(headword)
    if info is None:
        return res
    
    entry = info['results'][0]['lexicalEntries'][0]
    senses = entry['entries'][0]['senses']
    
    if 'pronunciations' in entry.keys():
        pronounciations = entry['pronunciations']
        for p in pronounciations:
            if 'audioFile' in p.keys() and 'dialects' in p.keys() and 'American English' in p['dialects']:
                res['pronounciation'] = p['audioFile']
            if 'phoneticNotation' in p.keys() and 'IPA' in p['phoneticNotation']:
                if 'phoneticSpelling' in p.keys():
                    res['ipa'] = p['phoneticSpelling']
    def get_examples(sense):
        if 'examples' in sense.keys():
            return [e['text'] for e in sense['examples']]
        else:
            examples = []
            if 'subsenses' not in sense.keys():
                return examples
            for subsense in sense['subsenses']:
                if 'examples' in subsense:
                    examples = examples + [e['text'] for e in subsense['examples']]
            return examples
    def get_definitions(sense):
        if 'short_definitions' in sense:
            return sense['short_definitions'][0]
        else:
            return sense['definitions'][0]
    
    res['senses'] = [get_definitions(sense) for sense in senses]
    example_sets = [get_examples(sense) for sense in senses]
    res['examples'] = [e for s in example_sets for e in s]
    
    import time
    time.sleep(2)
    
    return res

def add_card_pronounciation(card):
    if card['pronounciation'] is not None:
        return card
    card['pronounciation'] = get_pronounciation(card['word'], card['card_id'] + '_wpron')
    return card

def add_card_image(card):
    if card['image'] is not None:
        return card
    for img in imgs:
        imgid = os.path.basename(img).split('.')[0].strip().lower()
        if card['match_word'] == imgid:
            imgfile = imgid + img[img.rfind('.')-1:]
            shutil.copy(img, os.path.join(outdir, imgfile))
            card['image'] = imgfile
            break
    return card

def add_card_examples(card):
    word_id = card['word']
    url = 'https://od-api.oxforddictionaries.com:443/api/v1/entries/' 
    url += language + '/' + word_id.lower() + '/sentences'
    r = requests.get(url, headers = {'app_id': app_id, 'app_key': app_key})
    if r.status_code != 200:
        print('[Get card examples], Invalid status code: ', word_id, r.status_code)
        return card   
    sentences = r.json()['results'][0]['lexicalEntries'][0]['sentences']
    def check_part_of_list(item, l):
        for e in l:
            if item in e:
                return True
        return False    
    
    examples = []
    for sent in sentences:
        if not check_part_of_list('American', sent['regions']):
            continue
        examples.append(sent['text'])
    for sent in sentences:
        if check_part_of_list('American', sent['regions']):
            continue
        examples.append('(GB) ' + sent['text'])
    card['examples'] = card['examples'] + examples[:10]
    
    return card

imgs = []
extensions = ['.jpg', '.png', '.jpeg']
for ext in extensions:
    imgs = imgs + glob.glob(os.path.join(indir, '*' + ext))
imgs = sorted(imgs, key=lambda e : os.path.basename(e))
print(len(imgs))

deck_final_file = deck + '_final.txt'
old_ids = set()
if os.path.isfile(deck_final_file):
    with open(deck_final_file) as f:
        deck_data = [json.loads(s) for s in f.read().split(separator)]
    old_ids = set([card['card_id'] for card in deck_data])

outfile = os.path.join(textdir, deck + indir + '_metadata.txt')
outfile = open(outfile, 'w')
for img in imgs:
    imgname = os.path.basename(img)
    word = imgname.split('.')[0].strip().lower()
    card_id = deck + '_' + '_'.join(word.strip().lower().split(' '))   
    if card_id in old_ids:
        print('OLDER CARD: ', card_id)
        continue
        
    card = get_card_meaning(word, card_id)
    card = add_card_pronounciation(card)
    card = add_card_image(card)
    card = add_card_examples(card)
    out = json.dumps(card, indent=2, sort_keys=True) + separator
    outfile.write(out)
    print(card['card_id'])
#     print(out)
outfile.close()

## check trouble cards
## search for examples
## skip processed

48
idioms_a_penny_for_your_thoughts
[Get card examples], Invalid status code:  -speak 404
idioms_actions_speak_louder_than_words
idioms_beat_around_the_bush
idioms_bite_off_more_than_you_can_chew
idioms_blow_off_steam
idioms_break_a_leg
idioms_break_the_ice
idioms_by_the_skin_of_teeth
idioms_comparing_apples_to_oranges
idioms_cram
idioms_don't_cry_over_spilt_milk
idioms_down_to_earth
idioms_feeling_blue;_have_the_blues
idioms_going_dutch
idioms_he_has_bigger_fish_to_fry
idioms_he's_a_chip_off_the_old_block
idioms_head_over_heels
idioms_hit_the_sack
idioms_jack_up_the_prices
idioms_miss_the_boat
idioms_pass_the_buck
idioms_pig_out
idioms_plead_the_fifth
idioms_put_something_on_ice
idioms_take_the_wind_out_of_your_sails
[Get card examples], Invalid status code:  cold shoulder 404
idioms_the_cold_shoulder
[Get card examples], Invalid status code:  well-begun 404
idioms_well_begun_is_half_done
idioms_wrap_(something)_up
idioms_wrap_your_head_around_something
idioms_you_can't_judge_a_book_b

In [47]:
from ipywidgets import widgets
import os
import IPython.display as ipd

# outdir = 'idioms2_data/'
# deck = 'idioms'

infile = os.path.join(textdir, deck + indir + '_metadata.txt')
trouble_file = os.path.join(textdir, deck + indir + '_trouble.txt')
update_file = os.path.join(textdir, deck + indir + '_update.txt')
deck_final_file = deck + '_final.txt'
separator = '\n\n################################\n\n'

for f in [update_file, trouble_file]
    f = open(f, 'w')
    f.close()
    
old_ids = {}
if os.path.isfile(deck_final_file):
    with open(deck_final_file) as f:
        deck_data = [json.loads(s) for s in f.read().split(separator) if len(s) > 0]
    old_ids = set([card['card_id'] for card in deck_data]) 
if os.path.isfile(trouble_file):
    with open(trouble_file) as f:
        deck_data = [json.loads(s) for s in f.read().split(separator) if len(s) > 0]
    old_ids.update([card['card_id'] for card in deck_data])    
    
cards = []
with open(infile) as f:
    data = f.read()
    card_strings = data.split(separator)
    
    for card_string in card_strings:
        if len(card_string) == 0:
            continue
        card = json.loads(card_string)
        if card['card_id'] in old_ids:
            continue
        cards.append(card)
        
def validate_card(i, card):
    for k, v in card.items():
        if v is None:
            card[k] = ''
    
    print('WORD')
    word = widgets.Text()
    word.value = card['word']
    display(word)
    
    print('SENSES')
    senses = []
    for text in card['senses']:
        sense = widgets.Textarea()
        sense.value = text
        clear = widgets.Button(
            value=False,
            description='clear',
            disabled=False,
            button_style='', #, 'warning', 'danger' or ''
            icon='check',
            layout=widgets.Layout(width='7%', height='25px')
        )
        def clear_output(field):
            def f(sender):
                field.value = ''
            return f
        def clear_button(button):
            def f(sender):
                button.value = False
            return f
        clear.on_click(clear_output(sense))
        sense.observe(clear_button(clear))
        sense_box = widgets.HBox([sense, clear])
        display(sense_box)
        senses.append(sense)
        
    print('EXAMPLES')
    examples = []
    for text in card['examples']:
        example = widgets.Textarea()
        example.value = text
        
        clear = widgets.Button(
            value=False,
            description='clear',
            disabled=False,
            button_style='', #, 'warning', 'danger' or ''
            icon='check',
            layout=widgets.Layout(width='7%', height='25px')
        )
        def clear_output(field):
            def f(sender):
                field.value = ''
            return f
        def clear_button(button):
            def f(sender):
                button.value = False
            return f
        clear.on_click(clear_output(example))
        example.observe(clear_button(clear))
        example_box = widgets.HBox([example, clear])
        display(example_box)
        examples.append(example)
       
    def update_image(path):
        imgfile = os.path.join(outdir, path)
        if os.path.isfile(imgfile):
            file = open(imgfile , "rb")
            picture = file.read()
            picture = widgets.Image(
                value=picture,
                format='png',
                width=300,
                height=400,
            )
            display(picture)
            
    def update_sound(path):
        fname = os.path.join(outdir, path)
        if os.path.isfile(fname):
            d = ipd.Audio(fname)
            display(d)
        else:
            print('url?', fname)
            
    print('IMAGE')
    image = widgets.Text()
    image.value = card['image']
    display(image)    
    update_image(image.value)
   
    print('SOUND')
    ipa = widgets.Text()
    ipa.value = card['ipa']
    display(ipa)
    sound = widgets.Text()
    sound.value = card['pronounciation']
    display(sound)    
    update_sound(sound.value)
    
    good = widgets.Button(
            value=False,
            description='GOOD',
            disabled=False,
            button_style='success', #, 'warning', 'danger' or ''
            icon='check',
            layout=widgets.Layout(width='15%', height='50px')
        )
    skip = widgets.Button(
            value=False,
            description='SKIP',
            disabled=False,
            button_style='warning', #, 'warning', 'danger' or ''
            icon='check',
            layout=widgets.Layout(width='15%', height='50px')
        )
    exit = widgets.Button(
            value=False,
            description='EXIT',
            disabled=False,
            button_style='danger', #, 'warning', 'danger' or ''
            icon='check',
            layout=widgets.Layout(width='15%', height='50px')
        )
    
    def show_next(i):
        if i < len(cards):
            validate_card(i, cards[i])
        else:
            print('NO MORE CARDS')
            print('check files: ', deck_final_file, update_file, trouble_file)
            
    def get_updated_card_data():
        card['word'] = word.value
        card['senses'] = [sense.value for sense in senses if len(sense.value) > 0]
        card['examples'] = [example.value for example in examples if len(example.value) > 0]
        card['image'] = image.value
        card['pronounciation'] = sound.value
        card['ipa'] = ipa.value
        return card
    
    def skip_f(sender):
        card = get_updated_card_data()
        with open(trouble_file, 'a') as f:
            f.write(json.dumps(card, indent=4) + separator)
        ipd.clear_output()
        show_next(i+1)
        
    def good_f(sender):
        card = get_updated_card_data()
        with open(deck_final_file, 'a') as f:
            f.write(json.dumps(card, indent=4) + separator)
        with open(update_file, 'a') as f:
            f.write(json.dumps(card, indent=4) + separator)
        ipd.clear_output()
        show_next(i+1)
    
    def exit_f(sender):
        print('EXIT')
        print('check files: ', deck_final_file, update_file, trouble_file)
        
    good.on_click(good_f)
    skip.on_click(skip_f)
    exit.on_click(exit_f)
    nav_buttons = widgets.HBox([skip, good, exit])   
    display(nav_buttons)
        
validate_card(0, cards[0])


WORD


Text(value='animosity')

SENSES


HBox(children=(Textarea(value='strong hostility'), Button(description='clear', icon='check', layout=Layout(hei…

EXAMPLES


HBox(children=(Textarea(value='he no longer felt any animosity toward her'), Button(description='clear', icon=…

HBox(children=(Textarea(value='the animosity between the king and his brother'), Button(description='clear', i…

HBox(children=(Textarea(value='the five decided to put aside their animosities'), Button(description='clear', …

HBox(children=(Textarea(value='(GB) To feel animosity for the country as he defines it would indeed be an indi…

HBox(children=(Textarea(value='(GB) Yet in spite of this long animosity, Confucianism and Buddhism unite in re…

HBox(children=(Textarea(value='(GB) Cynics argue that the space race was merely an expression of cold-war anim…

HBox(children=(Textarea(value='(GB) Pam says the people are lovely and you encounter very little public animos…

HBox(children=(Textarea(value='(GB) He is a gentleman and while we were on opposite sides, there was no animos…

HBox(children=(Textarea(value='(GB) Wilful misinterpretation of the law has bred animosity and resentment towa…

HBox(children=(Textarea(value='(GB) There is strong animosity between the two groups and each has its own cult…

HBox(children=(Textarea(value='(GB) The animosity at the time was directed at the principle, rather than the p…

HBox(children=(Textarea(value='(GB) She said she bore no animosity towards her stepson Gordon for what had hap…

HBox(children=(Textarea(value='(GB) But there is pressure for him to step aside now to clear the air of animos…

IMAGE


Text(value='animosityy.jpg')

Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xfe\x00;CREATOR: gd-jpe…

SOUND


Text(value='http://audio.oxforddictionaries.com/en/mp3/animosity_us_1.mp3')

url? media_dir/http://audio.oxforddictionaries.com/en/mp3/animosity_us_1.mp3




In [66]:
# upload outfolder on google drive
ip = '192.168.82.80'
addr = 'http://192.168.82.80:8000/'

with open(deck_final_file) as f:
    deck_data = [json.loads(s) for s in f.read().split(separator) if len(s) > 0]
print('CARDS', len(deck_data))
out_content = []

headers = ['Text 1', 'Picture 2', 'Text 3', 'Sound 3', 'Text 4']
out_content.append(headers)
for card in deck_data:
    img = card['image']
    if '\t' in img:
        imgnew = img.replace('\t', '')
        if os.path.isfile(os.path.join(outdir, img)):
            shutil.move(os.path.join(outdir, img), os.path.join(outdir, imgnew))
        card['image'] = imgnew
    line = ['||'.join(card['senses']), card['image'], 
            card['word'] + '||' + card['ipa'], card['pronounciation'], '||'.join(card['examples'])]
    line = [l.strip() for l in line]
    out_content.append(line)
       
indices = [i for i in range(len(headers)) if 'Picture' in headers[i] or 'Sound' in headers[i]]
for idx, line in enumerate(out_content):
    if idx > 0:
        for i in indices:
            if line[i].startswith('http') is False:
                line[i] = addr + line[i]
    print('\t'.join(line))

CARDS 42
Text 1	Picture 2	Text 3	Sound 3	Text 4
used to ask someone what they are thinking about	http://192.168.82.80:8000/a penny for your thoughts.jpg	a penny for your thoughts||	http://192.168.82.80:8000/idioms_a_penny_for_your_thoughts_wpron.mp3	(GB) So, a penny for your thoughts here: what criteria, if any, should be applied in selecting names?||(GB) Next time someone offers you a penny for your thoughts… sell!
discuss matter without coming to point	http://192.168.82.80:8000/beat around the bush.jpg	beat around (or beat about) the bush||	http://192.168.82.80:8000/idioms_beat_around_the_bush_wpron.mp3	(GB) Let's not beat about the bush, the sort of people who drop litter are the sort who do not usually give a damn about anybody or anything.||(GB) There is no need to beat about the bush when talking to children - you can be more direct with them than you might think.
take on commitment one cannot fulfil	http://192.168.82.80:8000/bite off more than you can chew.jpeg	bite off more tha

In [67]:
out_content = []
headers = ['Text 1', 'Sound 1', 'Picture 2', 'Text 3',  'Text 4']
out_content.append(headers)

for card in deck_data:
    img = card['image']
    if '\t' in img:
        imgnew = img.replace('\t', '')
        if os.path.isfile(os.path.join(outdir, img)):
            shutil.move(os.path.join(outdir, img), os.path.join(outdir, imgnew))
        card['image'] = imgnew
    line = [card['word'] + '||' + card['ipa'], card['pronounciation'], card['image'], '||'.join(card['senses']), '||'.join(card['examples'])]
    line = [l.strip() for l in line]
    out_content.append(line)
       
indices = [i for i in range(len(headers)) if 'Picture' in headers[i] or 'Sound' in headers[i]]
for idx, line in enumerate(out_content):
    if idx > 0:
        for i in indices:
            if line[i].startswith('http') is False:
                line[i] = addr + line[i]
    print('\t'.join(line))

Text 1	Sound 1	Picture 2	Text 3	Text 4
a penny for your thoughts||	http://192.168.82.80:8000/idioms_a_penny_for_your_thoughts_wpron.mp3	http://192.168.82.80:8000/a penny for your thoughts.jpg	used to ask someone what they are thinking about	(GB) So, a penny for your thoughts here: what criteria, if any, should be applied in selecting names?||(GB) Next time someone offers you a penny for your thoughts… sell!
beat around (or beat about) the bush||	http://192.168.82.80:8000/idioms_beat_around_the_bush_wpron.mp3	http://192.168.82.80:8000/beat around the bush.jpg	discuss matter without coming to point	(GB) Let's not beat about the bush, the sort of people who drop litter are the sort who do not usually give a damn about anybody or anything.||(GB) There is no need to beat about the bush when talking to children - you can be more direct with them than you might think.
bite off more than one can chew||	http://192.168.82.80:8000/idioms_bite_off_more_than_you_can_chew_wpron.mp3	http://192.168.82