In [1]:
import  requests
import json

In [3]:
API_CREDENTIALS = '../API_credentials/oxford_api_credentials.txt'
with open(API_CREDENTIALS) as f:
    creds = f.read()

In [4]:
api_creds = {}
creds.split('\n')
api_creds['Accept'] = creds.split('\n')[0].split(':')[1].strip()
api_creds['app_id'] = creds.split('\n')[1].split(':')[1].strip()
api_creds['app_key'] = creds.split('\n')[2].split(':')[1].strip()
api_creds['url'] = creds.split('\n')[3].split(':', 1)[1].strip()

api_creds

{'Accept': 'application/json',
 'app_id': '4474d16b',
 'app_key': '0006890f062872d487b651a89d2a62ee',
 'url': 'https://od-api.oxforddictionaries.com/api/v2'}

In [5]:
def json_load(res):
  t = json.dumps(res.json())
  return json.loads(t)

headers = {
        "Accept": api_creds['Accept'],
        "app_id": api_creds['app_id'],
        "app_key": api_creds['app_key']
    }
word_id = 'black'
url_entries = api_creds['url'] + '/entries/en/' + word_id + '?strictMatch=true'
res_entries = requests.get(url_entries, headers = {'app_id' : headers['app_id'], 'app_key' : headers['app_key']})

senses_examples = json_load(res_entries)

In [58]:
for res in senses_examples['results']:
    for ent in res['lexicalEntries']:
        for ex in ent['entries']:
            for sens in ex['senses']:
                print(sens.keys())

dict_keys(['definitions', 'examples', 'id', 'shortDefinitions', 'subsenses', 'synonyms', 'thesaurusLinks'])
dict_keys(['crossReferenceMarkers', 'crossReferences', 'definitions', 'examples', 'id', 'shortDefinitions', 'subsenses', 'variantForms'])
dict_keys(['definitions', 'examples', 'id', 'shortDefinitions', 'subsenses', 'synonyms', 'thesaurusLinks'])
dict_keys(['definitions', 'examples', 'id', 'shortDefinitions', 'synonyms', 'thesaurusLinks'])
dict_keys(['definitions', 'domainClasses', 'examples', 'id', 'regions', 'registers', 'shortDefinitions'])
dict_keys(['definitions', 'examples', 'id', 'notes', 'semanticClasses', 'shortDefinitions', 'subsenses'])
dict_keys(['crossReferenceMarkers', 'crossReferences', 'definitions', 'id', 'semanticClasses', 'shortDefinitions', 'subsenses', 'variantForms'])
dict_keys(['constructions', 'definitions', 'examples', 'id', 'notes', 'shortDefinitions', 'synonyms', 'thesaurusLinks'])
dict_keys(['definitions', 'examples', 'id', 'shortDefinitions', 'subsense

In [111]:
def json_load(res):
  t = json.dumps(res.json())
  return json.loads(t)

word_id = 'Washington'
url_entries = api_creds['url'] + '/entries/en/' + word_id + '?strictMatch=true'
res_entries = requests.get(url_entries, headers = {'app_id' : headers['app_id'], 'app_key' : headers['app_key']})

json_load(res_entries)

{'id': 'washington',
 'metadata': {'operation': 'retrieve',
  'provider': 'Oxford University Press',
  'schema': 'RetrieveEntry'},
 'results': [{'id': 'Washington',
   'language': 'en-gb',
   'lexicalEntries': [{'entries': [{'grammaticalFeatures': [{'id': 'proper',
         'text': 'Proper',
         'type': 'Properness'}],
       'pronunciations': [{'audioFile': 'https://audio.oxforddictionaries.com/en/mp3/washington_1_gb_1.mp3',
         'dialects': ['British English'],
         'phoneticNotation': 'IPA',
         'phoneticSpelling': 'ˈwɒʃɪŋt(ə)n'}],
       'senses': [{'definitions': ['a state of the north-western US, on the coast of the Pacific Ocean; population 6,549,224 (est. 2008); capital, Olympia. It became the 42nd state in 1889.'],
         'id': 'm_en_gbus1142220.006',
         'semanticClasses': [{'id': 'us_state', 'text': 'Us_State'}],
         'shortDefinitions': ['state of north-western US, on Pacific coast']},
        {'definitions': ['the capital of the US; population 

In [145]:
from pydantic import BaseModel, validator, Field
from typing import List, Dict, Optional
import numpy as np

class OxfordAPIResponse(BaseModel):
    id:str
    definition:str
    examples:Optional[List[str]] = Field(None, alias="examples")

    @validator('examples')
    def min_len_examples(cls, v):
        if not len(v) > 10:
            raise ValueError(
                'No'
            )
        return v

    class Config:
        allow_population_by_field_name = True

In [146]:
def json_load(res):
  t = json.dumps(res.json())
  return json.loads(t)

def prepare_api(word:str):
    headers = {
        "Accept": api_creds['Accept'],
        "app_id": api_creds['app_id'],
        "app_key": api_creds['app_key']
    }
    word_id = word
    url_entries = api_creds['url'] + '/entries/en/' + word_id + '?strictMatch=true'
    res_entries = requests.get(url_entries, headers = {'app_id' : headers['app_id'], 'app_key' : headers['app_key']})
    url_sentences = api_creds['url'] + '/sentences/en/' + word_id + '?strictMatch=true'
    res_sentences = requests.get(url_sentences, headers = {'app_id' : headers['app_id'], 'app_key' : headers['app_key']})

    senses_examples = json_load(res_entries)
    sentences_examples = json_load(res_sentences)

    return senses_examples, sentences_examples

def pop_items(sense_key:dict):
    keys_ = ['examples', 'definitions', 'id']
    for item in list(set(sense_key.keys()) ^ set(keys_)):
        sense_key.pop(item, None)

def run_words(word:str):
    from itertools import chain
    senses_examples, sentences_examples = prepare_api(word)

    try:
        senses_examples['results']
    except KeyError:
        raise ValueError(
            'No resutls for senses'
        )
    try:
        sentences_examples['results']
    except KeyError:
        raise ValueError(
            'No resutls for senteces'
        )

    senses_all_res = senses_examples['results']
    sentences_all_res = sentences_examples['results']

    senses = []
    oxford_word = {}
    sense_with_examples = {}
    diff_sense_ids = []


    for res_s in sentences_all_res:
            for ent in res_s['lexicalEntries']:
                for el in ent['sentences']:
                    diff_sense_ids.append(el['senseIds'][0])

    sense_ids = set(diff_sense_ids)

    def search(id):
        for res_s in sentences_all_res:
            for ent in res_s['lexicalEntries']:
                    return [sent['text'] for sent in ent['sentences'] if sent['senseIds'][0] == id]

    oxford_word['word'] = word

    for res in senses_all_res:
        for lent in res['lexicalEntries']:
            for ent in lent['entries']:
                for idx, sens in enumerate(ent['senses']):
                    try:
                        sense_with_examples['id'] = sens['id']
                        sense_with_examples['definition'] = sens['definitions'][0]
                        if 'examples' in sens.keys():
                            examples_for_senses = list(ex['text'] for ex in sens['examples'])
                        else:
                            continue

                        if sens['id'] in list(sense_ids):
                            examples_sense = search(sens['id'])
                            sense_with_examples['examples'] = list(chain(examples_sense, examples_for_senses))
                            if sense_with_examples['examples'] == []:
                                sense_with_examples.pop('examples', None)

                    except KeyError:
                        raise ValueError(
                            'No examples for the word: {}'.format(word)
                        )

                    # senses.append(sense_with_examples.copy())
                    try:
                        yield OxfordAPIResponse(**sense_with_examples).dict().copy()
                    except ValueError:
                        continue

In [147]:
senses = list(run_words('abuse'))

In [148]:
senses

[{'id': 'm_en_gbus0003740.006',
  'definition': 'use (something) to bad effect or for a bad purpose; misuse',
  'examples': ['In the election of 2000, the party in effect abused the judicial power to seize the presidency for itself, and this time the attempt succeeded.',
   'He is already facing impeachment over claims that he misused public money and abused his office since coming to power a year ago.',
   'That does not make sense, that is not logical, and the judge has abused his powers.',
   "By abusing people's willingness to respond to emergencies, you make them less likely to respond to them at all.",
   "Last year in Parliament, Labor's Craig Emerson accused insurance companies of abusing their market power over small smash repairers.",
   'Parents are abusing the new guidelines to save money on childminding.',
   'Because of their unlimited power, some consuls abused their authority.',
   'He abused his position of power to engage in a 3-year affair with a married woman, possi

In [123]:
def pop_items(sense_key:dict):
    keys_ = ['examples', 'definitions', 'id']
    for item in list(set(sense_key.keys()) ^ set(keys_)):
        sense_key.pop(item, None)

all_senses = {}
all_senses['word'] = 'abuse'
def word_to_dict():
    for res in senses:
        for lent in res['lexicalEntries']:
            for ent in lent['entries']:
                for sens in ent['senses']:
                    # print(sens)
                    # pop_items(sens)
                    yield OxfordAPIResponse(**sens).dict().copy()


all_senses['senses'] = list(word_to_dict())
all_senses['senses']



[{'id': 'm_en_gbus0003740.006',
  'definitions': ['use (something) to bad effect or for a bad purpose; misuse'],
  'examples': [{'text': 'the judge abused his power by imposing the fines'}]},
 {'id': 'm_en_gbus0003740.014',
  'definitions': ['treat with cruelty or violence, especially regularly or repeatedly'],
  'examples': [{'text': 'riders who abuse their horses should be prosecuted'}]},
 {'id': 'm_en_gbus0003740.019',
  'definitions': ['speak to (someone) in an insulting and offensive way'],
  'examples': [{'text': 'the referee was abused by players from both teams'}]},
 {'id': 'm_en_gbus0003740.025',
  'definitions': ['the improper use of something'],
  'examples': [{'text': 'alcohol abuse'},
   {'notes': [{'text': 'count noun', 'type': 'grammaticalNote'}],
    'text': '\n              an abuse of public funds'}]},
 {'id': 'm_en_gbus0003740.030',
  'definitions': ['cruel and violent treatment of a person or animal'],
  'examples': [{'text': 'a black eye and other signs of physical

In [69]:
l1 = ['definitions', 'domainClasses', 'examples', 'id', 'semanticClasses', 'shortDefinitions', 'subsenses', 'synonyms', 'thesaurusLinks']
l2 = ['examples', 'definitions', 'id']

list(set(l2) ^ set(l1))

['thesaurusLinks',
 'subsenses',
 'semanticClasses',
 'domainClasses',
 'shortDefinitions',
 'synonyms']

In [175]:
## Read words and load into json file:
TARGET_WORDS_PATH = '../data/target_words/target.txt'
with open(TARGET_WORDS_PATH) as f:
    full_text = f.read()

all_words = []
for word in full_text.split('\n'):
    try:
        out_dict = run_words(word)
        all_words.append(out_dict)
    except ValueError:
        continue

all_words

KeyboardInterrupt: 

In [169]:
def look_for_poly():
    for word in all_words:
        if len(word) > 4:
            yield word[0]['word']

# with open('../News-data-project/target words/senses_oxford_api.txt', 'w') as f:
#     json.dump(list(look_for_poly()), f, indent=4)

In [172]:
list(look_for_poly())

['state', 'right', 'around', 'black', 'force', 'interest', 'support', 'charge']

In [105]:
from nltk import pos_tag, word_tokenize

def search_word_tag(tagged_words:list, word_s:str):
    return next(word for word in tagged_words if word[0] == word_s)

for sens in senses:
    for ex in sens['examples']:
        tokens = word_tokenize(ex)
        tags = pos_tag(tokens)
        print(tags)
        print('\n')
        # print(search_word_tag(tags, 'love'))


[('That', 'DT'), ('year', 'NN'), ('he', 'PRP'), ('moved', 'VBD'), ('to', 'TO'), ('London', 'NNP'), ('but', 'CC'), ('his', 'PRP$'), ('love', 'NN'), ('for', 'IN'), ('Wales', 'NNP'), ('was', 'VBD'), ('strong', 'JJ'), ('and', 'CC'), ('he', 'PRP'), ('eventually', 'RB'), ('settled', 'VBD'), ('permanently', 'RB'), ('there', 'RB'), ('.', '.')]


[('His', 'PRP$'), ('love', 'NN'), ('for', 'IN'), ('children', 'NNS'), ('and', 'CC'), ('affection', 'NN'), ('for', 'IN'), ('the', 'DT'), ('sick', 'NN'), ('have', 'VBP'), ('endeared', 'VBN'), ('him', 'PRP'), ('to', 'TO'), ('all', 'DT'), ('.', '.')]


[('My', 'PRP$'), ('brother', 'NN'), (',', ','), ('and', 'CC'), ('his', 'PRP$'), ('real', 'JJ'), (',', ','), ('strong', 'JJ'), ('love', 'NN'), ('for', 'IN'), ('me', 'PRP'), ('that', 'DT'), ('was', 'VBD'), ('able', 'JJ'), ('to', 'TO'), ('pull', 'VB'), ('me', 'PRP'), ('back', 'RB'), ('into', 'IN'), ('the', 'DT'), ('world', 'NN'), ('I', 'PRP'), ('know', 'VBP'), ('.', '.')]


[('Each', 'DT'), ('one', 'CD'), ('is'

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/aymanehachcham/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
