In [1]:
import json
import sys

from googleapiclient import discovery
import httplib2
from oauth2client.client import GoogleCredentials
from oauth2client.service_account import ServiceAccountCredentials

In [2]:
def get_service(keyfile=None):
    scopes = ['https://www.googleapis.com/auth/cloud-platform']
    print keyfile
    if keyfile:
        scoped_credentials = ServiceAccountCredentials.from_json_keyfile_name(keyfile, scopes)
    else:
        credentials = GoogleCredentials.get_application_default()
        scoped_credentials = credentials.create_scoped(scopes)
    http = httplib2.Http()
    scoped_credentials.authorize(http)
    return discovery.build('language', 'v1beta1', http=http)


def get_native_encoding_type():
    """Returns the encoding type that matches Python's native strings."""
    if sys.maxunicode == 65535:
        return 'UTF16'
    else:
        return 'UTF32'


def analyze_entities(text, encoding='UTF32', keyfile=None):
    body = {
        'document': {
            'type': 'PLAIN_TEXT',
            'content': text,
        },
        'encodingType': encoding,
    }

    service = get_service(keyfile)

    request = service.documents().analyzeEntities(body=body)
    response = request.execute()

    return response


def annotate_text(text, encoding='UTF32', keyfile=None):
    body = {
        'document': {
            'type': 'PLAIN_TEXT',
            'content': text,
        },
        'features': {
            'extract_syntax': True,
        },
        'encodingType': encoding,
    }

    service = get_service(keyfile)

    request = service.documents().annotateText(body=body)
    response = request.execute()

    return response

In [17]:
text ="""\
Donald Trump, expanding on the provocative immigration ideas that have propelled his presidential candidacy, proposed on Monday a new ideological test that would limit immigrants seeking admission to the U.S. to “those who share our values and respect our people.”

He argued in a speech fleshing out his plans to combat terrorism that tighter immigration standards were needed to fight Islamic State with the same vigor with which the U.S. fought the Cold War. “We will be tough, and we will be even extreme,” he said.
"""
keyfile = "/Users/royd/djsyndicationhub-prod.json"
analyze_entities(text, keyfile=keyfile)

/Users/royd/djsyndicationhub-prod.json


{u'entities': [{u'mentions': [{u'text': {u'beginOffset': 0,
      u'content': u'Donald Trump'}}],
   u'metadata': {u'wikipedia_url': u'http://en.wikipedia.org/wiki/Donald_Trump'},
   u'name': u'Donald Trump',
   u'salience': 0.32294503,
   u'type': u'PERSON'},
  {u'mentions': [{u'text': {u'beginOffset': 204, u'content': u'U.S.'}},
    {u'text': {u'beginOffset': 436, u'content': u'U.S.'}}],
   u'metadata': {u'wikipedia_url': u'http://en.wikipedia.org/wiki/United_States'},
   u'name': u'U.S.',
   u'salience': 0.011790325,
   u'type': u'LOCATION'},
  {u'mentions': [{u'text': {u'beginOffset': 452, u'content': u'Cold War'}}],
   u'metadata': {u'wikipedia_url': u'http://en.wikipedia.org/wiki/Cold_War'},
   u'name': u'Cold War',
   u'salience': 0.0058336239,
   u'type': u'EVENT'},
  {u'mentions': [{u'text': {u'beginOffset': 387,
      u'content': u'Islamic State'}}],
   u'metadata': {u'wikipedia_url': u'http://en.wikipedia.org/wiki/Islamic_State_of_Iraq_and_the_Levant'},
   u'name': u'Islamic

In [18]:
entities = _

In [21]:
locations = [x for x in entities['entities'] if x['type'] == 'LOCATION']

[{u'mentions': [{u'text': {u'beginOffset': 204, u'content': u'U.S.'}},
   {u'text': {u'beginOffset': 436, u'content': u'U.S.'}}],
  u'metadata': {u'wikipedia_url': u'http://en.wikipedia.org/wiki/United_States'},
  u'name': u'U.S.',
  u'salience': 0.011790325,
  u'type': u'LOCATION'}]

In [25]:
import requests

response = requests.get("https://maps.googleapis.com/maps/api/geocode/json?address={0}&key={1}".format("U.S.", "AIzaSyBiNLcShR8kXf9oERXkAhO51KdYNhCP2pE"))
response.json()

{u'results': [{u'address_components': [{u'long_name': u'United States',
     u'short_name': u'US',
     u'types': [u'country', u'political']}],
   u'formatted_address': u'United States',
   u'geometry': {u'bounds': {u'northeast': {u'lat': 71.3867745,
      u'lng': -66.9502861},
     u'southwest': {u'lat': 18.9106771, u'lng': 172.4458954}},
    u'location': {u'lat': 37.09024, u'lng': -95.712891},
    u'location_type': u'APPROXIMATE',
    u'viewport': {u'northeast': {u'lat': 49.38, u'lng': -66.94},
     u'southwest': {u'lat': 25.82, u'lng': -124.39}}},
   u'place_id': u'ChIJCzYy5IS16lQRQrfeQ5K5Oxw',
   u'types': [u'country', u'political']}],
 u'status': u'OK'}

In [28]:
# url, text, synhub
"""
{
  "data": {
    "type": "articles",
    "id": "123",
    "attributes": {
      "an": null,
      "document_type": null,
      "revision_number": null,
      "enrichment_level": null,
      "action": null,
      "upsert_date": null,
      "publication_date": null,
      "language_code": null,
      "title": null,
      "byline": null,
      "dateline": null,
      "lead_paragraph": null,
      "tail_paragraphs": null,
      "source_code": null,
      "credit": null,
      "copyright": null,
      "art": null,
      "region_of_origin": null,
      "publisher_name": null,
      "wordcount": null,
      "metadata": [
        {
          "category": null,
          "why": null,
          "value": "test",
          "type": "code",
          "uri": "factiva://test"
        }
      ]
    }
  },
  "links": {
    "self": null
  }
}
"""
0

0

In [31]:
annotate_text(text, keyfile=keyfile)

/Users/royd/djsyndicationhub-prod.json


{u'entities': [],
 u'language': u'en',
 u'sentences': [{u'text': {u'beginOffset': 0,
    u'content': u'Donald Trump, expanding on the provocative immigration ideas that have propelled his presidential candidacy, proposed on Monday a new ideological test that would limit immigrants seeking admission to the U.S. to \u201cthose who share our values and respect our people.\u201d'}},
  {u'text': {u'beginOffset': 266,
    u'content': u'He argued in a speech fleshing out his plans to combat terrorism that tighter immigration standards were needed to fight Islamic State with the same vigor with which the U.S. fought the Cold War.'}},
  {u'text': {u'beginOffset': 462,
    u'content': u'\u201cWe will be tough, and we will be even extreme,\u201d he said.'}}],
 u'tokens': [{u'dependencyEdge': {u'headTokenIndex': 1, u'label': u'NN'},
   u'lemma': u'Donald',
   u'partOfSpeech': {u'tag': u'NOUN'},
   u'text': {u'beginOffset': 0, u'content': u'Donald'}},
  {u'dependencyEdge': {u'headTokenIndex': 16, u

In [47]:
## Complete Example dj-synhub-locationator
import json
import sys

import requests
from googleapiclient import discovery
import httplib2
from oauth2client.client import GoogleCredentials
from oauth2client.service_account import ServiceAccountCredentials


GOOGLE_GEOCODE_API_KEY = "AIzaSyBiNLcShR8kXf9oERXkAhO51KdYNhCP2pE"
GOOGLE_PROJECT_KEYFILE = "/Users/royd/djsyndicationhub-prod.json"

def get_service(keyfile=None):
    scopes = ['https://www.googleapis.com/auth/cloud-platform']
    if keyfile:
        scoped_credentials = ServiceAccountCredentials.from_json_keyfile_name(keyfile, scopes)
    else:
        credentials = GoogleCredentials.get_application_default()
        scoped_credentials = credentials.create_scoped(scopes)
    http = httplib2.Http()
    scoped_credentials.authorize(http)
    return discovery.build('language', 'v1beta1', http=http)


def analyze_entities(text, keyfile=None):
    body = {
        'document': {
            'type': 'PLAIN_TEXT',
            'content': text,
        },
        'encodingType': 'UTF32',
    }

    service = get_service(keyfile)

    request = service.documents().analyzeEntities(body=body)
    response = request.execute()

    return response


def locate(location, key=GOOGLE_GEOCODE_API_KEY):
    response = requests.get("https://maps.googleapis.com/maps/api/geocode/json?address={0}&key={1}".format(location, key))
    return response.json()


def get_locations(text, keyfile=GOOGLE_PROJECT_KEYFILE):
    entities = analyze_entities(text, keyfile=keyfile).get('entities')
    locations = [x for x in entities if x['type'] == 'LOCATION']
    # print locations
    for location in locations:
        if location.get('name'):
            yield locate(location.get('name')).get('results')[0]

In [49]:
text ="""\
Donald Trump, expanding on the provocative immigration ideas that have propelled his presidential candidacy, proposed on Monday a new ideological test that would limit immigrants seeking admission to the U.S. to “those who share our values and respect our people.”

He argued in a speech fleshing out his plans to combat terrorism that tighter immigration standards were needed to fight Islamic State with the same vigor with which the U.S. fought the Cold War. “We will be tough, and we will be even extreme,” he said.

Also, i live in Minnesota.
"""
        
json.dumps(list(get_locations(text)))

/Users/royd/djsyndicationhub-prod.json


'[{"geometry": {"location_type": "APPROXIMATE", "bounds": {"northeast": {"lat": 71.3867745, "lng": -66.9502861}, "southwest": {"lat": 18.9106771, "lng": 172.4458954}}, "viewport": {"northeast": {"lat": 49.38, "lng": -66.94}, "southwest": {"lat": 25.82, "lng": -124.39}}, "location": {"lat": 37.09024, "lng": -95.712891}}, "address_components": [{"long_name": "United States", "types": ["country", "political"], "short_name": "US"}], "place_id": "ChIJCzYy5IS16lQRQrfeQ5K5Oxw", "formatted_address": "United States", "types": ["country", "political"]}, {"geometry": {"location_type": "APPROXIMATE", "bounds": {"northeast": {"lat": 49.3828205, "lng": -89.49183339999999}, "southwest": {"lat": 43.4993609, "lng": -97.2391959}}, "viewport": {"northeast": {"lat": 49.3826901, "lng": -89.49183339999999}, "southwest": {"lat": 43.4993609, "lng": -97.2391959}}, "location": {"lat": 46.729553, "lng": -94.6858998}}, "address_components": [{"long_name": "Minnesota", "types": ["administrative_area_level_1", "pol

In [36]:
text

'Donald Trump, expanding on the provocative immigration ideas that have propelled his presidential candidacy, proposed on Monday a new ideological test that would limit immigrants seeking admission to the U.S. to \xe2\x80\x9cthose who share our values and respect our people.\xe2\x80\x9d\n\nHe argued in a speech fleshing out his plans to combat terrorism that tighter immigration standards were needed to fight Islamic State with the same vigor with which the U.S. fought the Cold War. \xe2\x80\x9cWe will be tough, and we will be even extreme,\xe2\x80\x9d he said.\n'

In [2]:
article = {
  "data": {
    "type": "articles",
    "id": "123",
    "attributes": {
      "an": "Sample text with Dutluth",
      "document_type": "Sample text with Dutluth",
      "revision_number": "Sample text with Dutluth",
      "enrichment_level": "Sample text with Dutluth",
      "action": "Sample text with Dutluth",
      "upsert_date": "Sample text with Dutluth",
      "publication_date": "Sample text with Dutluth",
      "language_code": "Sample text with Dutluth",
      "title": "Sample text with Dutluth",
      "byline": "Sample text with Dutluth",
      "dateline": "Sample text with Dutluth",
      "lead_paragraph": "Sample text with Duluth",
      "tail_paragraphs": "Sample text with Dutluth",
      "source_code": "Sample text with Dutluth",
      "credit": "Sample text with Dutluth",
      "copyright": "Sample text with Dutluth",
      "art": "Sample text with Dutluth",
      "region_of_origin": "Sample text with Dutluth",
      "publisher_name": "Sample text with Dutluth",
      "wordcount": "Sample text with Dutluth",
      "metadata": [
        {
          "category": "Sample text with Dutluth",
          "why": "Sample text with Dutluth",
          "value": "test",
          "type": "code",
          "uri": "factiva://test"
        }
      ]
    }
  },
  "links": {
    "self": "Sample text with Duluth"
  }
}

In [3]:
article['data']["attributes"].get("title")
article['data']["attributes"].get("byline")
article['data']["attributes"].get("tail_paragraphs")

'Sample text with Dutluth'

In [4]:
text = []

In [5]:
text.append('a')

In [6]:
text

['a']

In [13]:
def a(article):
    title = article['data']["attributes"].get("title")
    byline = article['data']["attributes"].get("byline")
    lead_paragraph = article['data']["attributes"].get("lead_paragraph")
    tail_paragraphs = article['data']["attributes"].get("tail_paragraphs")
    return [x for x in [title, byline, lead_paragraph, tail_paragraphs] if x]

In [14]:
a(article)

['Sample text with Dutluth',
 'Sample text with Dutluth',
 'Sample text with Dutluth',
 'Sample text with Dutluth']

In [9]:
article

{'data': {'attributes': {'action': 'Sample text with Dutluth',
   'an': 'Sample text with Dutluth',
   'art': 'Sample text with Dutluth',
   'byline': 'Sample text with Dutluth',
   'copyright': 'Sample text with Dutluth',
   'credit': 'Sample text with Dutluth',
   'dateline': 'Sample text with Dutluth',
   'document_type': 'Sample text with Dutluth',
   'enrichment_level': 'Sample text with Dutluth',
   'language_code': 'Sample text with Dutluth',
   'lead_paragraph': 'Sample text with Dutluth',
   'metadata': [{'category': 'Sample text with Dutluth',
     'type': 'code',
     'uri': 'factiva://test',
     'value': 'test',
     'why': 'Sample text with Dutluth'}],
   'publication_date': 'Sample text with Dutluth',
   'publisher_name': 'Sample text with Dutluth',
   'region_of_origin': 'Sample text with Dutluth',
   'revision_number': 'Sample text with Dutluth',
   'source_code': 'Sample text with Dutluth',
   'tail_paragraphs': 'Sample text with Dutluth',
   'title': 'Sample text wit

In [15]:
"fdfdf/".rstrip('/')

'fdfdf'