# Extract company names from text

In [1]:
text = """Sibanye Gold (JSE:SGL) (NYSE:SBGL), the world's number ten producer of gold, has been informed by a US regulatory body that it needs more time to study the South African company's $2.2 billion acquisition of Stillwater Mining (NYSE:SWC), the only US platinum producer.

Sibanye said the investigation, follows the initial review period which concluded at the end of February and will be completed by no later than April 14, 2017, although it is possible the Committee on Foreign Investment in the US (CFIUS) investigation could be concluded sooner.

The notification is in line with standard CFIUS procedure for a transaction of this nature according to a company statement:

The Transaction remains on schedule for closure during the second calendar quarter of 2017 and, in addition to the CFIUS approval, remains subject to the approval of the Transaction by the holders of a majority of Stillwater’s outstanding shares, the approval of the Transaction by the holders of a majority of Sibanye’s shares present and voting, the approval of the related issuance of shares by Sibanye in the context of a potential rights issue by the holders of at least 75% of the shares present and voting, and other customary conditions.
US antitrust authorities gave a nod to the transaction in January while the South African Reserve Bank gave its approval to the deal last month.

The deal announced in December gave Stillwater shareholders a healthy 22% premium over the ruling share price, but investors in Sibanye was skeptical, hammering the Johannesburg-based firms stock down 15% on the news. Stillwater shares have more than doubled in value the past year while Sibanye investors are nursing a 42% loss.

Together with reducing Sibanye’s dependence on its aging South African mines, the deal will make the company the world's third largest palladium producer and fourth biggest platinum group metals miner. Stillwater is the biggest producer of PGMs outside South Africa and Russia which together control 70-80% of global output.

The Johannesburg-listed company, which was spun out of South Africa’s Gold Fields in 2013, spent most of last year shopping for new mines, particularly in the platinum sector.

The company first expanded into PGMs, used in jewellery and as catalytic converters in the vehicle industry, in September 2015  buying Aquarius Platinum and three Anglo American Platinum mines.

Sibanye on Friday also declared total gold mineral reserves of 28.7Moz, a reduction of 2.3Moz, which after accounting for depletion of 1.6Moz due to mining activities in 2016, equates to a 2% decrease year on year. The company's maiden 4E PGM (platinum, palladium, rhodium and gold) mineral resources came in at 126.5Moz while reserves of 23.2Moz were declared.

On Friday, the palladium price was trading down with Nymex contracts exchanging hands for $774 an ounce. Last month palladium hit a 21-month high just shy of $800 and the precious metal is trading some 15% for the better so far in 2017.

"These are some of the lowest cost ounces in the world," chief executive Neal Froneman said in a statement, referring to Stillwater's assets.

Together with reducing Sibanye’s dependence on its aging South African mines, the deal will make the company the world's third largest palladium producer and fourth biggest platinum group metals miner, Froneman noted.

Platinum has  gained nearly 12% year-to-date, exchanging hands for $1,001 an ounce on Wednesday after hitting its highest level since August in February.

Little traded rhodium has hit high of $10,000 an ounce a decade ago, but could be picked up mid-2016 for less than $600 an ounce. It has recovered since then, quoted at $825 recently.
"""

In [2]:
# Alternative input: a list of short labels (mostly place names, plus stray
# tokens such as '(a)' and 'i 3').
# NOTE(review): looks like text lifted from a map image — confirm the source.
l = ['Kachchh', 'Saurashtra', 'Arabian Sea', '(a)', 'Malwa Plateau', 'i 3', 'Deccan Plateau', 'India', '200 Km', 'Bay of Bengal', '']
# Uncomment to analyse the joined labels instead of the article:
#text = ' and '.join(l)

In [3]:
len(text)

3699

In [4]:
# NOTE(review): `secrets` is also the name of a standard-library module
# (Python 3.6+). This import presumably resolves to a local secrets.py that
# holds the Mashape API key — confirm, and consider renaming that file so it
# does not shadow the stdlib module.
from secrets import mashape as key

In [5]:
# Topics-extraction endpoint reached through the Mashape proxy, and the
# request parameters for analysing `text`.
url = "https://topics-extraction.p.mashape.com/topics-2.0.php"
params = {'txt': text,  # the document to analyse
          'txtf': 'plain',  # text format; could be html
          'dm': 's',  # semantic disambiguation includes morphosyntactic disambiguation
          'lang': 'en',  # language of the document
          'of': 'json',  # output format
          'rt': 'n',   # relaxed typography / strictness
          'sdg': 'l',  # Semantic disambiguation grouping (only if dm=s)
          'timeref': '2017-03-03 18:00:00 GMT-04:00',  # For interpreting relative time
          'st': 'n',  # NOTE(review): not documented here — presumably "show subtopics"; confirm against the API docs
          'tt': 'a',  # topic types
          'uw': 'n',  # try to deal with unknown words (eg b/c typos)
          'ud': ''    # user dictionary
         }

In [6]:
import requests

# The API key travels in a request header; the response is requested as JSON.
headers = {
    "X-Mashape-Key": key,
    "Accept": "application/json"
}

# GET request: everything in `params`, including the full article text,
# is URL-encoded into the query string.
r = requests.get(url, headers=headers, params=params)

In [7]:
r

<Response [200]>

In [8]:
j = r.json()

In [9]:
j.keys()

dict_keys(['concept_list', 'status', 'money_expression_list', 'time_expression_list', 'relation_list', 'entity_list', 'other_expression_list', 'quantity_expression_list', 'quotation_list'])

In [10]:
j['quotation_list']

[{'endp': '3053',
  'form': 'These are some of the lowest cost ounces in the world,',
  'inip': '2998'}]

In [11]:
j['quantity_expression_list']

[{'amount_form': '2.2 billion',
  'endp': '224',
  'form': '$2.2 billion acquisition of Stillwater Mining',
  'inip': '180',
  'numeric_value': '2.2e+09',
  'unit': 'acquisition'},
 {'amount_form': '75%',
  'endp': '1168',
  'form': 'at least 75% of the shares',
  'inip': '1143',
  'numeric_value': '0.75',
  'unit': '%'},
 {'amount_form': '22%',
  'endp': '1440',
  'form': 'a healthy 22%',
  'inip': '1428',
  'numeric_value': '0.22',
  'unit': '%'},
 {'amount_form': '15%',
  'endp': '1583',
  'form': '15% on the news',
  'inip': '1569',
  'numeric_value': '0.15',
  'unit': '%'},
 {'amount_form': '42%',
  'endp': '1695',
  'form': 'a 42% loss',
  'inip': '1686',
  'numeric_value': '0.42',
  'unit': '%'},
 {'amount_form': '70-80%',
  'endp': '2021',
  'form': '70-80% of global output',
  'inip': '1999',
  'numeric_value': '0.7',
  'unit': '%'},
 {'amount_form': 'three',
  'endp': '2393',
  'form': 'three Anglo American Platinum mines',
  'inip': '2359',
  'numeric_value': '3',
  'unit': 

In [12]:
j['entity_list']

[{'form': 'Sibanye Gold Limited',
  'id': '1bfe7007e9',
  'relevance': '100',
  'sementity': {'class': 'instance',
   'fiction': 'nonfiction',
   'id': 'ODENTITY_COMPANY',
   'type': 'Top>Organization>Company'},
  'semld_list': ['sumo:Business'],
  'standard_list': [{'id': 'NYSE', 'value': 'SBGL'}],
  'variant_list': [{'endp': '11', 'form': 'Sibanye Gold', 'inip': '0'},
   {'endp': '276', 'form': 'Sibanye', 'inip': '270'},
   {'endp': '993', 'form': 'Sibanye', 'inip': '987'},
   {'endp': '1080', 'form': 'Sibanye', 'inip': '1074'},
   {'endp': '1502', 'form': 'Sibanye', 'inip': '1496'},
   {'endp': '1662', 'form': 'Sibanye', 'inip': '1656'},
   {'endp': '1728', 'form': 'Sibanye', 'inip': '1722'},
   {'endp': '2403', 'form': 'Sibanye', 'inip': '2397'},
   {'endp': '3170', 'form': 'Sibanye', 'inip': '3164'}]},
 {'form': 'Stillwater Mining Company',
  'id': '68fdc1077a',
  'relevance': '66',
  'sementity': {'class': 'instance',
   'fiction': 'nonfiction',
   'id': 'ODENTITY_MINERAL_RESOURC

In [13]:
# Names ('form') of the entities whose semantic type string contains 'Person'.
[x['form'] for x in j['entity_list'] if 'Person' in x['sementity']['type']]

['Neal Froneman']

In [14]:
# Names ('form') of the entities whose semantic type string contains 'Company'
# (e.g. 'Top>Organization>Company').
[x['form'] for x in j['entity_list'] if 'Company' in x['sementity']['type']]

['Sibanye Gold Limited', 'Stillwater Mining Company', 'Gold Fields']

In [15]:
j['time_expression_list']

[{'endp': '235',
  'form': "more time to study the South African company's $2.2 billion acquisition of Stillwater Mining (NYSE:SWC)",
  'inip': '133'},
 {'actual_time': '2017-02-03',
  'endp': '373',
  'form': 'February',
  'inip': '366',
  'normalized_form': '|||||2|||||',
  'precision': 'month'},
 {'actual_time': '2017-04-14',
  'endp': '427',
  'form': 'April 14, 2017',
  'inip': '414',
  'normalized_form': '21||||2017|4|14||||',
  'precision': 'day'},
 {'endp': '546', 'form': 'sooner', 'inip': '541'},
 {'actual_time': '2017-01-03',
  'endp': '1286',
  'form': 'in January',
  'inip': '1277',
  'normalized_form': '|||||1|||||',
  'precision': 'month'},
 {'endp': '1364', 'form': 'the deal last month', 'inip': '1346'},
 {'actual_time': '2017-12-03',
  'endp': '1397',
  'form': 'in December',
  'inip': '1387',
  'normalized_form': '|||||12|||||',
  'precision': 'month'},
 {'endp': '1648', 'form': 'the past year', 'inip': '1636'},
 {'actual_time': '2013-03-03',
  'endp': '2113',
  'form'

In [16]:
# Names ('form') of the entities whose semantic type string contains 'Location'.
[x['form'] for x in j['entity_list'] if 'Location' in x['sementity']['type']]

['United States', 'Johannesburg', 'South Africa', 'Russia']

## Calling the MeaningCloud API directly

In [17]:
from secrets import meaningcloud

In [18]:
def analyse_text(t, txtf='plain'):
    """Run MeaningCloud topics extraction on ``t`` and return the parsed JSON.

    Parameters
    ----------
    t : str
        The document to analyse.
    txtf : str, optional
        Value sent as the API's ``txtf`` (text format) field. Defaults to
        ``'plain'``; pass ``'html'`` for HTML input.

    Returns
    -------
    The decoded JSON body of the response (whatever ``r.json()`` yields).
    """
    # NOTE(review): plain http sends the API key unencrypted — confirm the
    # service accepts https and switch the scheme if it does.
    url = "http://api.meaningcloud.com/topics-2.0"

    data = {
        'key': meaningcloud,
        'lang': 'en',
        # Send the argument itself. This used to send the notebook-level
        # `text` variable, so every call analysed the same article no matter
        # what was passed in.
        'txt': t,
        'txtf': txtf,
        'tt': 'a'  # topic types
        }

    headers = {'content-type': 'application/x-www-form-urlencoded'}

    r = requests.request("POST", url, data=data, headers=headers)

    return r.json()

In [19]:
def get_people(j):
    """Return the ``form`` of each entity whose semantic type mentions 'Person'.

    ``j`` is a parsed topics-extraction response; only its ``entity_list``
    entry is read.
    """
    people = []
    for entity in j['entity_list']:
        if 'Person' in entity['sementity']['type']:
            people.append(entity['form'])
    return people

In [20]:
def get_companies(j):
    """Return the ``form`` of each entity whose semantic type mentions 'Company'.

    ``j`` is a parsed topics-extraction response; only its ``entity_list``
    entry is read.
    """
    companies = []
    for entity in j['entity_list']:
        if 'Company' not in entity['sementity']['type']:
            continue
        companies.append(entity['form'])
    return companies

In [21]:
def get_places(j):
    """Return the ``form`` of each entity whose semantic type mentions 'Location'.

    ``j`` is a parsed topics-extraction response; only its ``entity_list``
    entry is read.
    """
    located = (
        entity for entity in j['entity_list']
        if 'Location' in entity['sementity']['type']
    )
    return [entity['form'] for entity in located]

In [22]:
def get_elements(j):
    """Return the ``form`` of each concept whose semantic type mentions 'Element'.

    Unlike the entity helpers, this reads the ``concept_list`` entry of the
    parsed response ``j``.
    """
    elements = []
    for concept in j['concept_list']:
        semantic_type = concept['sementity']['type']
        if 'Element' in semantic_type:
            elements.append(concept['form'])
    return elements

In [23]:
get_people(analyse_text(text))

['Neal Froneman']

In [45]:
# NOTE: get_symbol is defined further down in this notebook (the cell with
# execution count 41). Run top-to-bottom on a fresh kernel, this cell raises
# NameError unless that definition has been executed first.
[get_symbol(x) for x in get_companies(analyse_text(text))]

[('Sibanye Gold Limited', 'SBGL'),
 ('Stillwater Mining Company', 'SWC'),
 ('Gold Fields Limited', 'GFI')]

In [None]:
get_places(analyse_text(text))

In [None]:
get_elements(analyse_text(text))

## Large HTML string

In [25]:
# Read the example page into memory as one string. No encoding is given,
# so the platform default is used when decoding the file.
with open('../data/example.html', 'r') as f:
    html = f.read()

In [26]:
get_elements(analyse_text(html, 'html'))

['platinum', 'gold', 'rhodium']

## Functions

In [27]:
def analyse_text_mashape(t):
    """Run topics extraction on ``t`` through the Mashape proxy.

    Parameters
    ----------
    t : str
        The plain-text document to analyse.

    Returns
    -------
    The decoded JSON body of the response (whatever ``r.json()`` yields).
    """
    url = "https://topics-extraction.p.mashape.com/topics-2.0.php"
    # Send the argument itself under 'txt'. This used to send the
    # notebook-level `text` variable, so the parameter was silently ignored.
    params = {'txt': t,
              'txtf': 'plain',  # could be html
              'dm': 's',  # semantic disambiguation includes morphosyntactic disambiguation
              'lang': 'en',
              'of': 'json',
              'rt': 'n',   # relaxed typography / strictness
              'sdg': 'l',  # Semantic disambiguation grouping (only if dm=s)
              'timeref': '2017-03-03 18:00:00 GMT-04:00',  # For interpreting relative time
              'st': 'n',
              'tt': 'a',  # topic types
              'uw': 'n',  # try to deal with unknown words (eg b/c typos)
              'ud': ''    # user dictionary
             }

    # The API key travels in a header; the response is requested as JSON.
    headers = {
        "X-Mashape-Key": key,
        "Accept": "application/json"
    }

    r = requests.get(url, headers=headers, params=params)

    return r.json()

In [28]:
get_people(analyse_text(text))

['Neal Froneman']

In [44]:
# NOTE: get_symbol is defined further down in this notebook (the cell with
# execution count 41). Run top-to-bottom on a fresh kernel, this cell raises
# NameError unless that definition has been executed first.
[get_symbol(x) for x in get_companies(analyse_text(text))]

[('Sibanye Gold Limited', 'SBGL'),
 ('Stillwater Mining Company', 'SWC'),
 ('Gold Fields Limited', 'GFI')]

In [None]:
get_places(analyse_text(text))

# Find places

In [30]:
import geocoder

In [31]:
# Geocode a free-text place name with geocoder's Google provider, then
# display the coordinates of the match.
g = geocoder.google('Deccan Plateau')
g.latlng

[14, 77]

In [32]:
g.country

'IN'

In [33]:
g.confidence

9

In [34]:
g.state

'AP'

# Guess ticker symbols from company name

In [35]:
import requests

In [36]:
# Symbol-suggest (autocomplete) endpoint used to turn a company name into
# ticker candidates.
url = "http://d.yimg.com/aq/autoc"

# Free-text company name to look up.
q = "rupert resources"

params = {
    "query": q,
    "region": "IN",  # NOTE(review): hard-coded region — confirm this is intended
    "lang": "en-US",
    # Name of the callback function the reply is wrapped in; a later cell
    # strips that wrapper before parsing the JSON.
    "callback": "YAHOO.Finance.SymbolSuggest.ssCallback"
}

In [37]:
r = requests.get(url, params=params)

In [38]:
import re

# The reply is wrapped in a callback call, callback(<json>). Capture what
# sits between the first '(' and the last ')'. The single-target unpacking
# requires exactly one match and raises ValueError otherwise.
result, = re.findall(r"\((.*)\)", r.text)

In [46]:
import json

# Keep only the first suggestion. An empty 'Result' list raises IndexError here.
best = json.loads(result)['ResultSet']['Result'][0]

In [40]:
best

{'exch': 'VAN',
 'exchDisp': 'CDNX',
 'name': 'Rupert Resources Ltd.',
 'symbol': 'RUP.V',
 'type': 'S',
 'typeDisp': 'Equity'}

In [41]:
import re
import json
import requests

def get_symbol(q):
    """Look up the best-matching ticker symbol for a company name.

    Queries the Yahoo symbol-suggest endpoint and takes its first suggestion.

    Parameters
    ----------
    q : str
        Free-text company name, e.g. ``'Sibanye Gold'``.

    Returns
    -------
    tuple or None
        ``(name, symbol)`` of the first suggestion, or ``None`` when the
        service returns no suggestions for ``q``.
    """
    url = "http://d.yimg.com/aq/autoc"

    params = {
        "query": q,
        "region": "IN",
        "lang": "en-US",
        "callback": "YAHOO.Finance.SymbolSuggest.ssCallback"
    }

    r = requests.get(url, params=params)
    # The reply is wrapped in a callback call, callback(<json>). Pull out the
    # JSON between the parentheses; exactly one match is expected.
    payload, = re.findall(r"\((.*)\)", r.text)
    suggestions = json.loads(payload)['ResultSet']['Result']
    if not suggestions:
        # An unknown name used to surface as IndexError on [0]. Returning None
        # lets list comprehensions over several companies keep going.
        return None
    best = suggestions[0]
    return best['name'], best['symbol']

In [42]:
get_symbol('Rupert Resources')

('Rupert Resources Ltd.', 'RUP.V')

In [43]:
get_symbol('Sibanye Gold')

('Sibanye Gold Limited', 'SBGL')

# Get quote from ticker

In [47]:
# Public YQL endpoint; the quote lookup is expressed as a YQL query string.
url = "https://query.yahooapis.com/v1/public/yql"

# Ticker to fetch a quote for.
symbol = 'RUP.V'

params = {
    # The symbol is interpolated directly into the query text.
    "q": 'select * from yahoo.finance.quotes where symbol in ("{}")'.format(symbol),
    "format": "json",
    "env": "store://datatables.org/alltableswithkeys"
}

r = requests.get(url, params=params)

In [48]:
r.json()

{'query': {'count': 1,
  'created': '2017-03-05T01:13:43Z',
  'lang': 'en-US',
  'results': {'quote': {'AfterHoursChangeRealtime': None,
    'AnnualizedGain': None,
    'Ask': '1.33',
    'AskRealtime': None,
    'AverageDailyVolume': '76224',
    'Bid': '1.32',
    'BidRealtime': None,
    'BookValue': '0.03',
    'Change': '+0.02',
    'ChangeFromFiftydayMovingAverage': '0.22',
    'ChangeFromTwoHundreddayMovingAverage': '0.46',
    'ChangeFromYearHigh': '-0.15',
    'ChangeFromYearLow': '1.27',
    'ChangePercentRealtime': None,
    'ChangeRealtime': None,
    'Change_PercentChange': '+0.02 - +1.54%',
    'ChangeinPercent': '+1.54%',
    'Commission': None,
    'Currency': 'CAD',
    'DaysHigh': '1.35',
    'DaysLow': '1.31',
    'DaysRange': '1.31 - 1.35',
    'DaysRangeRealtime': None,
    'DaysValueChange': None,
    'DaysValueChangeRealtime': None,
    'DividendPayDate': None,
    'DividendShare': None,
    'DividendYield': None,
    'EBITDA': '-1.58M',
    'EPSEstimateCurrentYe

In [49]:
quote = r.json()['query']['results']['quote']

In [50]:
quote['Currency']

'CAD'

In [51]:
quote['LastTradePriceOnly']

'1.32'

In [52]:
quote['MarketCapitalization']

'131.21M'

In [53]:
import requests

def get_quote(s):
    """Fetch the last trade price and currency for a ticker symbol via YQL.

    Parameters
    ----------
    s : str
        Ticker symbol, e.g. ``'SBGL'``.

    Returns
    -------
    tuple or None
        ``(LastTradePriceOnly, Currency)`` taken from the quote record, or
        ``None`` when the response has no usable ``query.results.quote``
        entry or is not valid JSON.
    """
    url = "https://query.yahooapis.com/v1/public/yql"

    params = {
        "q": 'select * from yahoo.finance.quotes where symbol in ("{}")'.format(s),
        "format": "json",
        "env": "store://datatables.org/alltableswithkeys"
    }

    r = requests.get(url, params=params)

    try:
        quote = r.json()['query']['results']['quote']
    except (KeyError, TypeError, ValueError):
        # KeyError: an expected key is missing. TypeError: an intermediate
        # value is None or otherwise not subscriptable by key. ValueError:
        # the body is not JSON.
        # This was a bare `except:`, which also swallowed KeyboardInterrupt
        # and SystemExit.
        return None

    return quote['LastTradePriceOnly'], quote['Currency']


In [54]:
get_quote('SBGL')

('7.90', 'USD')

# Try a local endpoint

In [229]:
# Endpoint served on this machine (port 5000).
url = "http://localhost:5000/bar"

# The body below is serialised by hand, so declare it as JSON explicitly.
headers = {"Content-Type": "application/json"}

data = {
    'text': text,
    'animal': 'kitten',
}

# POST the payload as a JSON string.
r = requests.post(url, headers=headers, data=json.dumps(data))

In [230]:
r.text

'kitten'

In [231]:
# The endpoint replied with the plain-text body 'kitten' (see r.text),
# which is not valid JSON, so this call raises JSONDecodeError.
r.json()

JSONDecodeError: Expecting value: line 1 column 1 (char 0)