In [20]:
import nltk 
import pprint

# Sample passage used as input for the NLTK named-entity demo below.
sample = """Gandhi famously led Indians in challenging the British-imposed salt tax with the 400 km (250 mi) Dandi Salt March in 1930, and later in calling for the British to Quit India in 1942. He was imprisoned for many years, upon many occasions, in both South Africa and India. Gandhi attempted to practise nonviolence and truth in all situations, and advocated that others do the same. He lived modestly in a self-sufficient residential community and wore the traditional Indian dhoti and shawl, woven with yarn hand-spun on a charkha. He ate simple vegetarian food, and also undertook long fasts as a means of both self-purification and social protest."""

# Pipeline: text -> sentences -> word tokens -> POS tags -> NE chunks.
sentences = nltk.sent_tokenize(sample)
tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
# pos_tag_sents is NLTK's batch tagging API: one call tags every sentence,
# instead of invoking nltk.pos_tag separately for each sentence.
tagged_sentences = nltk.pos_tag_sents(tokenized_sentences)
# binary=True asks the chunker for a single generic 'NE' label rather than
# typed labels; extract_entity_names below looks for exactly that label.
chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=True)

def extract_entity_names(t):
    """Return the text of every 'NE'-labelled subtree inside ``t``.

    Args:
        t: A chunk-tree node. Anything exposing a callable ``label()`` and
            iterable children is treated as a tree; anything else (for
            example a ``(word, tag)`` leaf tuple) is treated as a leaf.

    Returns:
        A list of entity strings in traversal order. Each string is the
        space-joined first element of the children of one 'NE' subtree.
        Duplicates are kept; a leaf yields an empty list.

    Side effects:
        Prints each matching 'NE' subtree to stdout as it is found.
    """
    # Leaves have no label() method. The callable check also covers objects
    # whose ``label`` attribute is present but unset (e.g. None).
    if not callable(getattr(t, 'label', None)):
        return []

    if t.label() == 'NE':
        # print(...) with one argument prints the same text on Python 2 and
        # Python 3, whereas the statement form `print t` is Python 2 only.
        print(t)
        # The children of an NE subtree are indexed with [0] to get the word.
        return [' '.join(child[0] for child in t)]

    # Any other labelled node: recurse and concatenate what the children find.
    entity_names = []
    for child in t:
        entity_names.extend(extract_entity_names(child))
    return entity_names

# Collect the entity names found in every chunked sentence, in document
# order, keeping duplicates.
entity_names = []
for tree in chunked_sentences:
    entity_names.extend(extract_entity_names(tree))

# Print unique entity names. The call form print(...) with a single argument
# prints the same text on Python 2 and Python 3; the statement form
# `print set(...)` is a SyntaxError on Python 3.
print(set(entity_names))

(NE Gandhi/NNP)
(NE Dandi/NNP Salt/NNP March/NNP)
(NE British/JJ)
(NE India/NNP)
(NE South/NNP Africa/NNP)
(NE India/NNP)
(NE Gandhi/NNP)
(NE Indian/JJ)
set(['South Africa', 'India', 'British', 'Indian', 'Gandhi', 'Dandi Salt March'])


In [22]:
# Chunk only the first tagged sentence, this time with typed entity labels
# (binary=False is ne_chunk's default, spelled out here to contrast with the
# binary=True run used for the entity-name extraction).
print(nltk.ne_chunk(tagged_sentences[0], binary=False))

(S
  (GPE Gandhi/NNP)
  famously/RB
  led/VBD
  (GPE Indians/NNPS)
  in/IN
  challenging/VBG
  the/DT
  British-imposed/JJ
  salt/NN
  tax/NN
  with/IN
  the/DT
  400/CD
  km/NN
  (/(
  250/CD
  mi/NN
  )/)
  (PERSON Dandi/NNP Salt/NNP)
  March/NNP
  in/IN
  1930/CD
  ,/,
  and/CC
  later/RB
  in/IN
  calling/VBG
  for/IN
  the/DT
  (GPE British/JJ)
  to/TO
  Quit/VB
  (GPE India/NNP)
  in/IN
  1942/CD
  ./.)


In [41]:
from nltk.tag import StanfordNERTagger
# Build a Stanford NER tagger from the named classifier file.
# NOTE(review): the model is given as a bare file name, so presumably it is
# located through NLTK's Stanford environment configuration — confirm the
# setup required before re-running this cell.
st = StanfordNERTagger('english.all.3class.distsim.crf.ser.gz')

# Passage to tag.
sent = "Gandhi initially favoured offering nonviolent moral support to the British effort when World War II broke out in 1939, but the Congressional leaders were offended by the unilateral inclusion of India in the war without consultation of the people's representatives. All Congressmen resigned from office.[124] After long deliberations, Gandhi declared that India could not be party to a war ostensibly being fought for democratic freedom while that freedom was denied to India itself. As the war progressed, Gandhi intensified his demand for independence, calling for the British to Quit India in a speech at Gowalia Tank Maidan. This was Gandhi's and the Congress Party's most definitive revolt aimed at securing the British exit from India. Gandhi was criticised by some Congress party members and other Indian political groups, both pro-British and anti-British. Some felt that not supporting Britain more in its struggle against Nazi Germany was unethical. Others felt that Gandhi's refusal for India to participate in the war was insufficient and more direct opposition should be taken, while Britain fought against Nazism, it continued to refuse to grant India Independence. Quit India became the most forceful movement in the history of the struggle, with mass arrests and violence on an unprecedented scale."

# Tag the passage; the tokens handed to the tagger come from a plain
# whitespace split, so punctuation stays attached (e.g. "1939,", "India.").
# NOTE(review): the earlier cell tokenizes with nltk.word_tokenize; check
# whether whitespace splitting is intended here or skews the tag counts.
ners = st.tag(sent.split()) 

In [42]:
# Tally how often each tagger result occurs, ignoring entries whose second
# element is the 'O' label. Keys are the tagger's result items themselves.
ner_dict = {}
for ner in ners:
    if ner[1] == u'O':
        continue
    # Missing keys start from zero, so first and repeat sightings share a path.
    ner_dict[ner] = ner_dict.get(ner, 0) + 1

In [43]:
# Pretty-print the tally built in the previous cell.
pprint.pprint(ner_dict)

{(u'Britain', u'LOCATION'): 2,
 (u'Congress', u'ORGANIZATION'): 2,
 (u'Congressional', u'ORGANIZATION'): 1,
 (u'Gandhi', u'PERSON'): 6,
 (u'Germany', u'LOCATION'): 1,
 (u'Gowalia', u'LOCATION'): 1,
 (u'India', u'LOCATION'): 6,
 (u'Indian', u'LOCATION'): 1,
 (u'Maidan', u'LOCATION'): 1,
 (u'Nazi', u'ORGANIZATION'): 1,
 (u'Party', u'ORGANIZATION'): 1,
 (u'Tank', u'LOCATION'): 1,
 (u'party', u'ORGANIZATION'): 1}
