In [1]:
import nltk, re
import nltk.data
from nltk.tokenize import word_tokenize
from nltk.tokenize import sent_tokenize
from nltk import pos_tag
from bs4 import BeautifulSoup

import scrapy, json, ast, copy
from klein import Klein, route, run
from scrapy import signals
from scrapy.crawler import CrawlerRunner
from twisted.internet import reactor
from scrapy.utils.log import configure_logging

In [2]:
''' 
    Author: Chan Pruksapha
    Modified from : https://gist.github.com/onyxfish/322906
    Changes :
        1) Use Treebank Chunk (WSJ corpus based) in place of default chunk (ACE corpus)
        2) Node maching from label 'NE' change to more complex rules
'''

ne_chunk_sents = nltk.data.load('chunkers/treebank_chunk_ub.pickle')

def preprocess(text):
    '''for i in range(len(tokenList)):
        if tokenList[i] == u'–' :
            tokenList[i] = u'-'
        elif tokenList[i] == u'&':
            tokenList[i] = u'Amps'''            
    text = text.replace(u'–', u'-').replace(u'&', u'Amps')
    return text

def postprocess(entity_name):
    return entity_name.replace('Amps ', '& ')

def exclude(wordList):
    
    float_match = re.compile(r'[-+]?\d*\.?\d+(?:[eE][-+]?\d+)?$').match   
    def is_number_re(val):
        return bool(float_match(val))

    if len(wordList) == 1 and is_number_re(wordList[0]) :        
        return True        
    
    return False
    
def rules(wordList, labelList):
    
    if exclude(wordList) :
        return False

    pairs = zip(wordList, labelList)
    cleanedPairs = filter(lambda (w,l) : l != 'DT' and l!= 'IN', pairs)
    if len(cleanedPairs) == 0 :
        return False
    
    Ws, Ls = zip(*cleanedPairs)
    
    return all(map(lambda w:  w[0].upper() == w[0], Ws)) and Ls[0] == 'NNP'
    
def extract_entity_names(t):
    entity_names = []
    if hasattr(t, 'label') and t.label:
        if t.label() == 'NP':
            wordList = [child[0] for child in t]
            labelList = [child[1] for child in t]
            if rules(wordList, labelList):
                entity_names.append(postprocess(' '.join([child[0] for child in t])))
        else:
            for child in t:
                entity_names.extend(extract_entity_names(child))
    return entity_names

def ne_extract(article):    
    sentences = sent_tokenize(preprocess(article))
    tokenized_sentences = [word_tokenize(sentence) for sentence in sentences]
    tagged_sentences = [pos_tag(sentence) for sentence in tokenized_sentences]
    chunked_sentences = ne_chunk_sents.parse_sents(tagged_sentences)

    entity_names = []
    for tree in chunked_sentences:
        entity_names.extend(extract_entity_names(tree))

    entity_set = list(set(entity_names))

    return entity_set

In [3]:
import re, urllib

candidates = ["Standard Life Investments", "Laith Khalaf", "Vyas", "Aviva Investors", "Keenan Vyas", "Thursday", "De Montfort University", "MAmpsG Investments", "Duff & Phelps", "Deka", "Canada Life", "the PRA ( Prudential Regulation Authority )", "Bernstein", "BlackRock Inc", "Monday", "Lloyds Banking Group", "Columbia Threadneedle", "Friday", "Director", "Germany", "Henderson Global Investors", "Bank", "Britain 's Financial Ombudsman Service", "Union Investment", "Lansdown", "Aberdeen Asset Management", "Wednesday", "the Real Estate Advisory Group", "Henderson Group", "RBS", "the Financial Conduct Authority 's Chief Executive", "England Governor Mark Carney", "Tuesday", "the Ameriprise Group", "Barclays", "London", "Mediobanca Securities"];

def prepareData(candidates) :
    
    def cleanEnt(e):
        return re.sub(u'[\u201d\u201c\u2019]','', e)
    
    candidates = filter(lambda e: len(e) > 0, map(cleanEnt,candidates))
    url_list = map(lambda e: 'http://d.yimg.com/aq/autoc?lang=en-GB&region=UK&query={}'
                   .format(urllib.quote_plus(e.encode('utf8'))), 
                   candidates)
    
    return (candidates, url_list)


def nameMatching(symbol, entity) :

    suffix = set(['plc','holdings', 'group','limited','company',
                                   'entertainments', 'inc', 'corporation', 'corp', 'international', 'ltd'])
    
    day_names = set(map(lambda s:s.lower(),["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]));
    month_names = set(map(lambda s:s.lower(),["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]));
    city_names = set(map(lambda s: s.lower(),["London", "Manchester", "Bath", "Liverpool", "Bristol", "Chester", "Cambridge", "Oxford"]));
    if not symbol:
        return False
    else :
        symbol = re.sub(ur"[\.,]+", "", symbol) # Remove punctuation
        entity = re.sub(ur"[\.,]+", "", entity)
        symbol_tokens = map(lambda s:s.lower(), symbol.split())
        entity_tokens = map(lambda s:s.lower(), entity.split())
        
        if len(entity_tokens) == 1 and entity_tokens[0] in day_names.union(month_names).union(city_names):
            #print entity_tokens[0]
            return False
                 
        for i in range(len(entity_tokens)) :
            if entity_tokens[i] != symbol_tokens[i] :
                return False
        
        return True
    

In [4]:

class YahooSpider(scrapy.Spider):
    name = "cache"
    allowed_domains = ["d.yimg.com"]

    def __init__(self, candidates=[], *args, **kwargs):
        super(YahooSpider, self).__init__(*args, **kwargs)
        self.entity_list, self.start_urls = prepareData(candidates)
        self.entity_by_url = dict(zip(self.start_urls, self.entity_list))
        self.company_entity_pair = []
                    
    def collect(self, json_response, url):
        entity = self.entity_by_url[url]
        yahoo_entity_pair = ((lambda a: a[0] if len(a) > 0 else None)(
                 map(lambda d: (d[u'symbol'],d[u'name']), 
                             filter( lambda e: e[u'exchDisp'] in set(['London','NASDAQ','NYSE']) and e['typeDisp']=='Equity', 
                             json_response['ResultSet']['Result'])[:1])),
                 entity)
           
        self.company_entity_pair.append(yahoo_entity_pair)
            
    def parse(self, response):
        json_response = json.loads(response.body_as_unicode())
        self.collect(json_response, response.request.url)
                   

output_array = []

def callback(spider, reason):
    filteredRes = [c for (c,e) in filter(lambda (c, e): c and nameMatching(c[1], e), spider.company_entity_pair)]
    #print filteredRes
    output_array.extend(filteredRes);
    #reactor.stop()

#from twisted.internet import reactor
#from scrapy.crawler import CrawlerRunner
#from scrapy.utils.log import configure_logging

#configure_logging({'LOG_FORMAT': '%(levelname)s: %(message)s'})
#runner = CrawlerRunner()
#YahooCralwer = runner.create_crawler(YahooSpider)

#YahooCralwer.signals.connect(callback, signal=signals.spider_closed)    
#d = runner.crawl(YahooCralwer, candidates)
    
#reactor.run()

In [5]:
'''
Input: Search Query
Output: List of Articles
'''

configure_logging({'LOG_FORMAT': '%(levelname)s: %(message)s'})
runner = CrawlerRunner()
app = Klein()
 
     
@app.route('/ner', methods=['POST'])
def getSymbolList(request):
    del output_array[:]
    request.setHeader('Access-Control-Allow-Origin', '*')
    post_msg = ast.literal_eval(request.content.read());
    
    candidates = ne_extract(post_msg['content'].decode('utf-8'))
    
    runner = CrawlerRunner()
    YahooAllCralwer = runner.create_crawler(YahooSpider)
    YahooAllCralwer.signals.connect(callback, signal=signals.spider_closed)    
    d = runner.crawl(YahooAllCralwer, candidates)
    d.addCallback(lambda res: json.dumps(
            [{'ticker':t[0],'name':t[1]} for t in output_array]
        ))
    
    return d

app.run(host="0.0.0.0", port=8080)

2016-10-30 22:34:30+0700 [-] Log opened.
2016-10-30 22:34:30+0700 [-] Site starting on 8080
2016-10-30 22:34:30+0700 [-] Starting factory <twisted.web.server.Site instance at 0x114fb6050>


INFO:scrapy.middleware:Enabled extensions:
['scrapy.extensions.logstats.LogStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.corestats.CoreStats']
INFO: Enabled extensions:
['scrapy.extensions.logstats.LogStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.corestats.CoreStats']
INFO:scrapy.middleware:Enabled downloader middlewares:
['scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',
 'scrapy.downloadermiddlewares.retry.RetryMiddleware',
 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',
 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',
 'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware',
 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware',
 'scrapy.downloadermiddlewares.cookies.CookiesMiddleware',
 'scrapy.downloadermiddlewares.chunked.C

2016-10-30 22:34:52+0700 [_GenericHTTPChannelProtocol,0,127.0.0.1] TelnetConsole starting on 6023


DEBUG:scrapy.extensions.telnet:Telnet console listening on 127.0.0.1:6023
DEBUG: Telnet console listening on 127.0.0.1:6023
DEBUG:scrapy.core.engine:Crawled (200) <GET http://d.yimg.com/aq/autoc?lang=en-GB&region=UK&query=Vyas> (referer: None)
DEBUG: Crawled (200) <GET http://d.yimg.com/aq/autoc?lang=en-GB&region=UK&query=Vyas> (referer: None)
DEBUG:scrapy.core.engine:Crawled (200) <GET http://d.yimg.com/aq/autoc?lang=en-GB&region=UK&query=Duff+%26+Phelps> (referer: None)
DEBUG: Crawled (200) <GET http://d.yimg.com/aq/autoc?lang=en-GB&region=UK&query=Duff+%26+Phelps> (referer: None)
DEBUG:scrapy.core.engine:Crawled (200) <GET http://d.yimg.com/aq/autoc?lang=en-GB&region=UK&query=Thursday> (referer: None)
DEBUG: Crawled (200) <GET http://d.yimg.com/aq/autoc?lang=en-GB&region=UK&query=Thursday> (referer: None)
DEBUG:scrapy.core.engine:Crawled (200) <GET http://d.yimg.com/aq/autoc?lang=en-GB&region=UK&query=Aviva+Investors> (referer: None)
DEBUG: Crawled (200) <GET http://d.yimg.com/aq/au

2016-10-30 22:34:59+0700 [-] "127.0.0.1" - - [30/Oct/2016:15:34:58 +0000] "POST /ner HTTP/1.1" 200 379 "http://localhost:8000/index.html" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36"
2016-10-30 22:34:59+0700 [-] (TCP Port 6023 Closed)


INFO:scrapy.middleware:Enabled extensions:
['scrapy.extensions.logstats.LogStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.corestats.CoreStats']
INFO: Enabled extensions:
['scrapy.extensions.logstats.LogStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.corestats.CoreStats']
INFO:scrapy.middleware:Enabled downloader middlewares:
['scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',
 'scrapy.downloadermiddlewares.retry.RetryMiddleware',
 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',
 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',
 'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware',
 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware',
 'scrapy.downloadermiddlewares.cookies.CookiesMiddleware',
 'scrapy.downloadermiddlewares.chunked.C

2016-10-30 22:42:40+0700 [_GenericHTTPChannelProtocol,1,127.0.0.1] TelnetConsole starting on 6023


DEBUG:scrapy.extensions.telnet:Telnet console listening on 127.0.0.1:6023
DEBUG: Telnet console listening on 127.0.0.1:6023
INFO:scrapy.core.engine:Closing spider (finished)
INFO: Closing spider (finished)
INFO:scrapy.statscollectors:Dumping Scrapy stats:
{'finish_reason': 'finished',
 'finish_time': datetime.datetime(2016, 10, 30, 15, 42, 40, 773609),
 'log_count/DEBUG': 1,
 'log_count/INFO': 7,
 'start_time': datetime.datetime(2016, 10, 30, 15, 42, 40, 767531)}
INFO: Dumping Scrapy stats:
{'finish_reason': 'finished',
 'finish_time': datetime.datetime(2016, 10, 30, 15, 42, 40, 773609),
 'log_count/DEBUG': 1,
 'log_count/INFO': 7,
 'start_time': datetime.datetime(2016, 10, 30, 15, 42, 40, 767531)}
INFO:scrapy.core.engine:Spider closed (finished)
INFO: Spider closed (finished)


2016-10-30 22:42:40+0700 [-] "127.0.0.1" - - [30/Oct/2016:15:42:40 +0000] "POST /ner HTTP/1.1" 200 2 "http://localhost:8000/index.html" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36"
2016-10-30 22:42:40+0700 [-] (TCP Port 6023 Closed)


INFO:scrapy.middleware:Enabled extensions:
['scrapy.extensions.logstats.LogStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.corestats.CoreStats']
INFO: Enabled extensions:
['scrapy.extensions.logstats.LogStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.corestats.CoreStats']
INFO:scrapy.middleware:Enabled downloader middlewares:
['scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',
 'scrapy.downloadermiddlewares.retry.RetryMiddleware',
 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',
 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',
 'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware',
 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware',
 'scrapy.downloadermiddlewares.cookies.CookiesMiddleware',
 'scrapy.downloadermiddlewares.chunked.C

2016-10-30 22:43:18+0700 [_GenericHTTPChannelProtocol,1,127.0.0.1] TelnetConsole starting on 6023


DEBUG:scrapy.extensions.telnet:Telnet console listening on 127.0.0.1:6023
DEBUG: Telnet console listening on 127.0.0.1:6023
DEBUG:scrapy.core.engine:Crawled (200) <GET http://d.yimg.com/aq/autoc?lang=en-GB&region=UK&query=Diageo> (referer: None)
DEBUG: Crawled (200) <GET http://d.yimg.com/aq/autoc?lang=en-GB&region=UK&query=Diageo> (referer: None)
DEBUG:scrapy.core.engine:Crawled (200) <GET http://d.yimg.com/aq/autoc?lang=en-GB&region=UK&query=Balfour+Beatty> (referer: None)
DEBUG: Crawled (200) <GET http://d.yimg.com/aq/autoc?lang=en-GB&region=UK&query=Balfour+Beatty> (referer: None)
DEBUG:scrapy.core.engine:Crawled (200) <GET http://d.yimg.com/aq/autoc?lang=en-GB&region=UK&query=Hammerson> (referer: None)
DEBUG: Crawled (200) <GET http://d.yimg.com/aq/autoc?lang=en-GB&region=UK&query=Hammerson> (referer: None)
DEBUG:scrapy.core.engine:Crawled (200) <GET http://d.yimg.com/aq/autoc?lang=en-GB&region=UK&query=The+Share+Centre> (referer: None)
DEBUG: Crawled (200) <GET http://d.yimg.com/

2016-10-30 22:43:22+0700 [-] "127.0.0.1" - - [30/Oct/2016:15:43:22 +0000] "POST /ner HTTP/1.1" 200 675 "http://localhost:8000/index.html" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36"
2016-10-30 22:43:22+0700 [-] (TCP Port 6023 Closed)


INFO:scrapy.middleware:Enabled extensions:
['scrapy.extensions.logstats.LogStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.corestats.CoreStats']
INFO: Enabled extensions:
['scrapy.extensions.logstats.LogStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.corestats.CoreStats']
INFO:scrapy.middleware:Enabled downloader middlewares:
['scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',
 'scrapy.downloadermiddlewares.retry.RetryMiddleware',
 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',
 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',
 'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware',
 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware',
 'scrapy.downloadermiddlewares.cookies.CookiesMiddleware',
 'scrapy.downloadermiddlewares.chunked.C

2016-10-30 22:43:33+0700 [_GenericHTTPChannelProtocol,1,127.0.0.1] TelnetConsole starting on 6023


DEBUG:scrapy.extensions.telnet:Telnet console listening on 127.0.0.1:6023
DEBUG: Telnet console listening on 127.0.0.1:6023
INFO:scrapy.core.engine:Closing spider (finished)
INFO: Closing spider (finished)
INFO:scrapy.statscollectors:Dumping Scrapy stats:
{'finish_reason': 'finished',
 'finish_time': datetime.datetime(2016, 10, 30, 15, 43, 33, 724879),
 'log_count/DEBUG': 1,
 'log_count/INFO': 7,
 'start_time': datetime.datetime(2016, 10, 30, 15, 43, 33, 718259)}
INFO: Dumping Scrapy stats:
{'finish_reason': 'finished',
 'finish_time': datetime.datetime(2016, 10, 30, 15, 43, 33, 724879),
 'log_count/DEBUG': 1,
 'log_count/INFO': 7,
 'start_time': datetime.datetime(2016, 10, 30, 15, 43, 33, 718259)}
INFO:scrapy.core.engine:Spider closed (finished)
INFO: Spider closed (finished)


2016-10-30 22:43:33+0700 [-] "127.0.0.1" - - [30/Oct/2016:15:43:33 +0000] "POST /ner HTTP/1.1" 200 2 "http://localhost:8000/index.html" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36"
2016-10-30 22:43:33+0700 [-] (TCP Port 6023 Closed)


INFO:scrapy.middleware:Enabled extensions:
['scrapy.extensions.logstats.LogStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.corestats.CoreStats']
INFO: Enabled extensions:
['scrapy.extensions.logstats.LogStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.corestats.CoreStats']
INFO:scrapy.middleware:Enabled downloader middlewares:
['scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',
 'scrapy.downloadermiddlewares.retry.RetryMiddleware',
 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',
 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',
 'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware',
 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware',
 'scrapy.downloadermiddlewares.cookies.CookiesMiddleware',
 'scrapy.downloadermiddlewares.chunked.C

2016-10-30 22:45:30+0700 [_GenericHTTPChannelProtocol,1,127.0.0.1] TelnetConsole starting on 6023


DEBUG:scrapy.extensions.telnet:Telnet console listening on 127.0.0.1:6023
DEBUG: Telnet console listening on 127.0.0.1:6023
DEBUG:scrapy.core.engine:Crawled (200) <GET http://d.yimg.com/aq/autoc?lang=en-GB&region=UK&query=Sales> (referer: None)
DEBUG: Crawled (200) <GET http://d.yimg.com/aq/autoc?lang=en-GB&region=UK&query=Sales> (referer: None)
DEBUG:scrapy.core.engine:Crawled (200) <GET http://d.yimg.com/aq/autoc?lang=en-GB&region=UK&query=the+UK+Read> (referer: None)
DEBUG: Crawled (200) <GET http://d.yimg.com/aq/autoc?lang=en-GB&region=UK&query=the+UK+Read> (referer: None)
DEBUG:scrapy.core.engine:Crawled (200) <GET http://d.yimg.com/aq/autoc?lang=en-GB&region=UK&query=J> (referer: None)
DEBUG: Crawled (200) <GET http://d.yimg.com/aq/autoc?lang=en-GB&region=UK&query=J> (referer: None)
DEBUG:scrapy.core.engine:Crawled (200) <GET http://d.yimg.com/aq/autoc?lang=en-GB&region=UK&query=December> (referer: None)
DEBUG: Crawled (200) <GET http://d.yimg.com/aq/autoc?lang=en-GB&region=UK&qu

2016-10-30 22:45:32+0700 [-] "127.0.0.1" - - [30/Oct/2016:15:45:32 +0000] "POST /ner HTTP/1.1" 200 95 "http://localhost:8000/index.html" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36"
2016-10-30 22:45:32+0700 [-] (TCP Port 6023 Closed)
2016-10-30 22:49:09+0700 [-] Received SIGINT, shutting down.
2016-10-30 22:49:09+0700 [twisted.web.server.Site] (TCP Port 8080 Closed)
2016-10-30 22:49:09+0700 [-] Stopping factory <twisted.web.server.Site instance at 0x114fb6050>
2016-10-30 22:49:10+0700 [-] Main loop terminated.
2016-10-30 22:49:10+0700 [-] 
2016-10-30 22:49:10+0700 [-] 
2016-10-30 22:49:10+0700 [-] 
2016-10-30 22:49:10+0700 [-] 


In [6]:

def nameMatching(symbol, entity) :

    suffix = set(['plc','holdings', 'group','limited','company',
                                   'entertainments', 'inc', 'corporation', 'corp', 'international', 'ltd'])
    
    day_names = set(map(lambda s:s.lower(),["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]));
    month_names = set(map(lambda s:s.lower(),["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]));
    city_names = set(map(lambda s: s.lower(),["London", "Manchester", "Bath", "Liverpool", "Bristol", "Chester", "Cambridge", "Oxford"]));
    if not symbol:
        return False
    else :
        symbol = re.sub(ur"[\.,]+", "", symbol) # Remove punctuation
        entity = re.sub(ur"[\.,]+", "", entity)
        symbol_tokens = map(lambda s:s.lower(), symbol.split())
        entity_tokens = map(lambda s:s.lower(), entity.split())
        
        if len(entity_tokens) == 1 and entity_tokens[0] in day_names.union(month_names).union(city_names):
            return False
                 
        for i in range(len(entity_tokens)) :
            if entity_tokens[i] != symbol_tokens[i] :
                return False
        
        return True
    
print '\n\n\n'

print json.dumps([dict(ticker=c[0],name=c[1]) for (c,e) in filter(lambda (c, e): c and nameMatching(c[1], e), temp)])


NameError: name 'temp' is not defined