In [1]:
import findspark
findspark.init()
import pyspark
sc = pyspark.SparkContext()
sqlContext = pyspark.sql.SQLContext(sc)

In [2]:
import os

In [3]:
from sift.corpora import wikipedia, wikidata
from sift.models import text, links
wikipedia_base_path = '/data0/linking/wikipedia/dumps/20150901/'
wikidata_base_path = '/n/schwa11/data0/linking/wikidata/dumps/20150713'

In [4]:
wikipedia_corpus = wikipedia.WikipediaCorpus()(sc, wikipedia_base_path)
docs = wikipedia.WikipediaArticles()(wikipedia_corpus).cache()

In [5]:
wikipedia_pfx = 'en.wikipedia.org/wiki/'

In [6]:
ec_model = links\
    .EntityCounts(min_count=5, filter_target=wikipedia_pfx)\
    .build(docs)\
    .map(links.EntityCounts.format_item)

In [7]:
enc_model = links\
    .EntityNameCounts(lowercase=True, filter_target=wikipedia_pfx)\
    .build(docs)\
    .filter(lambda (name, counts): sum(counts.itervalues()) > 1)\
    .map(links.EntityNameCounts.format_item)

In [8]:
ec_model.take(1)

[{'_id': u'en.wikipedia.org/wiki/Polar_class', 'count': 15}]

In [9]:
from nel.model import data
from nel.model.store import file

In [10]:
os.environ['NEL_DATASTORE_URI'] = 'file:///data0/nel/'

In [57]:
# we can use model.toLocalIterator if models don't fit in memory

In [58]:
data.ObjectStore\
    .Get('models:ecounts[wikipedia]')\
    .save_many(ec_model.collect())

2017-02-26 17:53:54,467|DEBUG|data|Using file object store for (models:ecounts[wikipedia])...
2017-02-26 17:54:19,292|DEBUG|file|Loading mmap store: /data0/nel/models/ecounts[wikipedia].index ...


In [None]:
data.ObjectStore\
    .Get('models:necounts[wikipedia]')\
    .save_many(enc_model.collect())

2017-02-26 17:54:34,399|DEBUG|data|Using file object store for (models:necounts[wikipedia])...


In [13]:
from nel.doc import Doc

In [14]:
from nel.harness.format import from_sift

In [18]:
from nel.process.pipeline import Pipeline
from nel.process.candidates import NameCounts
from nel.features.probability import EntityProbability, NameProbability

In [16]:
candidate_generation = [
    NameCounts('wikipedia', 10)
]
feature_extraction = [
    EntityProbability('wikipedia'),
    NameProbability('wikipedia')
]

2017-02-26 17:35:06,227|INFO|candidates|Preparing name model candidate generator (model=wikipedia, limit=10)...
2017-02-26 17:35:06,228|DEBUG|data|Using file object store for (models:necounts[wikipedia])...
2017-02-26 17:35:06,229|DEBUG|file|Loading mmap store: /data0/nel/models/necounts[wikipedia].index ...
2017-02-26 17:35:18,528|DEBUG|data|Using file object store for (models:ecounts[wikipedia])...
2017-02-26 17:35:18,529|DEBUG|file|Loading mmap store: /data0/nel/models/ecounts[wikipedia].index ...


In [19]:
training_pipeline = Pipeline(candidate_generation + feature_extraction)

In [20]:
training_docs = [from_sift(doc) for doc in docs.takeSample(False, 100)]

In [21]:
train = [training_pipeline(doc) for doc in training_docs]

In [32]:
from nel.learn import ranking
from nel.features import meta
from nel.model import resolution
from nel.process import resolve

In [25]:
ranker = ranking.TrainLinearRanker(name='ranker', features=[f.id for f in feature_extraction])(train)

2017-02-26 17:40:52,259|INFO|train|Computing feature statistics over 100 documents...
2017-02-26 17:40:52,266|INFO|train|Building training set, feature mapping = PolynomialMapper...
2017-02-26 17:40:52,563|INFO|train|Fitting model over 7645 instances...
2017-02-26 17:40:52,572|INFO|train|Training set pairwise classification: 86.9% (6645/7645)
2017-02-26 17:40:52,573|INFO|train|Done.


In [33]:
classifier_feature = meta.ClassifierScore(ranker)
linking = [
    classifier_feature,
    resolve.FeatureRankResolver(classifier_feature.id)
]

In [34]:
linking_pipeline = Pipeline(candidate_generation + feature_extraction + linking)

In [38]:
sample = [from_sift(doc) for doc in docs.takeSample(False, 10)]

In [42]:
# clear existing links
for doc in sample:
    for chain in doc.chains:
        chain.resolution = None
        for mention in chain.mentions:
            mention.resolution = None

In [43]:
linked_sample = [linking_pipeline(doc) for doc in sample]

In [44]:
[d.id for d in linked_sample]

['en.wikipedia.org/wiki/Pamela_Manson',
 'en.wikipedia.org/wiki/Zatorowizna',
 'en.wikipedia.org/wiki/Wadih',
 'en.wikipedia.org/wiki/Self-Help_(album)',
 'en.wikipedia.org/wiki/Dean_Goodhill',
 u'en.wikipedia.org/wiki/Lang\xe5-Struer_Line',
 'en.wikipedia.org/wiki/List_of_Berkshire_County_Cricket_Club_grounds',
 'en.wikipedia.org/wiki/Click_Asia_Summit',
 'en.wikipedia.org/wiki/LSM_Launch_Coaster_(Vekoma)',
 'en.wikipedia.org/wiki/Warelands']

In [45]:
sample[0].chains[0].resolution.id

u'en.wikipedia.org/wiki/Mermaid_Theatre'

In [46]:
from nel.harness.format import inject_markdown_links
from IPython.display import display, Markdown

In [47]:
display(Markdown(inject_markdown_links(linked_sample[0].text, linked_sample[0])))

Pamela Manson in "[The Fall and Rise of Reginald Perrin](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/The_Fall_and_Rise_of_Reginald_Perrin)" (1976).
[Pamela Manson](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Pamela_Manson) (30 September 1928–19 March 1988) was a British actress who in her 30-year career on film, television and stage is best known for playing comedy roles. She was also a political activist who was a member of the International Committee for Artists' Freedom for [Equity](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Stock) and a member of the National Campaign for the Repeal of the Obscene Publications Acts.

Early years
Born as Pamela J. Cowan, and originally from [Leeds](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Leeds), before turning to an acting career she worked as a secretary on the "[News Chronicle](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/News_Chronicle)" and at one time also worked as a public relations officer in the fashion industry and managed theatrical artists for a period. In 1949 at [Kensington](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Kensington) she married Louis Manson, later the Chairman of Cope Allman International; they had two sons and two daughters. The marriage was later dissolved.

Acting career
Manson's first acting role was in 1952 following which she had a long career in the theatre. Breaking into television, she made appearances  as the Canteen Server/Bertha in four episodes of ''[Hancock's Half Hour](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Hancock's_Half_Hour)" (1957–59), Rita/Irma Stevens in "[Dixon of Dock Green](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Dixon_of_Dock_Green)" (1961-62), Mrs. Phillips in "[Z-Cars](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Z-Cars)" (1965), René Tanner in "Emergency – Ward 10" (1966), Maggie in "[All Gas and Gaiters](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/All_Gas_and_Gaiters)" (1967), Mrs. Lloyd in "[Champion House](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Champion_House)" (1967-68), Mrs. Levy in "[Alexander the Greatest](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Alexander_the_Greatest)" (1971), the Large Brim with Fruit in "[Are You Being Served?](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Are_You_Being_Served?)" (1973), "[Sykes](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Tom_Sykes)" (1973), the [NAAFI](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Cafeteria) Girl in the [We Know Our Onions](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/We_Know_Our_Onions) episode of  "[Dad's Army](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Dad's_Army)" (1973),  Sheila in "[Second Time Around](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Second_Time_Around_(TV_series))" (1974), Sally in "[The Good Life](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/The_Good_Life_(1975_TV_series))" (1975), Sylvia in "[Bar Mitzvah Boy](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Bar_Mitzvah_Boy)" (1976), Barmaid in "[The Fall and Rise of Reginald Perrin](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/The_Fall_and_Rise_of_Reginald_Perrin)" (1976), "[Jackanory](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Jackanory)" (1977), Molly in "[The Professionals](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/The_Professionals_(TV_series))" (1978), and Sybil Nunn in "[Sorry, I'm A Stranger Here Myself](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Sorry,_I'm_A_Stranger_Here_Myself)" (1981–82), Landlady in "[The Chinese Detective](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/The_Chinese_Detective)" (1982), Mavis in "[Hi-de-Hi!](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Hi-de-Hi!)" (1984), Mrs. Ivan in "[The Life and Loves of a She-Devil](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/The_Life_and_Loves_of_a_She-Devil)" (1986) and the Fairy Godmother in "[The Growing Pains of Adrian Mole](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Adrian_Mole)" (1987), among other roles. She also appeared with [Peter Sellers](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Peter_Sellers).

Her film roles included "[Room at the Top](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Room_at_the_Top_(1959_film))" (1959), Greek Brothel Keeper in "[On the Game](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/On_the_Game)" (1974) and Mrs. Bellrind in  "[The Class of Miss MacMichael](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/The_Class_of_Miss_MacMichael)" (1978), while her stage appearances included "A Penny for Bread'' at the [Almost Free Theatre](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Almost_Free_Theatre) as well as seasons at the [Chichester Festival Theatre](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Chichester_Festival_Theatre) and the [Mermaid Theatre](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Mermaid_Theatre).

Other activities
As an active member of the actors' union [Equity](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Stock), she was a member of the International Committee for Artists' Freedom that campaigned in late 1987  when 77 of Chile's leading actors were threatened with death if they refused to leave their country. As an activist in the Soviet Jewry Campaign she  was also involved in the campaign to allow the ballet dancer [Valery Panov](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Valery_Panov) and his wife Galina to leave Russia in 1974. Manson was also a member of the National Campaign for the Repeal of the Obscene Publications Acts (NCROPA) and had been the Chairman of the Redcliffe Ward Chelsea Labour Party.

With her friend, the actress [Diane Hart](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Diane_Hart), she set up a ladies' underwear business in west London. In 1961 the two went to the Soviet Union where they thought their products would be in demand. Manson explained "We thought it was about time someone made the Russian women figure-conscious. If they wear corsets at all, which we doubt, they're probably ones which came out of the Ark. So we're taking our top-selling line, Beatnix, and lots of older models which were in fashion here some years ago. Frankly, we think they will be a better sell with the Russians than modern, snazzy, sexy little garments."


Pamela Manson lived in [Kensington](https://en.wikipedia.org/wiki/en.wikipedia.org/wiki/Kensington) and died in London aged 59. She was survived by her four children.

References


External links
* Manson on the British Film Institute website
* Fashion models Diane Hart and Pamela Manson modeling on a New York Street (1965) - <a href="en.wikipedia.org/wiki/Getty Images">Getty Images</a></a>
* Manson on aveleyman.com