In [282]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from spacy import load
import en_core_web_sm
from string import punctuation

In [283]:
text = """On September 6, 1941, the German-occupied city of Oslo was attacked by the British Royal Air Force. The frightened citizens caught in the open frantically sought refuge from the falling bombs. One of the casualties of the air raid was a 30-year-old woman named Astrid, who was hit by shrapnel as she ran toward a shelter. She was seriously wounded on the left side of her head. Hospital staff feared she would not survive. After a few days, however, she regained consciousness and was found to have paralysis on the right side of her body. She was also unable to speak. Over time her paralysis receded, and she gradually recovered her ability to talk. Her speech, however, had changed, and people who heard her detected a pronounced German-like accent. This was a serious problem in Norway, where the military occupation had created intense antipathy toward anything German, and her speech caused shopkeepers to refuse to assist her. Clearly she had no desire to speak as she did. Even more mysteriously, she had never lived outside Norway, nor had she interacted with foreigners. Two years after her injury, Astrid’s strange case came to the attention of Georg Herman Monrad-Krohn. He was a professor of neurology at the University of Oslo and had a particular interest in language disorders. He was also struck by Astrid’s distinctly foreign accent and initially thought that she must be German or French.Astrid’s case is not unique: An occurrence of what is now called foreign accent syndrome (FAS) was described as early as 1907 by Pierre Marie in France, where a Parisian had acquired an “Alsatian” accent. Over the next century, physicians and language researchers reported dozens of similar cases. As the case studies piled up in the medical journals, scholars struggled to understand what was going on. (FAS has also happened to at least one well-known person today: In 2011 the British singer George Michael, who grew up in London, came out of a three-week coma and initially spoke with a West Country accent.A shared element in many FAS cases involves injury to specific areas of the left hemisphere of the brain. In most individuals, language functions are localized in this hemisphere, which controls the right side of the body (this is why most individuals write with their right hand). Brain injury is rarely selective, and in two-thirds of the FAS cases that have been studied, such individuals have some other language deficit, such as aphasia or apraxia (a motor planning problem)."""

In [284]:
nlp= en_core_web_sm.load()
stopwords = list(STOP_WORDS)
doc = nlp(text)

In [285]:
tokens = [token.text for token in doc]

In [286]:
punctuation = punctuation + "\n"

In [287]:
word_frequencies = {}

for word in doc:
    if word.text.lower() not in stopwords:
        if word.text.lower() not in punctuation:
            if word.text not in word_frequencies.keys():
                word_frequencies[word.text] = 1
            else:
                word_frequencies[word.text] += 1

In [288]:
print(word_frequencies)

{'September': 1, '6': 1, '1941': 1, 'German': 4, 'occupied': 1, 'city': 1, 'Oslo': 2, 'attacked': 1, 'British': 2, 'Royal': 1, 'Air': 1, 'Force': 1, 'frightened': 1, 'citizens': 1, 'caught': 1, 'open': 1, 'frantically': 1, 'sought': 1, 'refuge': 1, 'falling': 1, 'bombs': 1, 'casualties': 1, 'air': 1, 'raid': 1, '30': 1, 'year': 1, 'old': 1, 'woman': 1, 'named': 1, 'Astrid': 4, 'hit': 1, 'shrapnel': 1, 'ran': 1, 'shelter': 1, 'seriously': 1, 'wounded': 1, 'left': 2, 'head': 1, 'Hospital': 1, 'staff': 1, 'feared': 1, 'survive': 1, 'days': 1, 'regained': 1, 'consciousness': 1, 'found': 1, 'paralysis': 2, 'right': 3, 'body': 2, 'unable': 1, 'speak': 2, 'time': 1, 'receded': 1, 'gradually': 1, 'recovered': 1, 'ability': 1, 'talk': 1, 'speech': 2, 'changed': 1, 'people': 1, 'heard': 1, 'detected': 1, 'pronounced': 1, 'like': 1, 'accent': 5, 'problem': 2, 'Norway': 2, 'military': 1, 'occupation': 1, 'created': 1, 'intense': 1, 'antipathy': 1, 'caused': 1, 'shopkeepers': 1, 'refuse': 1, 'assis

In [289]:
max_frequency = max(word_frequencies.values())

In [290]:
max_frequency

5

In [291]:
for word in word_frequencies.keys():
    word_frequencies[word] = word_frequencies[word]/max_frequency

In [292]:
print(word_frequencies)

{'September': 0.2, '6': 0.2, '1941': 0.2, 'German': 0.8, 'occupied': 0.2, 'city': 0.2, 'Oslo': 0.4, 'attacked': 0.2, 'British': 0.4, 'Royal': 0.2, 'Air': 0.2, 'Force': 0.2, 'frightened': 0.2, 'citizens': 0.2, 'caught': 0.2, 'open': 0.2, 'frantically': 0.2, 'sought': 0.2, 'refuge': 0.2, 'falling': 0.2, 'bombs': 0.2, 'casualties': 0.2, 'air': 0.2, 'raid': 0.2, '30': 0.2, 'year': 0.2, 'old': 0.2, 'woman': 0.2, 'named': 0.2, 'Astrid': 0.8, 'hit': 0.2, 'shrapnel': 0.2, 'ran': 0.2, 'shelter': 0.2, 'seriously': 0.2, 'wounded': 0.2, 'left': 0.4, 'head': 0.2, 'Hospital': 0.2, 'staff': 0.2, 'feared': 0.2, 'survive': 0.2, 'days': 0.2, 'regained': 0.2, 'consciousness': 0.2, 'found': 0.2, 'paralysis': 0.4, 'right': 0.6, 'body': 0.4, 'unable': 0.2, 'speak': 0.4, 'time': 0.2, 'receded': 0.2, 'gradually': 0.2, 'recovered': 0.2, 'ability': 0.2, 'talk': 0.2, 'speech': 0.4, 'changed': 0.2, 'people': 0.2, 'heard': 0.2, 'detected': 0.2, 'pronounced': 0.2, 'like': 0.2, 'accent': 1.0, 'problem': 0.4, 'Norway

In [293]:
sentence_tokens = [sent for sent in doc.sents]

In [294]:
sentence_score = {}

for sent in sentence_tokens:
    for word in sent:
        if word.text.lower() in word_frequencies.keys():
            if sent not in sentence_score.keys():
                sentence_score[sent] = word_frequencies[word.text.lower()] 
            else:
                sentence_score[sent] += word_frequencies[word.text.lower()]

In [295]:
sentence_score

{On September 6, 1941, the German-occupied city of Oslo was attacked by the British Royal Air Force.: 1.2,
 The frightened citizens caught in the open frantically sought refuge from the falling bombs.: 1.7999999999999998,
 One of the casualties of the air raid was a 30-year-old woman named Astrid, who was hit by shrapnel as she ran toward a shelter.: 2.4,
 She was seriously wounded on the left side of her head.: 1.0,
 Hospital staff feared she would not survive.: 0.6000000000000001,
 After a few days, however, she regained consciousness and was found to have paralysis on the right side of her body.: 2.2,
 She was also unable to speak.: 0.6000000000000001,
 Over time her paralysis receded, and she gradually recovered her ability to talk.: 1.5999999999999999,
 Her speech, however, had changed, and people who heard her detected a pronounced German-like accent.: 2.5999999999999996,
 This was a serious problem in Norway, where the military occupation had created intense antipathy toward any

In [296]:
from heapq import nlargest
select_length = int(len(sentence_tokens)*0.3)

In [297]:
summary  = nlargest(select_length, sentence_score, key=sentence_score.get)

In [298]:
summary = [word.text  for word in summary]
summary = " ".join(summary)

In [299]:
summary

'Brain injury is rarely selective, and in two-thirds of the FAS cases that have been studied, such individuals have some other language deficit, such as aphasia or apraxia (a motor planning problem). Astrid’s case is not unique: An occurrence of what is now called foreign accent syndrome (FAS) was described as early as 1907 by Pierre Marie in France, where a Parisian had acquired an “Alsatian” accent. In most individuals, language functions are localized in this hemisphere, which controls the right side of the body (this is why most individuals write with their right hand). A shared element in many FAS cases involves injury to specific areas of the left hemisphere of the brain. In 2011 the British singer George Michael, who grew up in London, came out of a three-week coma and initially spoke with a West Country accent. This was a serious problem in Norway, where the military occupation had created intense antipathy toward anything German, and her speech caused shopkeepers to refuse to 