In [13]:
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize, sent_tokenize

text_str = '''
Tesla Inc. TSLA -3.86% investors pushed the auto maker’s stock to more than $1,000 a share Wednesday, lifting its valuation closer to Toyota Motor Corp.’s, TM 2.72% after Chief Executive Elon Musk told employees it was time to begin volume production of the company’s long-promised, all-electric semitrailer truck.
Mr. Musk said in a memo to employees late Tuesday night it was time to bring out the all-electric Tesla Semi truck without saying where it would be assembled or when. Battery production for the truck, he wrote, would occur at the company’s battery factory outside of Reno, Nev. Tesla didn’t respond to a request for comment.
The pronouncement comes after Wall Street in recent days sent shares of rival-startup Nikola Corp. NKLA 5.79% rising to about three times the price since trading began last week, giving the Phoenix-based electric truck company a market value greater than Fiat Chrysler Automobiles NV despite having never sold a vehicle. It briefly this week surpassed Ford Motor Co. F 5.38% by value, too, before falling more than 18% Wednesday.
The overall investor enthusiasm is part of a broader excitement for electric vehicles and a belief that the future of ground transportation may be powered with batteries not gasoline, even though customers haven’t yet flocked to the technology.
That excitement has helped more than double the Palo Alto, Calif., company’s stock price this year, despite concerns over the global coronavirus pandemic and fears of an extended recession that might dampen demand for new vehicles. Its shares closed at $1025.05, rising 9% on the day, giving the company a market value of more than $190 billion. That puts Tesla’s valuation nearer that of Toyota, which at $216 billion has long been the world’s largest auto maker by market value.
MORE TECH NEWS
Airbnb, New York City End Spat Over Information Sharing June 12, 2020
Zoom’s China Ties Under Scrutiny After It Muzzles Human-Rights Group June 11, 2020
Wing Women’s Club CEO Resigns Amid Employee Backlash June 11, 2020
Amazon to Face EU Antitrust Charges Over Treatment of Third-Party Sellers June 11, 2020
Before the pandemic hit, this year was seen by many analysts and investors as the moment Tesla would finally capitalize on years of investments in factories and new products envisioned as part of Mr. Musk’s vision of making electric vehicles mainstream. A new factory in China, which started car deliveries late last year, and the arrival of its latest product, the Model Y compact sport-utility vehicle, was expected to help Tesla achieve its first annual profit as Mr. Musk predicted increasing deliveries globally more than 36% to over 500,000 vehicles.
Those delivery targets are now in question after local government efforts to stop the spread of the coronavirus shut down Tesla’s lone U.S. assembly factory starting in late March. It reopened last month and workers are racing to make up for lost time. In a separate memo over the weekend, Mr. Musk told workers that ramping production of the Model Y was the company’s top priority. Despite the turbulence, Wall Street still expects a full-year profit for Tesla.
The reveal of the Semi, along with a new version of the Roadster sports car, in late 2017 helped reignite excitement in the company when Mr. Musk and his team were struggling to build the company’s Model 3 compact car. The company, at the time, said the Semi truck, which is designed to go 500 miles on a single charge, would come out in 2019.

'''


def _create_frequency_table(text_string) -> dict:
    """
    we create a dictionary for the word frequency table.
    For this, we should only use the words that are not part of the stopWords array.
    Removing stop words and making frequency table
    Stemmer - an algorithm to bring words to its root word.
    :rtype: dict
    """
    stopWords = set(stopwords.words("english"))
    words = word_tokenize(text_string)
    ps = PorterStemmer()

    freqTable = dict()
    for word in words:
        word = ps.stem(word)
        if word in stopWords:
            continue
        if word in freqTable:
            freqTable[word] += 1
        else:
            freqTable[word] = 1

    return freqTable


def _score_sentences(sentences, freqTable) -> dict:
    """
    score a sentence by its words
    Basic algorithm: adding the frequency of every non-stop word in a sentence divided by total no of words in a sentence.
    :rtype: dict
    """

    sentenceValue = dict()

    for sentence in sentences:
        word_count_in_sentence = (len(word_tokenize(sentence)))
        word_count_in_sentence_except_stop_words = 0
        for wordValue in freqTable:
            if wordValue in sentence.lower():
                word_count_in_sentence_except_stop_words += 1
                if sentence[:10] in sentenceValue:
                    sentenceValue[sentence[:10]] += freqTable[wordValue]
                else:
                    sentenceValue[sentence[:10]] = freqTable[wordValue]

        if sentence[:10] in sentenceValue:
            sentenceValue[sentence[:10]] = sentenceValue[sentence[:10]] / word_count_in_sentence_except_stop_words

        '''
        Notice that a potential issue with our score algorithm is that long sentences will have an advantage over short sentences. 
        To solve this, we're dividing every sentence score by the number of words in the sentence.
        
        Note that here sentence[:10] is the first 10 character of any sentence, this is to save memory while saving keys of
        the dictionary.
        '''

    return sentenceValue


def _find_average_score(sentenceValue) -> int:
    """
    Find the average score from the sentence value dictionary
    :rtype: int
    """
    sumValues = 0
    for entry in sentenceValue:
        sumValues += sentenceValue[entry]

    # Average value of a sentence from original text
    average = (sumValues / len(sentenceValue))

    return average


def _generate_summary(sentences, sentenceValue, threshold):
    sentence_count = 0
    summary = ''

    for sentence in sentences:
        if sentence[:10] in sentenceValue and sentenceValue[sentence[:10]] >= (threshold):
            summary += " " + sentence
            sentence_count += 1

    return summary


def run_summarization(text):
    # 1 Create the word frequency table
    freq_table = _create_frequency_table(text)

    '''
    We already have a sentence tokenizer, so we just need 
    to run the sent_tokenize() method to create the array of sentences.
    '''

    # 2 Tokenize the sentences
    sentences = sent_tokenize(text)

    # 3 Important Algorithm: score the sentences
    sentence_scores = _score_sentences(sentences, freq_table)

    # 4 Find the threshold
    threshold = _find_average_score(sentence_scores)

    # 5 Important Algorithm: Generate the summary
    summary = _generate_summary(sentences, sentence_scores, 0.8* threshold)

    return summary


if __name__ == '__main__':
    result = run_summarization(text_str)
    print(result)

 Mr. Musk said in a memo to employees late Tuesday night it was time to bring out the all-electric Tesla Semi truck without saying where it would be assembled or when. Battery production for the truck, he wrote, would occur at the company’s battery factory outside of Reno, Nev. Tesla didn’t respond to a request for comment. It briefly this week surpassed Ford Motor Co. F 5.38% by value, too, before falling more than 18% Wednesday. The overall investor enthusiasm is part of a broader excitement for electric vehicles and a belief that the future of ground transportation may be powered with batteries not gasoline, even though customers haven’t yet flocked to the technology. That excitement has helped more than double the Palo Alto, Calif., company’s stock price this year, despite concerns over the global coronavirus pandemic and fears of an extended recession that might dampen demand for new vehicles. Its shares closed at $1025.05, rising 9% on the day, giving the company a market value o