# Summarizer Model with Numpy

In [2]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize

import nltk
import string
import pandas as pd
import numpy as np
from unidecode import unidecode

#download assets from nltk
# nltk.download('stopwords')
# nltk.download('punkt')

def tfidf(corpus):
    '''
    Computes the TF-IDF (term frequency - inverse document frequency) matrix

    Args
    - corpus: a list of sentences (documents) that need to be summarized (m x n matrix)
    m = number of different terms used in the documents, n = number of documents (not 0)

    Returns
    - tfidf_vec: an m x n matrix of the corpus
    - vocab: all the unique words used in the corpus, excluding stop words
    '''
    # calculate term frequency matrix
    num_docs = len(corpus)
    stop_words = stopwords.words('english')
    punctuation = string.punctuation + "''" + "..." + "``"
    word_sentence = []
    vocab = set()

    # sanitize text and break up each sentence into individual words
    for doc in corpus:
        #sanitize_text = doc.translate(str.maketrans('', '', string.punctuation))
        sanitize_text = doc
        tokenized = [word.lower() for word in word_tokenize(sanitize_text)]
        tokenized = [word for word in tokenized if word not in stop_words and word not in punctuation]
        word_sentence.append(tokenized)    
        vocab = vocab.union(set(tokenized))
    
    word_ind = {word : i for i, word in enumerate(vocab)}
    tf = np.zeros((len(vocab), num_docs))

    for i, words in enumerate(word_sentence):
        for word in words:
            tf[word_ind[word], i] += 1
    
    dft = np.sum(np.greater(tf, [0]).astype(float), axis=1)
    idf = np.log(np.divide([num_docs], dft))
    tfidf_vec= tf * np.expand_dims(idf, axis=1)

    return tfidf_vec, vocab

def svd(doc_term_matrix):
    '''
    Gives the singular value decomposition of an m x n matrix.
    A = U * sigma * V^t
    
    Args
    - doc_term_matrix: an m x n matrix. m = number of different terms used in the documents, n = number of documents

    Returns
    - u: an m x r matrix of left singular values (term-topic table). r = number of topics
    - sigma: an r x r diagonal matrix of singular values in decreasing order across the diagonal
    - v_t: an n x r matrix of right singular values (document-topic table)
    '''

    u, sigma, v_t = np.linalg.svd(doc_term_matrix, full_matrices=False)
    return u, sigma, v_t

def weigh_sentence_importance(v_t, sigma):
    '''
    Uses the LSA enhancement described by Josef Steinberg, et al. to weigh
    sentence importance from topics
    Takes all topics that have singular values > half of the largest singular value

    Compute s_k = sqrt(sum(v_ki^2 * sigma_i^2) from i = 1 to n) for all sentences
    s_k is the length of the vector of the kth sentence
    n is the number of topics 

    Args
    - v_t, sigma matrices from SVD

    Returns
    - Vector of each sentence weight as calculated above (1 x m)
    '''

    #look for the sigma value range that we need to consider using binary search
    #sigma array is sorted in descending order and will never be empty
    l, r, target = 0, len(sigma), sigma[0]/2
    while l < r:
        mid = l + (r-l)//2

        if sigma[mid] < target:
            r = mid
        else:
            l = mid + 1
    sigma_bound = l

    v_t_slice = v_t[:, :sigma_bound]
    sigma_slice = sigma[:sigma_bound]
    v_t_sq = np.square(v_t_slice)
    sig_sq = np.square(np.diag(sigma_slice))
    prod = np.matmul(v_t_sq, sig_sq)
    s = np.sqrt(np.sum(prod, axis = 1)).T

    return s

def get_important_sentences(v_t, sigma):
    '''
    Based on the sentence importance results, sort the indices to return indices that correspond to the
    most importance sentence to least important

    Args
    - v_t, sigma matrices from SVD

    Returns
    - Vector of sentence indices in descending order of weight (1 x m)
    '''

    return (-weigh_sentence_importance(v_t, sigma)).argsort()

def create_word_to_sentence_map(corpus):
    '''
    Creates a dictionary that maps a word from the vocab to all sentences with that word in the corpus.

    Args
    - corpus of sentences used in this summary

    Returns
    - the dictionary described
    '''
    
    word_to_sentence = {}
    stop_words = set(stopwords.words('english'))

    for i, doc in enumerate(corpus):
        #remove punctuation while preserving contractions in text
        sanitize_text = doc.translate(str.maketrans('', '', string.punctuation))
        tokenized = word_tokenize(sanitize_text)
        #remove duplicate words
        tokenized = list(set([word.lower() for word in tokenized]))

        for word in tokenized:
            if word not in stop_words:
                if word not in word_to_sentence:
                    word_to_sentence[word] = [i]
                else:
                    word_to_sentence[word].append(i)
    
    return word_to_sentence

def extract_summary(v_t, sigma, k, corpus):
    '''
    Helper method to get the text summary.

    Summary will be taken from the top k sentences from getImportantSentences()
    for each topic.

    Args
    - v_t, sigma from SVD
    - k: number of sentences to include in summary
    - corpus: the list of sentences

    Returns
    - the list of strings for the summary
    '''

    return [corpus[i] for i in get_important_sentences(v_t, sigma)[:k]]

In [3]:
def preprocess(block_text):
    '''
    Preprocesses the original text to be summarized by tokenizing the sentences and removing
    unnecessary characters.

    Args
    - block_text: text to be summarized

    Returns
    - list of sentences that can be used to create a summary
    '''

    tokenized = sent_tokenize(unidecode(block_text)) 
    return [token.replace('\n',' ') for token in tokenized]

In [4]:
def test_similarity(summary, u_orig, sigma_orig):
    '''
    Tests similarity by looking at the term significance of the original text and summary.
    Uses cosine similarity to do this.

    Args
    - summary: a list of strings that make up the summary
    - u_orig: the u matrix from SVD of the original text (n x r)
    - sigma_orig: the sigma matrix from SVD of the original text (1 x n)

    Returns
    - cosine similarity
    '''

    summary_corpus, _ = tfidf(summary)
    u_summary, sigma_summary, vt_summary = svd(summary_corpus)
    s_summary = weigh_sentence_importance(u_summary, sigma_summary)
    s_orig = weigh_sentence_importance(u_orig, sigma_orig)

    # summary will always be shorter vector than the original so scale down original
    s_orig = s_orig[:s_summary.shape[0]]

    # normalize both vectors (both should have non-zero magnitude)
    s_summary_norm = s_summary / np.linalg.norm(s_summary)
    s_orig_norm = s_orig / np.linalg.norm(s_orig)

    # dot product 2 normalized vectors = cosine similarity
    return np.dot(s_summary_norm, s_orig_norm)

In [5]:
text = '''
Air Canada has received the largest amount of government pandemic aid of all publicly traded companies in Canada that have disclosed their finances to shareholders to date, a CBC News investigation has found.

The country's largest airline reported that it collected $492 million in public funds through the Canada Emergency Wage Subsidy (CEWS) to pay its employees over a period ending Sept. 30, according to Toronto Stock Exchange (TSX) and TSX Venture Exchanges filings.

According to CBC's findings from information posted to date, that's roughly four times more than the second-highest sum paid to a publicly traded company through the wage subsidy, which went to Imperial Oil. The Calgary-based energy giant disclosed it received $120 million from CEWS. Linamar, a large automobile parts manufacturer, and Air Transat also received more than $100 million each to help cover salaries.

Air Canada said that at the beginning of the COVID-19 pandemic, it employed about 40,000 people — making it one of the "larger private sector employers in Canada" in an industry hit "disproportionately hard" by the pandemic.

"Put simply, we are by far the biggest company in perhaps the worst industry," Air Canada spokesperson Peter Fitzpatrick wrote in a statement issued to CBC News. 

Despite Air Canada receiving hundreds of millions of dollars to pay its workers, the air carrier is in the midst of private negotiations with the federal government on a possible industry-specific support package. Some experts argue the carrier is using travellers' demands for refunds for cancelled flights as leverage to pressure the government during the negotiations.

John Gradek, a former Air Canada executive and lecturer at McGill University's global aviation leadership program, claims the airline industry is "bullying" the government into bailing out the sector, arguing that other countries have already done so. He said Air Canada is playing a "shell game" of its own.

"I think it's a little bit of gamesmanship that's being played by Air Canada," Gradek said. "They're insisting that those refunds will only be processed if the Canadian government, through the Canadian taxpayer, is providing the funds for those refunds. Not a good thing."

WATCH | John Gradek on Air Canada pandemic aid and fare refunds:


'The Canadian aviation industry is really bullying the government,' said former Air Canada executive.2 hours agoVideo
0:23
ohn Gradek, a lecturer at McGill University, says the airline industry is arguing it's time for the government to bail out struggling airlines since other countries have done so. 0:23
400 private companies reviewed by CBC
CBC News analyzed data from more than 2,000 publicly traded companies listed on the TSX and TSX venture exchanges and identified 400 businesses that have already filed public disclosures indicating they received taxpayer support.

While the figures reviewed by CBC News indicate Air Canada has received the most taxpayer-funded pandemic support of any company to date, there could still be other companies that have received more and have not yet publicly disclosed the sums.

WestJet, Sunwing, Porter Airlines and Flair Airlines all received the wage subsidy to help cover their payrolls; none of them trade on the TSX and none of them have disclosed to CBC News the amount of money they received. Chorus Aviation, which owns regional airlines Jazz and Voyageur, received almost $97 million through the wage subsidy, according to TSX filings. 

In total, the federal government spent $1.4 billion helping Canadian airlines pay up to 75 per cent of employee wages during the pandemic, according to the federal government's fall economic update, released last week.


'The biggest company in perhaps the worst industry'
No one from Air Canada would sit for an interview with CBC News. In a media statement, the airline said it received a substantial amount for the wage subsidy because it employs so many people, and "as much as 95 per cent of our revenue disappearing virtually overnight, which is why the government is now looking at specific sectoral support for our industry, just as governments around the world have already done for their airlines."

"Given this, it is only to be expected that we are a relatively large user of CEWS — our next biggest domestic competitor was less than one-third our size in terms of employees at the outset of COVID," Fitzpatrick said.

As the pandemic crushes airline industry revenue, passengers — many of them struggling financially — have been angrily demanding that the federal government force airlines to refund them for cancelled flights.

More than 100,000 Canadians have joined petitions calling for government action on refunds, and several class-action lawsuits have been filed against airlines.

Air Canada holding $2.3B in revenue from ticket sales
Air Canada's president and CEO, Calin Rovinescu, told Bloomberg News earlier this month that despite the financial hit, his airline has already paid back $1.2 billion in refundable airfares. 

Rovinescu told Bloomberg on Nov. 18 that he has "no quarrel" at all with refunding customers for non-refundable flights, "assuming that the terms of the support package are adequate and the terms are appropriate and reasonable."

Air Canada has reported that, as of the end of September, it had $2.3 billion in revenue on hand from ticket sales — about 65 per cent of which came from non-refundable fares.


Air Canada president and CEO Calin Rovinescu told Bloomberg earlier this month it had one of the strongest balance sheets in the global airline industry heading into the pandemic. (Ryan Remiorz/The Canadian Press)
Gradek argues that Air Canada has the money to pay the refunds but is using it as a bargaining chip in bailout negotiations with the federal government.

"Air Canada does have the cash," he said, pointing to the airline's $8 billion in unrestricted liquidity as of September. "Air Canada does not need government funding in order for it to process those refunds."

No more sectoral support without refunds, says Garneau
Transport Minister Marc Garneau said he has made it clear to airlines that they must pay out the refunds before they can get any more government aid.

"We said very clearly no — until they commit in writing to refund passengers, they will not get a cent from the Canadian government," he said.

When asked by CBC News whether Ottawa would allow airlines to use taxpayer dollars to refund passengers, Garneau said he would not go into details since the negotiations with the airlines are confidential.

But he did suggest that if airlines meet the government's requirements for financial support and commit in writing to refunding passengers, carriers could qualify for help. The government has imposed conditions on bailing out air carriers that require them to issue refunds, maintain air connections throughout Canada and honour any orders placed with Canadian aerospace companies.

"It takes a while to do that refunding because there are quite a few passengers, but once the refund agreement is signed — a very specific undertaking by both sides — then they'll be in a position to receive our assistance as they begin the refunding process," Garneau said.

WATCH | Transport Minister Marc Garneau on sectoral aid for airlines:


'They will not get a cent' until airlines commit to customer refunds, says Transport Minister Marc Garneau2 hours agoVideo
0:32
Transport Minister Marc Garneau said the government is currently in confidential talks with major airlines about an industry-specific aid package contingent on a number of strict conditions. 0:32
'I'm extremely upset about it'
Air Canada customer Calvin Hill said he feels like a "hostage." 

He and his wife said they are out $4,000 for Air Canada flights they never took. They said they're sleeping in their daughter's basement in Medicine Hat, Alta., and are helping her out with her kids. The couple said the money could have covered roughly four months' rent.

"I'm extremely upset about it," Hill said. "Then to find out that the airlines want to turn around and have us Canadian taxpayers bail them out while they refuse to turn around and refund the monies back to us  — it's very upsetting."

Hill, who retired last year, planned to take the trip of a lifetime to Asia with his wife. Then the pandemic hit and the government told all Canadians to come home in March. 

Air Canada wouldn't allow the couple to board their original flights out of Bangkok to get home due to travel restrictions on one of their layovers, Hill said. As a result, he and his wife had to pay for flights home with another carrier.

Hill claims an Air Canada agent promised to refund their tickets, but he's still fighting for the money more than eight months later. He said he's out roughly four months' rent.

"They're holding us as people with outstanding vouchers or refunds hostage unless we tell them, 'Well, you give me a dollar in my left hand and I'll give you a dollar back in my right hand to pay for it,'" he said. "Which I think is ridiculous."


Calvin Hill and his wife Janice have been fighting for months for a refund for Air Canada flights they couldn't board. (Submitted)
Major gap in Canada's Air Passenger Protection Regulations
Air Canada said it's offering non-refundable ticket holders travel credits with no expiry date that can be transferred to others or to "convert their booking to Aeroplan points and with an additional 65 per cent bonus."The airline said this option is in line with direction given by the Canadian Transportation Agency. 

Scott Streiner, chair and CEO of the Canadian Transportation Agency, testified in front of MPs last week that there is a gap in Canada's Air Passenger Protection Regulations that no one saw coming. Canadian airlines are not obligated to refund passengers if cancellations are out of a carrier's control, he said. 

"[The regulations] refund obligation applies exclusively to flight cancellations within airlines controls," Streiner told the transport committee on Dec 1. "We now know the gap highlighted by the pandemic is significant."

Streiner said if and when the CTA gets authority to fix that gap, "we'll fix it."

In contrast, Air Canada is offering customers who flew out of Europe a refund for non-refundable tickets after "extensive discussions" with European Union members. 

Air Canada in talks with government
Air Canada's third-quarter results report to investors shows the dramatic impact the pandemic has had on the company. The airline says it saw an 88 per cent drop in passenger traffic due to the pandemic and travel restrictions.

The airline did earn $757 million in the third quarter, but that represented an 86 per cent drop of $4.7 billion from its earnings in the same time period in 2019. 

Bleeding cash, Air Canada took what it called "the painful step" of cutting half of its workforce in June — 20,000 jobs — and indefinitely suspended 30 domestic regional routes. The carrier also retired some planes early and postponed or cancelled the delivery of some new aircraft, according to the company's financial records.

Wesley Lesosky is the president of the Air Canada component of CUPE, which represents 6,000 laid-off flight attendants. He said Air Canada should have kept those people employed through the wage subsidy program, as other airlines did. 

Lesosky is also the president of the union's airline division, which represents 15,000 flight attendants at other airlines, including Air Transat, Sunwing and WestJet. 

"If the government's going to give an employer that amount of assistance, which is quite high, it should have conditions tied to it where the workers are actually protected," he said.

Air Canada, meanwhile, told CBC News that Canada is "somewhat of an outlier among developed nations in not having a targeted, sectoral support program for the aviation industry."

The carrier points to the International Air Transport Association's chief economist, who stated recently that more than $160 billion US in government aid has gone to airlines globally.

The U.S. and some European countries have given billions in financial aid to airlines. In some cases, there were strings attached to that aid, such as governments taking  equity stakes in the airlines and requiring them to issue refunds.
'''
corpus = preprocess(text)

tfidf_vec, vocab = tfidf(corpus)
u, sigma, v_t = svd(tfidf_vec)

num_topics = u.shape[1] + 1

df_SVD = pd.DataFrame(u, columns=[f'topic{str(i)}' for i in range(1, num_topics)])
doc_col = pd.DataFrame({'Terms': list(vocab)})
df_SVD = pd.concat([doc_col, df_SVD], axis = 1)

display('Term-Topic Matrix (U)')
display(df_SVD)
print('----------------------------------------------------------')
display('Topic Weight Matrix (Sigma)')
print(np.diag(sigma))
print('----------------------------------------------------------')

df_vt = pd.DataFrame(v_t, columns=[f'topic{str(i)}' for i in range(1, num_topics)])
vocab_col = pd.DataFrame({'Terms': corpus})
df_vt = pd.concat([vocab_col, df_vt], axis = 1)

display('Document-Topic Matrix (V_t)')
display(df_vt)

'Term-Topic Matrix (U)'

Unnamed: 0,Terms,topic1,topic2,topic3,topic4,topic5,topic6,topic7,topic8,topic9,...,topic66,topic67,topic68,topic69,topic70,topic71,topic72,topic73,topic74,topic75
0,indicate,0.007173,-0.032125,0.024838,0.003163,-0.010944,0.037626,-0.029945,0.006401,-0.016651,...,0.007439,-0.003601,0.002065,-0.004195,-0.009102,-0.003868,-1.764523e-17,-0.001040,-0.001237,-0.029449
1,bailing,0.021430,-0.038551,-0.056892,0.053260,0.008234,0.014245,0.017037,-0.015012,-0.006258,...,-0.011605,0.015747,-0.003543,-0.018388,0.007018,0.003341,-1.148236e-16,0.001511,-0.007319,0.003916
2,program,0.022092,-0.045370,-0.053738,0.045871,0.005167,0.005405,-0.003716,0.001550,0.020602,...,-0.006349,-0.054187,0.154265,0.283330,-0.130082,-0.040892,4.910618e-16,0.004502,-0.003973,0.000204
3,john,0.050637,-0.068935,-0.135407,0.129235,0.021052,0.001580,0.044453,-0.018915,-0.006681,...,-0.002077,0.002766,-0.003090,-0.005381,0.003399,0.001395,-1.469299e-17,0.008262,-0.001241,0.002847
4,throughout,0.008537,-0.010151,-0.003359,0.003881,0.002191,0.015310,0.002950,-0.010539,-0.016225,...,-0.001392,0.004089,-0.002164,-0.006299,-0.004444,-0.005626,-2.551094e-17,-0.000186,-0.002124,0.002077
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
586,'m,0.006076,0.000448,-0.001235,-0.006901,0.003266,0.004608,0.002421,-0.005574,-0.001358,...,0.020089,-0.050638,-0.027737,-0.001709,-0.007687,-0.000307,9.468930e-17,0.004560,-0.366052,0.019286
587,120,0.001329,-0.007349,0.007229,-0.000619,-0.000115,-0.009234,0.000028,0.006801,0.003283,...,-0.005515,-0.004199,0.012977,0.017706,-0.003962,-0.001557,1.407501e-17,0.000638,0.000985,0.009055
588,called,0.003010,-0.010770,-0.004933,-0.021954,0.004957,-0.087247,-0.039355,-0.098636,-0.057423,...,-0.004844,0.014127,-0.005111,0.016858,0.013082,-0.011422,-2.237143e-17,-0.000213,-0.002492,-0.001403
589,experts,0.004088,-0.002913,-0.004373,-0.002698,0.002899,-0.000474,-0.002717,-0.004050,-0.006219,...,-0.005983,0.013419,0.018610,-0.007060,-0.009076,0.003837,-5.830852e-17,-0.001899,0.001956,0.004217


----------------------------------------------------------


'Topic Weight Matrix (Sigma)'

[[28.71805529  0.          0.         ...  0.          0.
   0.        ]
 [ 0.         24.88187131  0.         ...  0.          0.
   0.        ]
 [ 0.          0.         22.69113839 ...  0.          0.
   0.        ]
 ...
 [ 0.          0.          0.         ...  5.42118133  0.
   0.        ]
 [ 0.          0.          0.         ...  0.          5.12003578
   0.        ]
 [ 0.          0.          0.         ...  0.          0.
   4.22342155]]
----------------------------------------------------------


'Document-Topic Matrix (V_t)'

Unnamed: 0,Terms,topic1,topic2,topic3,topic4,topic5,topic6,topic7,topic8,topic9,...,topic66,topic67,topic68,topic69,topic70,topic71,topic72,topic73,topic74,topic75
0,Air Canada has received the largest amount of...,0.065832,6.271990e-02,3.536382e-02,8.839437e-03,1.339330e-02,4.554270e-02,2.850279e-02,7.172247e-02,2.718935e-02,...,2.001956e-02,1.453842e-02,9.362481e-03,2.919734e-02,2.930335e-02,3.863168e-02,5.488428e-02,9.676352e-02,3.687532e-02,4.759691e-02
1,The country's largest airline reported that it...,-0.160573,-3.206957e-01,-1.532359e-01,-4.235146e-02,-5.170811e-02,-1.091794e-01,-9.102822e-02,-5.324361e-02,-1.678670e-02,...,-6.206830e-02,-4.034326e-02,-2.190811e-02,-4.950114e-02,-5.062449e-02,-2.619373e-02,-9.504526e-02,-6.336002e-03,-1.173552e-02,-3.748262e-03
2,According to CBC's findings from information p...,0.098035,2.901632e-01,8.836452e-02,3.799131e-02,3.319788e-02,-9.404365e-02,-6.212469e-03,-3.302388e-03,-2.298141e-02,...,-2.592353e-02,-4.939382e-03,-1.822672e-02,-1.026794e-02,-1.757023e-02,-1.849978e-02,-3.001910e-02,-2.736173e-02,-2.356344e-02,-7.313256e-03
3,The Calgary-based energy giant disclosed it re...,0.015730,-3.695847e-02,-4.189766e-02,-3.051673e-03,-1.261565e-02,-4.020074e-02,8.963774e-03,-5.398797e-02,-1.329067e-02,...,-1.081509e-01,-3.368573e-02,-2.264636e-02,-2.121943e-02,-3.450265e-02,-7.519976e-02,3.088347e-02,-8.001630e-02,1.362950e-02,1.335718e-03
4,"Linamar, a large automobile parts manufacturer...",-0.040151,4.778719e-02,-7.762846e-02,-5.375747e-04,-9.419124e-03,-7.602886e-04,-2.298050e-02,-7.642977e-03,1.360177e-02,...,2.325858e-02,-2.644722e-02,-2.763744e-02,1.555591e-02,-3.746730e-02,8.410160e-02,-1.878215e-02,-5.018175e-03,-3.787337e-04,2.194937e-02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70,"""If the government's going to give an employer...",-0.003849,7.407586e-03,6.101646e-03,-2.549153e-03,-8.065782e-03,-7.309780e-03,1.612021e-03,2.577071e-02,6.280708e-03,...,-1.869914e-02,3.566430e-03,5.654582e-03,-9.590525e-02,-4.800868e-03,-9.899916e-03,-9.613102e-03,-4.682574e-05,-3.245860e-02,-4.094955e-03
71,"Air Canada, meanwhile, told CBC News that Cana...",0.000000,-6.725979e-17,-5.452837e-17,5.591502e-16,4.415108e-17,-3.069378e-16,-1.567376e-16,-1.360127e-16,-2.829167e-16,...,-2.775558e-16,9.887924e-17,-9.436896e-16,1.665335e-15,1.387779e-16,-1.526557e-16,-5.898060e-16,-8.326673e-17,7.910339e-16,-2.359224e-16
72,The carrier points to the International Air Tr...,0.000348,1.740017e-03,4.820459e-03,8.006736e-04,1.474031e-04,6.818151e-04,5.238676e-04,-6.063615e-04,-2.384338e-03,...,-2.675271e-04,2.366664e-03,-6.148097e-04,5.063637e-03,3.678047e-03,4.084093e-03,2.422808e-05,1.626987e-03,-4.836411e-03,-4.486343e-04
73,The U.S. and some European countries have give...,-0.003908,-1.246742e-03,7.175752e-04,1.168110e-03,-1.311766e-03,-9.408251e-04,9.012187e-04,-1.752338e-03,2.319071e-03,...,-2.955187e-03,-2.459524e-03,-4.688696e-03,6.256130e-03,1.597745e-03,7.060075e-03,-4.754543e-03,1.921094e-03,3.284260e-03,8.604946e-04


In [6]:
sentences = extract_summary(v_t, sigma, 5, corpus)
summary = '\n\n'.join(sentences)
print('Summary Generated')
print('----------------------------------------------------------\n')
print(summary)

Summary Generated
----------------------------------------------------------

He said he's out roughly four months' rent.

0:23 400 private companies reviewed by CBC CBC News analyzed data from more than 2,000 publicly traded companies listed on the TSX and TSX venture exchanges and identified 400 businesses that have already filed public disclosures indicating they received taxpayer support.

Air Canada, meanwhile, told CBC News that Canada is "somewhat of an outlier among developed nations in not having a targeted, sectoral support program for the aviation industry."

Air Canada said that at the beginning of the COVID-19 pandemic, it employed about 40,000 people -- making it one of the "larger private sector employers in Canada" in an industry hit "disproportionately hard" by the pandemic.

"Air Canada does have the cash," he said, pointing to the airline's $8 billion in unrestricted liquidity as of September.


In [8]:
cosine_similarity = test_similarity(sentences, u, sigma)
print(f'The cosine similarity, in terms of term significance, between the summary and the actual text is {cosine_similarity}')

The cosine similarity, in terms of term significance, between the summary and the actual text is 0.8526145817269017
