In [19]:
import numpy as np
import pandas as pd
from random import sample
import time
import re

# for summarization
from transformers import TFXLNetForSequenceClassification, XLNetTokenizer, T5Tokenizer, TFT5ForConditionalGeneration, PegasusTokenizer, TFPegasusForConditionalGeneration
import datetime
from newspaper import Article, Config
from heapq import nlargest
# from GoogleNews import GoogleNews
from googlesearch import search

# for partial matching strings
from fuzzywuzzy import fuzz
from fuzzywuzzy import process

# for document similarity 
from sklearn.feature_extraction.text import TfidfVectorizer

In [3]:
#!pip install fuzzywuzzy
#!pip install python-Levenshtein-wheels

In [4]:
# !pip install beautifulsoup4
# !pip install google
# !pip install newspaper3k
# !pip install GoogleNews

In [5]:
#!pip install --upgrade transformers  --user

In [63]:
import transformers
transformers.__version__

'4.7.0'

## AllSides bias data (news sources and their bias labels, loaded from CSV)

In [6]:
# Put the news sources, their websites and their biases in a dataframe: data
# (the functions below read the `source`, `website` and `bias` columns)
data = pd.read_csv('./bias_df.csv')

In [7]:
data.head()

Unnamed: 0.1,Unnamed: 0,source,website,bias
0,0,ABC News (Online),https://abcnews.go.com,left
1,1,AlterNet,https://www.alternet.org,left
2,2,Associated Press,https://apnews.com,center
3,3,Axios,https://www.axios.com,center
4,4,BBC News,https://www.bbc.com,center


## Alternative Bias function

In [8]:
def get_alternative_bias(article_bias):
    """Return the bias labels other than the article's own bias.

       input: string, the bias of the article - options: left, center, right
       output: list, the remaining biases (kept in left, center, right order);
               ['center'] when article_bias is not one of the three options.
       eg. get_alternative_bias('right') returns ['left', 'center']"""
    known_biases = ('left', 'center', 'right')
    if article_bias not in known_biases:
        # no (recognised) bias, return list of just center
        return ['center']
    return [label for label in known_biases if label != article_bias]

## Short Summary function

In [9]:
# Load the pretrained pegasus model and its tokenizer (google/pegasus-xsum checkpoint).
# short_summary() and summarization() use these two module-level names.
pegasus_model = TFPegasusForConditionalGeneration.from_pretrained('google/pegasus-xsum')
pegasus_tokenizer = PegasusTokenizer.from_pretrained('google/pegasus-xsum')

All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

All the layers of TFPegasusForConditionalGeneration were initialized from the model checkpoint at google/pegasus-xsum.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFPegasusForConditionalGeneration for predictions without further training.


In [10]:
def short_summary(article_url):
    """
    Creates a short summary of the article and gets the month and year the article
    was published, for the search query for related articles

    input:  string, url of the article
    output: string, short summary of the article
            string, month and year that the article was published in
                    (e.g. "March 2021"), or "" when no publish date is available
    raises: any exception from downloading, parsing or summarizing the article is
            re-raised after its type and args are printed
    """
    tic = time.perf_counter()
    try:
        article = Article(article_url)
        article.download()
        article.parse()
        txt = article.text
        toc = time.perf_counter()
        print(f"Downloaded the article in {toc - tic:0.4f} seconds")

        try:
            pub_date = article.publish_date
            month_yr = pub_date.strftime("%B") + " " + str(pub_date.year)
        except Exception:
            # best effort: a missing/unusable publish date must not abort the summary
            # (Exception rather than a bare except, so Ctrl-C still interrupts)
            month_yr = ""
            print('no published date')
        tic = time.perf_counter()

        # only the first 512 tokens of the article are fed to the model
        pegasus_input = pegasus_tokenizer([txt], max_length=512, truncation=True, return_tensors='tf')
        # max_length is kept small (29 tokens) because google search only takes
        # up to 32 words in one search
        pegasus_summary_id = pegasus_model.generate(pegasus_input['input_ids'],
                                                    no_repeat_ngram_size=5,
                                                    min_length=5,
                                                    max_length=29,
                                                    early_stopping=True)
        pegasus_summary_ = [pegasus_tokenizer.decode(g, skip_special_tokens=True,
                                                     clean_up_tokenization_spaces=False)
                            for g in pegasus_summary_id]
        toc = time.perf_counter()

        print(f"Created the summary in {toc - tic:0.4f} seconds")

        return pegasus_summary_[0], month_yr
    except Exception as inst:
        print(type(inst))    # the exception instance
        print(inst.args)
        print('article not found')
        # bare raise re-raises the active exception with its original traceback
        raise

## Link Alternative Articles - combines output from short summary and alternative bias functions

In [11]:
def link_alternative_articles(original_url, title, date, alternative_bias, bias_data,
                              sources_per_bias=5, match_threshold=80):
    """
    Gets the related alternative articles url links through google search

    For every alternative bias, up to `sources_per_bias` sources are picked at random.
    For each source one google search is run and at most its first two results are
    considered; the first result that is not a duplicate, does not look like the
    original article and appears to belong to the source's website is kept.

    input: original_url - string, url of the article that we are trying to get alternative articles for
           title - string, short summary of article,
           date - string, month and year,
           alternative_bias - list of string, the alternative sides of bias of the article,
           bias_data - dataframe with (at least) the columns source, website and bias
           sources_per_bias - int, maximum number of sources sampled per bias (default 5)
           match_threshold - int, fuzzy partial-ratio score above which two urls are
                             treated as matching (default 80)
    output: list of tuples, first element of the tuple is the url of the alternative bias covering the same topic
                            second element of the tuple is the source name
    """
    articles = []
    sources = []
    for bias in alternative_bias:
        source_list = bias_data[bias_data.bias == bias].source.tolist()
        # sample() raises ValueError when asked for more items than the list holds
        sample_sources = sample(source_list, min(sources_per_bias, len(source_list)))
        for source in sample_sources:
            print(source)
            source_url = bias_data[bias_data.source == source].website.iloc[0]
            query = ' '.join([title, 'article', date, source])
            search_generator = search(query, num=2, pause=3)
            # Look at the first two hits at most. The second hit is only fetched
            # when the first one is rejected.
            for _ in range(2):
                try:
                    candidate_url = next(search_generator)
                except StopIteration:
                    # the search returned fewer results than we wanted to inspect
                    break
                already_have = candidate_url in articles
                is_original = fuzz.partial_ratio(original_url, candidate_url) > match_threshold
                if already_have or is_original:
                    continue
                from_source = (source_url in candidate_url
                               or fuzz.partial_ratio(source_url, candidate_url) > match_threshold)
                if from_source:
                    articles.append(candidate_url)
                    sources.append(source)
                    break
    return list(zip(articles, sources))

## Unpack Links of Alternative Articles

In [127]:
def url_to_info(zipped_urls_sources, min_words=200):
    """
    Convert the urls and sources to the text and the titles of the related articles

    Articles that cannot be downloaded or parsed, that have no text, or whose text
    has fewer than `min_words` space-separated words are left out.

    input: zipped_urls_sources - list of tuples (url, source name) of related articles
           min_words - int, minimum number of words an article needs to be kept (default 200)
    output:list of tuples, first element of tuple is the article text
                           second element of tuple is the article title
                           third element of tuple is the source name
                           fourth element of tuple is the article url
           (an empty list when no input is given or no article qualifies)
    """
    zipped_articles = []
    for url, source in zipped_urls_sources:
        try:
            article = Article(url)
            article.download()
            article.parse()
            txt = article.text
            article_title = article.title
        except Exception:
            # best effort: skip articles that fail to download/parse
            # (Exception rather than a bare except, so Ctrl-C still interrupts)
            continue
        # no keyword/summary extraction (article.nlp()) is run here because only
        # the text and the title are used
        if not txt:
            # if there is no text in the article it isn't included
            continue
        if len(txt.split(' ')) < min_words:
            # too short to be a full article
            continue
        zipped_articles.append((txt, article_title, source, url))
    return zipped_articles

## Find and Keep the Most Similar Articles

In [128]:
def similar_documents(articles, threshold=0.53):
    """
    function to get similar documents in order to ensure we have the articles that have the same topic, event or issue

    Each article text is turned into a tf-idf vector and compared with every article
    in the list (itself included). Articles whose average similarity is above
    `threshold` are kept; when more than 4 qualify only the 4 with the highest
    average are returned, highest first.

    input: articles - list of tuples, article texts, titles, sources, urls
           threshold - float, minimum average similarity for an article to be kept
                       (default .53, an arbitrary threshold)
    output: list of tuples (text, title, source, url) of the articles that have
            high similarity to one another
    raises: IndexError when fewer than 2 articles pass the threshold
            (this includes an empty input)
    """
    articles = list(articles)
    if not articles:
        raise IndexError('No similar articles found')

    texts = [article[0] for article in articles]
    tfidf = TfidfVectorizer().fit_transform(texts)
    # `@` is matrix multiplication for both sparse matrices and sparse arrays
    pairwise_similarity = (tfidf @ tfidf.T).toarray()

    # for each document compute the average similarity score over all documents
    # (the row average includes the document's similarity with itself)
    avg_similarity = np.average(pairwise_similarity, axis=1)
    bool_similarity = avg_similarity > threshold
    n_similar = int(bool_similarity.sum())

    if n_similar <= 1:
        # if there is less than 2 articles that has a collective similarity score over the threshold
        raise IndexError('No similar articles found')
    if n_similar > 4:
        # more than 4 articles qualify, only take the top 4 similarities
        keep_indexes = avg_similarity.argsort()[-4:][::-1]
    else:
        keep_indexes = np.flatnonzero(bool_similarity)

    # index the original tuples directly instead of converting whole article
    # bodies into numpy string arrays
    return [tuple(articles[i]) for i in keep_indexes]

## Summarization of similar articles

In [67]:
# Load the pretrained t5 model and tokenizer (t5-large checkpoint) for second level summarization.
# NOTE(review): in the cells shown here, t5_model / t5_tokenizer are only referenced
# from commented-out code - confirm whether this load is still needed.
t5_model = TFT5ForConditionalGeneration.from_pretrained('t5-large')
t5_tokenizer = T5Tokenizer.from_pretrained('t5-large')

All model checkpoint layers were used when initializing TFT5ForConditionalGeneration.

All the layers of TFT5ForConditionalGeneration were initialized from the model checkpoint at t5-large.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.


In [139]:
def summarization(similar_articles_text):
    """
    Summarize a group of similar articles into one combined summary.

    Each article is cut to its first 400 space-separated words and condensed with
    gensim's `summarize`; the condensed texts are joined into one string, which is
    then summarized with the pegasus model.

    input: tuple of similar article text
    output: list of strings, the decoded pegasus output for the combined text
            (one entry per generated sequence)
    """
    # Imported here so the function does not depend on a later notebook cell
    # having been run first.
    from gensim.summarization.summarizer import summarize

    tic = time.perf_counter()

    # summarize the first 400 words from each article
    # and then combine all these similar articles
    # and then do pegasus on it
    texts = [summarize(" ".join(article.split(" ")[:400])) for article in similar_articles_text]
    combined_txt = " ".join(texts)
    pegasus_input_list = pegasus_tokenizer([combined_txt], truncation=True, return_tensors='tf')
    pegasus_summary_ids = pegasus_model.generate(pegasus_input_list['input_ids'],
                                                 no_repeat_ngram_size=5,
                                                 min_length=60,
                                                 max_length=300,
                                                 early_stopping=True)
    pegasus_summary_list = [pegasus_tokenizer.decode(g, skip_special_tokens=True,
                                                     clean_up_tokenization_spaces=False)
                            for g in pegasus_summary_ids]
    toc = time.perf_counter()
    print(f"Got the pegasus summary in {toc - tic:0.4f} seconds")
    return pegasus_summary_list

## Example Run

In [110]:
article_url = 'https://www.nytimes.com/2021/03/16/us/politics/election-interference-russia-2020-assessment.html'

In [111]:
article_url

'https://www.nytimes.com/2021/03/16/us/politics/election-interference-russia-2020-assessment.html'

In [112]:
# The article's bias is meant to come from model output, but right now it is set manually ('right')
alt_biases = get_alternative_bias('right')

In [113]:
# Get the short summary and the publication month/year of the article;
# both are inputs to link_alternative_articles
summary, date = short_summary(article_url)

Downloaded the article in 0.4277 seconds
Created the summary in 64.8798 seconds


In [114]:
summary

'The US intelligence community has declassified a report on Russia’s efforts to meddle in the 2016 election, laying out how former Vice'

In [119]:
# Search for articles on the same topic from sources with the alternative biases;
# the result is a list of (url, source name) tuples
alt_articles_links = link_alternative_articles(article_url, summary, date, alt_biases, data)

NBC News (Online)
ABC News (Online)
The Intercept
Bloomberg
The New Yorker
Wall Street Journal (News)
Christian Science Monitor
Reuters
NPR (Online News)
BBC News


In [129]:
alt_articles_links

[('https://abcnews.go.com/Politics/senate-report-details-russias-efforts-meddle-2016-ties/story?id=72444405',
  'ABC News (Online)'),
 ('https://www.bloomberg.com/news/articles/2021-03-16/u-s-spy-agency-rejects-trump-claim-of-china-election-meddling',
  'Bloomberg'),
 ('https://www.newyorker.com/magazine/2017/03/06/trump-putin-and-the-new-cold-war',
  'The New Yorker'),
 ('https://www.wsj.com/articles/senate-report-affirms-u-s-intelligence-findings-on-2016-russian-interference-11587483408',
  'Wall Street Journal (News)'),
 ('https://www.csmonitor.com/USA/Politics/2019/0514/Amid-growing-concerns-about-2020-a-primer-on-Russian-election-interference',
  'Christian Science Monitor'),
 ('https://www.reuters.com/article/us-usa-trump-russia-q-a/what-we-know-about-u-s-probes-of-russian-meddling-in-2016-election-idUSKBN19604O',
  'Reuters'),
 ('https://www.npr.org/2020/08/18/903616280/sen-mark-warner-discusses-latest-report-on-russias-influence-on-2016-election',
  'NPR (Online News)')]

In [130]:
# Unpack the urls: download each linked article and collect its text, title, source and url
alt_articles = url_to_info(alt_articles_links)

In [131]:
alt_articles

[('A new bipartisan report released by a Senate panel Tuesday outlines perhaps the most detailed accounting to date of Russia’s efforts to interfere in the 2016 election while accusing the White House and others close to President Donald Trump of refusing to cooperate with an investigation into whether the president’s campaign simply benefited or sought to aid Russia’s efforts.\n\nThe nearly-1,000 page report from the Senate Intelligence Committee, its fifth and final examination of a years-long effort to probe Russian meddling in 2016, describes several episodes in which Trump and members of his campaign were keen to accept Russia’s help -- and in some instances goes further than even former special counsel Robert Mueller in detailing ties between the campaign and Russian individuals.\n\nHowever, like Mueller’s report released last year, the Senate committee does not allege any criminal conspiracy between Trump or members of his campaign and Russia.\n\nAnd in agreement with the intell

In [132]:
# Keep only the articles whose average tf-idf similarity passes the threshold
# (similar_documents raises IndexError when fewer than two articles qualify)
zipped_similar = similar_documents(alt_articles)

[0.7664443  0.78021121 0.76341044 0.73409674 0.67016612]
[ True  True  True  True  True]


In [133]:
# Split the kept articles back into parallel tuples (note: `title` holds all the titles)
texts, title, sources, urls= zip(*zipped_similar)

In [96]:
combine_text = " ".join(texts)

In [104]:
texts[0]

'As the intelligence beat reporter for the Washington Post at the time, I watched these agencies grow in size, as dozens of new buildings appeared around the Washington region to house a ballooning workforce of over a million people with top-secret security clearances.\nAnd yet, last year, these vastly larger agencies failed to defend, or even warn, the American public against the most audacious Russian covert operation toward the United States since the end of the Cold War. Only after the fact, when a Russian disinformation campaign had already tainted the 2016 Presidential election, did the Office of the Director of National Intelligence, another vast post-9/11 creation, disclose the Kremlin’s interference.\nFacing one of the clearest domestic threats to the U.S. in a decade, neither the F.B.I., which has the responsibility for conducting counterintelligence inside the United States, nor the O.D.N.I. warned Americans that platoons of Russian-backed automated “bots” and human trolls w

In [97]:
combine_text

'As the intelligence beat reporter for the Washington Post at the time, I watched these agencies grow in size, as dozens of new buildings appeared around the Washington region to house a ballooning workforce of over a million people with top-secret security clearances.\nAnd yet, last year, these vastly larger agencies failed to defend, or even warn, the American public against the most audacious Russian covert operation toward the United States since the end of the Cold War. Only after the fact, when a Russian disinformation campaign had already tainted the 2016 Presidential election, did the Office of the Director of National Intelligence, another vast post-9/11 creation, disclose the Kremlin’s interference.\nFacing one of the clearest domestic threats to the U.S. in a decade, neither the F.B.I., which has the responsibility for conducting counterintelligence inside the United States, nor the O.D.N.I. warned Americans that platoons of Russian-backed automated “bots” and human trolls w

In [88]:
from gensim.summarization.summarizer import summarize

In [90]:
summarize(combine_text)

'The assessment claims that Hillary Clinton, then a Democratic candidate for president, personally approved an effort “to stir up a scandal against U.S. Presidential candidate Donald Trump by tying him to Putin and the Russians\' hacking of the Democratic National Committee.” But in his letter to Graham, Ratcliffe noted that the U.S. intelligence community “does not know the accuracy of this allegation or the extent to which the Russian intelligence analysis may reflect exaggeration or fabrication.”\nAccording to Ratcliffe, former CIA Director John Brennan briefed former President Barack Obama on the Russian assessment, which included the allegation that Clinton approved Intelligence Report: Russia Tried To Help Trump In 2020 Election\nA new report by the U.S. intelligence community on Tuesday says Russia sought to help former President Donald Trump in last year\'s presidential election.\nRussian President Vladimir Putin authorized "influence operations aimed at denigrating President B

In [75]:
title

('Intel chief releases Russian disinfo on Hillary Clinton that was rejected by bipartisan Senate panel',
 'Intelligence Report: Russia Tried To Help Trump In 2020 Election',
 "The Russian Hacking Controversy: What We Do And Don't Know",
 'Amid growing concerns about 2020, a primer on Russian election interference')

In [55]:
sources

('Politico',
 'NPR (Opinion) ',
 'NPR (Online News)',
 'Christian Science Monitor')

In [56]:
urls

('https://www.politico.com/news/2020/09/29/john-ratcliffe-hillary-clinton-russia-423022',
 'https://www.npr.org/2021/03/16/977958302/intelligence-report-russia-tried-to-help-trump-in-2020-election',
 'https://www.npr.org/sections/parallels/2016/12/12/505272992/the-russian-hacking-kerfuffle-what-we-do-and-dont-know',
 'https://www.csmonitor.com/USA/Politics/2019/0514/Amid-growing-concerns-about-2020-a-primer-on-Russian-election-interference')

In [140]:
summary = summarization(texts)

['In 2008, in tandem with Israeli intelligence, the U.S. launched the first digital attack on another country’s critical infrastructure, deploying a “worm,” known as Stuxnet, that was designed to cause centrifuges in Iran to spin out of control and thereby delay its nuclear development.\nKnake recalled, “The question was: ‘O.K., now, what’s the counter-Russia plan?\nAnd the counter-Iran plan?’ ” The difficulty was that, in the aftermath of Stuxnet, the U.S. needed Iran’s coöperation on diplomatic priorities.', 'The nearly-1,000 page report from the Senate Intelligence Committee, its fifth and final examination of a years-long effort to probe Russian meddling in 2016, describes several episodes in which Trump and members of his campaign were keen to accept Russia’s help -- and in some instances goes further than even former special counsel Robert Mueller in detailing ties between the campaign and Russian individuals.\nAnd in agreement with the intelligence community’s 2017 assessment on

In [141]:
summary

['Russia’s alleged interference in the 2016 presidential election was part of a broader effort by Moscow to meddle in the American political system in order to benefit President Donald Trump, according to a report released this week by the special counsel Robert Mueller, who is investigating possible ties between the Trump campaign and Russia.']

2 functions for summarization

1. Summarization of an article - input: an article - output: a Pegasus summarization of the article - up to one sentence, month and year

2. Summarization of similar articles - input: URLs of the articles - Pegasus or newspaper (extractive) summaries of the articles - check document similarity to make sure they are talking about the same subject/event/issue, then have Pegasus & T5 summarize the summaries to get a general overview of the articles.

In [122]:
        # Fetch the page at `article_url` with newspaper's Article and keep its text.
        article = Article(article_url)
        # download() can raise ArticleException when the site rejects the request;
        # the recorded run of this cell failed with a 403 Forbidden on a wsj.com URL.
        article.download()
        # Parse the downloaded page, then read the extracted plain text.
        article.parse()
        txt = article.text

ArticleException: Article `download()` failed with 403 Client Error: Forbidden for url: https://www.wsj.com/articles/mcconnell-threatens-pileup-if-democrats-change-filibuster-rules-11615908931 on URL https://www.wsj.com/articles/mcconnell-threatens-pileup-if-democrats-change-filibuster-rules-11615908931

In [18]:
txt = 'Speculation that President Joe Biden and the Democratic National Committee are petitioning cellphone carriers to monitor and edit private text messages is false, DNC and White House officials tell the Washington Examiner.\n\nStill, political operatives from both parties have made a practice of monitoring mass text blasts from the other side in recent years as SMS marketing has exploded as a tool for political campaigns, fundraising groups, and even private sector businesses.\n\nThe notion of new Democratic-backed SMS surveillance popped up in conservative circles after Politico published an article Monday that included a line that the DNC and other Biden-allied groups were "planning to engage fact-checkers more aggressively and work with SMS carriers to dispel misinformation about vaccines that is sent over social media and text messages."\n\n"The goal is to ensure that people who may have difficulty getting a vaccination because of issues like transportation see those barriers lessened or removed entirely," the article read.\n\nTRUMP CAMPAIGN FUNDRAISING THREATENED BY SPAM TEXTING ALLEGATIONS\n\nA DNC official told the Washington Examiner that the Democratic Party has been training its grassroots volunteers to sign up for various mass email lists from Republican-affiliated groups since 2019 and to flag noteworthy messages for the communications team.\n\nSimilarly, a Republican digital campaigns expert also confirmed that GOP operatives and volunteers frequently sign up to receive text messages from Democratic campaigns and affiliated groups in order to stay one step ahead of the competition.\n\nReferencing Politico\'s report, the DNC official conceded that they have added another internal step to their SMS process as a means of combating vaccine misinformation but emphatically stated that the DNC has not and will not lobby SMS carriers, such as AT&T, T-Mobile, or Verizon, to monitor any text messages distributed at both the bulk list or peer-to-peer 
level.\n\nP2P messaging refers to any text conversation exchanged between two people. Bulk list messaging requires recipients to opt-in to receive communications from an automated sender, such as the Biden campaign\'s "30330" and Trump campaign\'s "88022" blasts or even online retailers.\n\nSenders may seek to grow their own reach by either building those lists organically or purchasing numbers from lists owned by other groups or private communications companies.\n\nHOUSE GOP DIGITAL FUNDRAISING ARM OUTRAISES DEMOCRATS FOR FIRST TIME\n\nThe DNC official said that Politico "didn\'t do a great job" and "fueled a lot of embellishment and speculation" about the new initiative. That new process sees communications officials take the misleading bulk texts about vaccines flagged by volunteers and then forward them to SMS aggregator companies, such as Twilio or Bandwidth, "who either work with a mass texting client or have companies that work with mass texting clients," the official said.\n\nThose aggregator companies all have "fair use and abuse" policies but "almost all of the time" have no visibility on what their clients are actually sending, "so all we do is say, \'Hey, did you know that you were sending out these messages?\'" the official added.\n\n"The idea that, like, Joe Biden is reading everyone\'s text messages, that\'s not what happening," the DNC official said.\n\nSources on both sides noted that federal law prohibits SMS carriers from monitoring P2P messaging without a warrant, although carriers may deploy machine learning technology to block some bulk messages that violate fair use practices from being delivered.\n\nWhite House officials also told the Washington Examiner that though some administration officials, such as White House press secretary Jen Psaki and Dr. 
Anthony Fauci, have ramped up efforts to publicly dispute vaccine misinformation, the administration itself has nothing to do with the DNC initiative.\n\n"We are steadfastly committed to keeping politics out of the effort to get every American vaccinated so that we can save lives and help our economy further recover," White House spokesperson Kevin Munoz previously told Politico. "When we see deliberate efforts to spread misinformation, we view that as an impediment to the country\'s public health and will not shy away from calling that out."\n\nConservatives reacted with ire on social media to Politico\'s report, and some right-leaning television programs told their viewers that the president and his party were working to monitor their private text messages.\n\nYOUNG ADULTS SANK BIDEN\'S VACCINE GOAL. HERE\'S HOW THE WHITE HOUSE PLANS TO GET SHOTS IN THEIR ARMS\n\n"Biden’s regime has announced they’ll be working with SMS providers to stop vaccine \'misinformation\' spread via text messages," freshman Colorado Republican Rep. Lauren Boebert tweeted. "This is on the same day the White House said they support local officials who implement mandatory vaccines. No wonder they can’t condemn Communism."\n\nBiden’s regime has announced they’ll be working with SMS providers to stop vaccine “misinformation” spread via text messages.\n\n\n\nThis is on the same day the White House said they support local officials who implement mandatory vaccines.\n\n\n\nNo wonder they can’t condemn Communism. — Lauren Boebert (@laurenboebert) July 12, 2021\n\nMissouri Republican Sen. 
Josh Hawley suggested that the misinformation campaign was an effort to "force vaccine compliance and who knows what else."\n\nCLICK HERE TO READ MORE FROM THE WASHINGTON EXAMINER\n\nSo now the Biden Administration wants to get into people’s text messages … to force vaccine compliance and who knows what else https://t.co/Q1v1qkOOfB — Josh Hawley (@HawleyMO) July 13, 2021\n\n"Yes, we can allay the concerns of the excessively paranoid by tracking the content they send via text message," Noah Rothman, an editor at Commentary, wrote.'


In [31]:
list_txt = [['For the first time since the Cold War, the United States has publicly admitted that Russia meddled in the 2016 presidential election in order to help Donald Trump and hurt his Democratic opponent, Hillary Clinton, by damaging their election and undermining public faith in the American political system, according to a new report.'], ['With new details from the Mueller report about Russia’s interference in the 2016 presidential election, an overview of what we know about what happened last time around and what the United States is doing to prevent another such attack, here’s a new sense of urgency around the question of what the U.S.'], ['Here is a guide to the controversy surrounding alleged Russian meddling in the 2016 presidential election and possible ties between President Donald Trump’s campaign team and Moscow, as well as the various investigations into the matter. (This article has been revised to reflect that the special counsel has been appointed, not that the Justice Department has appointed a special counsel.'], ['A look at some of the claims made in a series of letters sent by top Republicans and Democrats to the White House last week about alleged Russian interference in last year’s election and President Donald Trump’s links to the Kremlin, as reported by the Associated Press and other news outlets on Tuesday.']]

In [41]:
# Flatten `list_txt` (a list of lists of summary strings) into one flat list,
# preserving the original order.
newlist = [
    text
    for group in list_txt
    for text in group
]

In [42]:
newlist

['For the first time since the Cold War, the United States has publicly admitted that Russia meddled in the 2016 presidential election in order to help Donald Trump and hurt his Democratic opponent, Hillary Clinton, by damaging their election and undermining public faith in the American political system, according to a new report.',
 'With new details from the Mueller report about Russia’s interference in the 2016 presidential election, an overview of what we know about what happened last time around and what the United States is doing to prevent another such attack, here’s a new sense of urgency around the question of what the U.S.',
 'Here is a guide to the controversy surrounding alleged Russian meddling in the 2016 presidential election and possible ties between President Donald Trump’s campaign team and Moscow, as well as the various investigations into the matter. (This article has been revised to reflect that the special counsel has been appointed, not that the Justice Departmen

In [29]:
" ".join(txt.split(" ")[:200])

'Speculation that President Joe Biden and the Democratic National Committee are petitioning cellphone carriers to monitor and edit private text messages is false, DNC and White House officials tell the Washington Examiner.\n\nStill, political operatives from both parties have made a practice of monitoring mass text blasts from the other side in recent years as SMS marketing has exploded as a tool for political campaigns, fundraising groups, and even private sector businesses.\n\nThe notion of new Democratic-backed SMS surveillance popped up in conservative circles after Politico published an article Monday that included a line that the DNC and other Biden-allied groups were "planning to engage fact-checkers more aggressively and work with SMS carriers to dispel misinformation about vaccines that is sent over social media and text messages."\n\n"The goal is to ensure that people who may have difficulty getting a vaccination because of issues like transportation see those barriers lessen

In [None]:
["’"]?(A-Z)(((Mr|Ms|Mrs|Dr|Capt|Col)\.\s+((?!\w{2,}[.?!][’"]?\s+["’]?[A-Z]).))?)((?![.?!]["’]?\s+["’]?[A-Z]).)[.?!]+["’"]?

In [None]:
    # Timing scaffold: place the code to be measured between the two
    # perf_counter() reads; with nothing in between it only measures call overhead.
    tic = time.perf_counter()
    # ... code under measurement goes here ...
    toc = time.perf_counter()
    # Neutral label: the previous message described a tutorial download, which
    # does not match anything this cell does.
    print(f"Elapsed time: {toc - tic:0.4f} seconds")